#317 info: infoldap: Decode values to unicode if possible
Opened 2 years ago by dbnicholson. Modified a year ago
dbnicholson/ipsilon ldap-unicode  into  master

file modified
+20 -2
@@ -114,6 +114,24 @@ 

  

          return conn

  

+     @classmethod

+     def _try_decode_value(cls, value):

+         """Decode values to unicode if possible

+ 

+         python-ldap doesn't know the LDAP schema, so all attribute

+         values are returned in binary since the value could be anything.

+         However, ipsilon expects that all info data is in string format.

+         Try to convert the value to unicode, maintaining the original

+         binary value if the decoding fails.

+         """

+         if isinstance(value, list):

+             return [cls._try_decode_value(v) for v in value]

+         else:

+             try:

+                 return value.decode('utf-8')

+             except UnicodeDecodeError:

+                 return value

+ 

      def _get_user_data(self, conn, dn):

          result = conn.search_s(dn, ldap.SCOPE_BASE)

          if result is None or result == []:
@@ -124,7 +142,7 @@ 

          for name, value in six.iteritems(result[0][1]):

              if isinstance(value, list) and len(value) == 1:

                  value = value[0]

-             data[name] = value

+             data[name] = self._try_decode_value(value)

          return data

  

      def _get_user_groups(self, conn, base, username):
@@ -137,7 +155,7 @@ 

          groups = []

          for r in results:

              if 'cn' in r[1]:

-                 groups.append(r[1]['cn'][0])

+                 groups.append(self._try_decode_value(r[1]['cn'][0]))

          return groups

  

      def get_user_data_from_conn(self, conn, dn, base, username):

python-ldap doesn't know the LDAP schema, so all attribute values are
returned in binary since the value could be anything. However, ipsilon
expects that all info data is in string format. Try to convert the value
to unicode, maintaining the original binary value if the decoding fails.

With python2, things worked OK as long as the values were ASCII. For
python3, attribute values will be passed around as bytes, which will
definitely do the wrong thing.

Signed-off-by: Dan Nicholson nicholson@endlessm.com

Full disclosure - I've only run this on python2. We're running 2.1.0 using an openid provider with ldap info. The openid consent page was failing to render when the LDAP fullname contained non-ASCII characters. However, I'm fairly confident this is wanted for python3, too. Otherwise you'll just be passing around bytes, which isn't what the rest of ipsilon expects. Jinja does seem to allow you to render bytes, but the rendered values would just look like b'value'.

Sounds sensible, do we have a way to test this ?

I've never attempted to run the test suite, but this (I believe) would be pretty similar to the consent form issue I was seeing:

diff --git a/tests/ldap.py b/tests/ldap.py
index 74feb1f..e946c14 100755
--- a/tests/ldap.py
+++ b/tests/ldap.py
@@ -49,6 +49,7 @@ sp_a = {'hostname': '${ADDRESS}',
 def fixup_sp_httpd(httpdir):
     merge = """
     MellonSetEnv "GROUPS" "groups"
+    MellonSetEnv "FULLNAME" "fullname"
     MellonMergeEnvVars On
 </Location>"""
     with open(httpdir + '/conf.d/ipsilon-saml.conf', 'r') as f:
@@ -78,9 +79,12 @@ Alias /sp ${HTTPDIR}/sp
         f.write(text)

     index = """<!--#echo var="MELLON_GROUPS" -->"""
+    fullname = """<!--#echo var="MELLON_FULLNAME" -->"""
     os.mkdir(httpdir + '/sp')
     with open(httpdir + '/sp/index.shtml', 'w') as f:
         f.write(index)
+    with open(httpdir + '/sp/fullname.shtml', 'w') as f:
+        f.write(fullname)


 class IpsilonTest(IpsilonTestBase):
@@ -140,3 +144,8 @@ if __name__ == '__main__':
         page = sess.fetch_page(idpname,
                                'https://127.0.0.11:45081/sp/index.shtml')
         page.expected_value('text()', 'Test Group;Test Group 2')
+
+    with TC.case('Access SP Protected Area Fullname'):
+        page = sess.fetch_page(idpname,
+                               'https://127.0.0.11:45081/sp/fullname.shtml')
+        page.expected_value('text()', u'Tést User')
diff --git a/tests/ldapdata.ldif b/tests/ldapdata.ldif
index bc4731c..586388a 100644
--- a/tests/ldapdata.ldif
+++ b/tests/ldapdata.ldif
@@ -13,7 +13,7 @@ description: Test People
 dn: uid=tuser,ou=People,dc=example,dc=com
 objectClass: inetOrgPerson
 uid: tuser
-cn: Test User
+cn: Tést User
 sn: Doe
 # password: tuser
 userPassword: {SSHA}g+LImPDfqn8HSf6LpJaN0Lpdp12OCbKf

Totally untested, though.

@dbnicholson Could you please rebase this?

Metadata