#2014 Fix UnicodeEncode on entering non-ascii password
Merged 6 years ago by pingou. Opened 7 years ago by farhaan.
farhaan/pagure non-ascii  into  master

file modified
+31 -11
@@ -5,18 +5,19 @@ 

  

   Authors:

     Pierre-Yves Chibon <pingou@pingoured.fr>

-    Farhaan Bukhsh <farhaan.bukhsh@gmail.com>

+    Farhaan Bukhsh <farhaan@fedoraproject.org>

  

  """

  

  import random

  import string

+ import hashlib

  import bcrypt

+ import six

  

- import hashlib

  import pagure

  from pagure.lib import model

- from kitchen.text.converters import to_unicode, to_bytes

+ from kitchen.text.converters import to_bytes

  from cryptography.hazmat.primitives import constant_time

  

  
@@ -49,14 +50,32 @@ 

  

  

  def generate_hashed_value(password):

-     """ Generate hash value for password

-     """

-     return '$2$' + bcrypt.hashpw(to_unicode(password), bcrypt.gensalt())

+     ''' Generate hash value for password.

+ 

+     :arg password: password for which the hash has to be generated.

+     :type password: str (Python 3) or unicode (Python 2)

+     :return: a hashed string of characters.

+     :rtype: an encoded string(bytes).

+     '''

+     if not isinstance(password, six.text_type):

+         raise ValueError("Password supplied is not unicode text")

+ 

+     return '$2$' + bcrypt.hashpw(password.encode('UTF_8'),

+                                  bcrypt.gensalt())

  

  

  def check_password(entered_password, user_password, seed=None):

-     """ Version checking and returning the password

-     """

+     ''' Version checking and returning the password

+ 

+     :arg entered_password: password entered by the user.

+     :type entered_password: str (Python 3) or unicode (Python 2)

+     :arg user_password: the hashed string fetched from the database.

+     :return: a Boolean depending upon the entered_password, True if the

+              password matches

+     '''

+     if not isinstance(entered_password, six.text_type):

+         raise ValueError("Entered password is not unicode text")

+ 

      if not user_password.count('$') >= 2:

          raise pagure.exceptions.PagureException(

              'Password of unknown version found in the database'
@@ -65,11 +84,12 @@ 

      _, version, user_password = user_password.split('$', 2)

  

      if version == '2':

-         password = bcrypt.hashpw(to_unicode(entered_password), user_password)

+         password = bcrypt.hashpw(entered_password.encode('UTF_8'),

+                                  user_password)

  

      elif version == '1':

-         password = '%s%s' % (to_unicode(entered_password), seed)

-         password = hashlib.sha512(password).hexdigest()

+         password = '%s%s' % (entered_password, seed)

+         password = hashlib.sha512(password.encode('utf-8')).hexdigest()

  

      else:

          raise pagure.exceptions.PagureException(

@@ -240,7 +240,7 @@ 

          # now returns 127.0.0.1 making our logic pass where it used to

          # partly fail

          if hasattr(flask, '__version__') and \

-                 tuple(flask.__version__.split('.')) <= (0,12,0):

+                 tuple(flask.__version__.split('.')) <= (0, 12, 0):

              self.assertIn(

                  'Could not set the session in the db, please report '

                  'this error to an admin', output.data)
@@ -299,11 +299,164 @@ 

          # now returns 127.0.0.1 making our logic pass where it used to

          # partly fail

          if hasattr(flask, '__version__') and \

-                 tuple(flask.__version__.split('.')) <= (0,12,0):

+                 tuple(flask.__version__.split('.')) <= (0, 12, 0):

              self.assertIn(

                  'Could not set the session in the db, please report '

                  'this error to an admin', output.data)

  

+     @patch('pagure.lib.notify.send_email', MagicMock(return_value=True))

+     def test_non_ascii_password(self):

+         """ Test login and create user functionality when the password is

+             non-ascii.

+         """

+ 

+         # Check before:

This test isn't really related to the web form accepting a non-ASCII password. It'd be good to be its own test, perhaps, although I expect other tests are also running this.

+         items = pagure.lib.search_user(self.session)

+         self.assertEqual(2, len(items))

+ 

+         # First access the new user page

This also isn't related to the test. It's best to narrow the scope of a unit test to the smallest possible piece of functionality - we want to know exactly what's not working when it fails, but adding all these pre-check assertions makes it more difficult to figure out what's going wrong. I recommend dropping this whole block, as well.

+         output = self.app.get('/user/new')

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>New user - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/user/new" method="post">', output.data)

+ 

+         # Create the form to send there

+         # This has all the data needed

+ 

+         data = {

+             'user': 'foo',

+             'fullname': 'user foo',

+             'email_address': 'foo@bar.com',

+             'password': 'ö',

+             'confirm_password': 'ö',

+         }

+ 

+         # Submit this form  -  Doesn't work since there is no csrf token

This isn't a test for CSRF token functionality, so you can drop this whole block

+         output = self.app.post('/user/new', data=data)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>New user - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/user/new" method="post">', output.data)

+ 

+         csrf_token = output.data.split(

+             'name="csrf_token" type="hidden" value="')[1].split('">')[0]

+ 

+         # Submit the form with the csrf token

This block can also be dropped, it's not a test about usernames being unique.

+         data['csrf_token'] = csrf_token

+         output = self.app.post('/user/new', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>New user - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/user/new" method="post">', output.data)

+         self.assertIn('Username already taken.', output.data)

+ 

+         # Submit the form with another username

This isn't a test about the email needing to be unique (as an aside, HTTP 200 is almost certainly not the right error code) so this block can go, too.

+         data['user'] = 'foobar'

+         output = self.app.post('/user/new', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>New user - Pagure</title>', output.data)

+         self.assertIn('Email address already taken.', output.data)

+ 

+         # Submit the form with proper data

+         data['email_address'] = 'foobar@foobar.com'

+         output = self.app.post('/user/new', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Login - Pagure</title>', output.data)

+         self.assertIn(

+             'User created, please check your email to activate the account',

+             output.data)

+ 

+         # Check after:

+         items = pagure.lib.search_user(self.session)

+         self.assertEqual(3, len(items))

+ 

+         # Checking for the /login page

+         output = self.app.get('/login/')

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Login - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/dologin" method="post">', output.data)

+ 

+         # This has all the data needed

+         data = {

+             'username': 'foob_bar',

+             'password': 'ö',

+         }

+ 

+         # Submit this form  -  Doesn't work since there is no csrf token

This block can be dropped.

+         output = self.app.post('/dologin', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Login - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/dologin" method="post">', output.data)

+         self.assertIn('Insufficient information provided', output.data)

+ 

+         # Submit the form with the csrf token  -  but invalid user

This block can be dropped.

+         data['csrf_token'] = csrf_token

+         output = self.app.post('/dologin', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Login - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/dologin" method="post">', output.data)

+         self.assertIn('Username or password invalid.', output.data)

+ 

+         # Submit the form with the csrf token  -  but user not confirmed

This block can be dropped.

+         data['username'] = "foobar"

+         output = self.app.post('/dologin', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Login - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/dologin" method="post">', output.data)

+         self.assertIn(

+             'Invalid user, did you confirm the creation with the url '

+             'provided by email?', output.data)

+ 

+         # User in the DB, csrf provided  -  but wrong password submitted

This would probably be a good test all by itself to make sure, for example, it doesn't just accept any non-ascii password, but it's not something we should combine with a test about whether it's possible to sign up and authenticate with a non-ascii password.

+         data['password'] = 'öö'

it should accept any non-ascii character as password right ?

Yes.

+         output = self.app.post('/dologin', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Login - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/dologin" method="post">', output.data)

+         self.assertIn('Username or password invalid.', output.data)

+ 

+         # When account is not confirmed i.e user_obj != None

This block can also be dropped.

+         data['password'] = 'ö'

+         output = self.app.post('/dologin', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Login - Pagure</title>', output.data)

+         self.assertIn(

+             '<form action="/dologin" method="post">', output.data)

+         self.assertIn(

+             'Invalid user, did you confirm the creation with the url '

+             'provided by email?', output.data)

+ 

+         # Confirm the user so that we can log in

+         item = pagure.lib.search_user(self.session, username='foobar')

+         self.assertEqual(item.user, 'foobar')

+         self.assertNotEqual(item.token, None)

+ 

+         # Remove the token

+         item.token = None

+         self.session.add(item)

+         self.session.commit()

+ 

+         # Login but cannot save the session to the DB due to the missing IP

This comment doesn't seem relevant? I'm not seeing an error being asserted below (it looks like the block below tests that it's possible to log in, which is what we want)

+         # address in the flask request

+         data['password'] = 'ö'

+         output = self.app.post('/dologin', data=data, follow_redirects=True)

+         self.assertEqual(output.status_code, 200)

+         self.assertIn('<title>Home - Pagure</title>', output.data)

+         self.assertIn(

+             '<a class="nav-link btn btn-primary" '

+             'href="/login/?next=http://localhost/">', output.data)

+ 

+         # Check the user

+         item = pagure.lib.search_user(self.session, username='foobar')

+         self.assertEqual(item.user, 'foobar')

+         self.assertEqual(item.token, None)

+ 

      def test_confirm_user(self):

          """ Test the confirm_user endpoint. """

  

file modified
+30 -13
@@ -48,7 +48,7 @@ 

  

      def test_generate_hashed_value(self):

          ''' Test pagure.lib.login.generate_hashed_value. '''

-         password = pagure.lib.login.generate_hashed_value('foo')

+         password = pagure.lib.login.generate_hashed_value(u'foo')

          self.assertTrue(password.startswith('$2$'))

          self.assertEqual(len(password), 63)

  
@@ -56,25 +56,25 @@ 

          ''' Test pagure.lib.login.check_password. '''

  

          # Version 2

-         password = pagure.lib.login.generate_hashed_value('foo')

+         password = pagure.lib.login.generate_hashed_value(u'foo')

          self.assertTrue(

-             pagure.lib.login.check_password('foo', password))

+             pagure.lib.login.check_password(u'foo', password))

          self.assertFalse(

-             pagure.lib.login.check_password('bar', password))

+             pagure.lib.login.check_password(u'bar', password))

  

          # Version 1

-         password = '%s%s' % ('foo', APP.config.get('PASSWORD_SEED', None))

+         password = '%s%s' % (u'foo', APP.config.get('PASSWORD_SEED', None))

          password = '$1$' + hashlib.sha512(password).hexdigest()

-         self.assertTrue(pagure.lib.login.check_password('foo', password))

-         self.assertFalse(pagure.lib.login.check_password('bar', password))

+         self.assertTrue(pagure.lib.login.check_password(u'foo', password))

+         self.assertFalse(pagure.lib.login.check_password(u'bar', password))

  

          # Invalid password  -  No version

-         password = '%s%s' % ('foo', APP.config.get('PASSWORD_SEED', None))

+         password = '%s%s' % (u'foo', APP.config.get('PASSWORD_SEED', None))

          password = hashlib.sha512(password).hexdigest()

          self.assertRaises(

              PagureException,

              pagure.lib.login.check_password,

-             'foo', password

+             u'foo', password

          )

  

          # Invalid password  -  Invalid version
@@ -82,15 +82,15 @@ 

          self.assertRaises(

              PagureException,

              pagure.lib.login.check_password,

-             'foo',

+             u'foo',

              password

          )

-         password = '%s%s' % ('foo', APP.config.get('PASSWORD_SEED', None))

+         password = '%s%s' % (u'foo', APP.config.get('PASSWORD_SEED', None))

          password = hashlib.sha512(password).hexdigest()

          self.assertRaises(

              PagureException,

              pagure.lib.login.check_password,

-             'foo', password

+             u'foo', password

          )

  

          # Invalid password  -  Invalid version
@@ -98,10 +98,27 @@ 

          self.assertRaises(

              PagureException,

              pagure.lib.login.check_password,

-             'foo',

+             u'foo',

              password

          )

  

+     def test_unicode_required(self):

+         ''' Test to check for non-ascii password

+         '''

+         self.assertRaises(

+             ValueError,

+             pagure.lib.login.generate_hashed_value,

+             u'hunter2'.encode('utf-8')

+         )

+         password = pagure.lib.login.generate_hashed_value(u'foo')

+         self.assertRaises(

+             ValueError,

+             pagure.lib.login.check_password,

+             u'foo'.encode('utf-8'),

+             password

+         )

+ 

+ 

  if __name__ == '__main__':

      SUITE = unittest.TestLoader().loadTestsFromTestCase(PagureLibLogintests)

      unittest.TextTestRunner(verbosity=2).run(SUITE)

The above changes encode the password to UTF_8 to make the password
compatible for ascii as well as non-ascii character.

Signed-off-by: Farhaan Bukhsh farhaan.bukhsh@gmail.com

I might add unit tests but the Pr is up for review

This doesn't make sense to me. bcrypt.hashpw expects bytes. Why is to_unicode being called here?

The password should be decoded to unicode when the request is received and the encoding is known (from the request headers). It should then be encoded here using the same encoding used to check the password later.

I'm not sure the bcrypt library that you are pointing to is the one we use, we use the one that's packaged as py-bcrypt in Fedora

Oh my, that project seems very, very dead. Granted, I suppose it doesn't do much, but still.

I poked around and although I'm not very familiar with Python's C API it looks to me like it's expected a byte string (which makes sense as an API) .

I was very concerned that this was a CVE since using to_unicode with its default kwargs is not at all safe for passwords.

The problem is it

a) requires the bytes to be encoded with 'utf-8' which is not a safe assumption (and pointless in any case since you're immediately re-encoding it with utf-8)

b) ignores decoding errors by using 'replace' which replaces un-decodeable bytes with a special character, U+FFFD, that marks a character that was not valid UTF-8.

This means that if you submit a byte string to this function that is not UTF-8 and aren't valid UTF-8 bytes you will have a password made up of U+FFFD characters, which happy encodes to UTF-8 again. Instead of their original password, you've now made them a password of some number of U+FFFD characters.

Luckily, Flask and flask-wtf decode the password to unicode so the password is already decoded to unicode (properly, using the client-provided encoding header) when passed to this function. This function's API should be documented to require a unicode string, and then encode it to UTF-8.

@farhaan, you need to remove the call to to_unicode

bcrypt.hashpw(password.encode('utf-8'), ...)

It would also be good to add tests.

@jcline sorry for the delay was caught up with college :)

There is this line here also but it was being used for the previous version should I replace this also @jcline and @pingou ?

I have the utmost confidence on @jcline when it comes to unicode :)

rebased

7 years ago

Hey I have rebased and updated the pr @jcline and @pingou this is up for review :smile:

I really recommend breaking this out into its own test. Python's standard library has good documentation how how to organize tests.

This test will perform differently on Python 2 than it will on Python 3. In Python 2 this is going to be automatically encoded with UTF-8 to a byte string. In Python 3 this will remain a unicode string.

The password API should only accept unicode strings. You can add a u in front of the string to mark it as a unicode literal so it'll be the same in Python 2 and Python 3. I also think the function should raise a ValueError if the string provided isn't a unicode string. You can use six.text_types to get the correct class (str for Python 3, unicode for Python 2). It'd be good to have a test for that, as well.

@jcline thanks for the review I will look into organizing tests and put it in the right place :smile:

@jcline can you elaborate more on six.text_type do you want to check the password in lib/login and then raise ValueError ?

rebased

7 years ago

rebased

7 years ago

@jcline can you elaborate more on six.text_type do you want to check the password in lib/login and then raise ValueError ?

Yeah. I was referring to the six library. It provides a bunch of helpers for writing Python. In this case, we want to only accept unicode text. In Python 2, this is of type unicode, but in Python 3 this is just str. You can use six.text_type to check that the argument you got is unicode:

if not isinstance(password, six.text_type):
    raise ValueError('This API requires unicode text')

and your test case should assert that this is the case:

def test_unicode_required(self):
    self.assertRaises(ValueError, generate_hashed_value, u'hunter2'.encode('utf-8'))

If you're not already familiar with unicode and encoding in general, I recommend reading some of the excellent blog posts out there. joelonsoftware has a decent one.

This PyCon 2012 talk discusses it specifically in the context of Python.

Thanks a lot for the blogpost and the talk seems really interesting , I am going through it.

rebased

7 years ago

4 new commits added

  • Add Tests and exception for non-unicode password
  • Organize test for non-ascii password
  • Fix unicode in password and add test
  • Fix UnicodeEncode on entering non-ascii password
7 years ago

It'd also be good to note that the return type is an encoded string (bytes)

s/upoun/upon/. Might also be good to explicitly note that it returns True if the password matches.

If I recall correctly, the hashlib API requires bytestrings. This will sort of work in Python 2, but it won't in Python 3. You'll need to explicitly encode before you hash it:

password = u'%s%s' % (entered_password, seed)
password = hashlib.sha512(password.encode('utf-8').hexdigest()

4 new commits added

  • Add Tests and exception for non-unicode password
  • Organize test for non-ascii password
  • Fix unicode in password and add test
  • Fix UnicodeEncode on entering non-ascii password
7 years ago

4 new commits added

  • Add Tests and exception for non-unicode password
  • Organize test for non-ascii password
  • Fix unicode in password and add test
  • Fix UnicodeEncode on entering non-ascii password
7 years ago

rebased

6 years ago

rebased

6 years ago

@jcline do you want to have a final look at this one?

rebased

6 years ago

I'm not a fan of the length of that first test, but it's not a deal-breaker for me. Looks good to me.

I'm not a fan of the length of that first test, but it's not a deal-breaker for me. Looks good to me.

Can you help me reduce the length of the test case ?

Sure! I've left comments in-line.

This test isn't really related to the web form accepting a non-ASCII password. It'd be good to be its own test, perhaps, although I expect other tests are also running this.

This also isn't related to the test. It's best to narrow the scope of a unit test to the smallest possible piece of functionality - we want to know exactly what's not working when it fails, but adding all these pre-check assertions makes it more difficult to figure out what's going wrong. I recommend dropping this whole block, as well.

This isn't a test for CSRF token functionality, so you can drop this whole block

This isn't a test about the email needing to be unique (as an aside, HTTP 200 is almost certainly not the right error code) so this block can go, too.

This block can also be dropped, it's not a test about usernames being unique.

This would probably be a good test all by itself to make sure, for example, it doesn't just accept any non-ascii password, but it's not something we should combine with a test about whether it's possible to sign up and authenticate with a non-ascii password.

This comment doesn't seem relevant? I'm not seeing an error being asserted below (it looks like the block below tests that it's possible to log in, which is what we want)

it should accept any non-ascii character as password right ?

hey @jcline I read your comments and followed it as well didn't push the code because I have few clarification:

  • How should we divide the tests here ?
  • Which test should be placed where ?
  • How should I think about generating test cases ?

P.S: Sorry for such late action on the PR was caught up with college

I added some docs about test organization to the infra docs. They also link to the Python documentation on the unittest package and I recommend reading them since they've got a section on how to write tests. I think it'll answer a lot of your questions. If it doesn't, we can update the documentation to be more helpful!

@jcline thanks for the link I will go through and improve the cases and get back to it :)

I've rebased this PR which I believe will also allow to drop pybcrypt in favor of python-bcrypt.

If so, I'll merge this PR and open one about the change in dependencies.

Commit 9d34ee4 fixes this pull-request

Pull-Request has been merged by pingou

6 years ago