#1293 fix dict encoding in py3
Merged 5 years ago by mikem. Opened 5 years ago by tkopecek.
tkopecek/koji issue1282  into  master

file modified
+8 -25
@@ -4355,10 +4355,7 @@ 

          return result

      with zipfile.ZipFile(zippath, 'r') as archive:

          for entry in archive.infolist():

-             if six.PY2:

-                 filename = koji.fixEncoding(entry.filename)

-             else:

-                 filename = entry.filename

+             filename = koji.fixEncoding(entry.filename)

              result.append({'archive_id': archive_id,

                             'name': filename,

                             'size': entry.file_size,
@@ -4383,10 +4380,7 @@ 

          return result

      with tarfile.open(tarpath, 'r') as archive:

          for entry in archive:

-             if six.PY2:

-                 filename = koji.fixEncoding(entry.name)

-             else:

-                 filename = entry.name

+             filename = koji.fixEncoding(entry.name)

              result.append({'archive_id': archive_id,

                             'name': filename,

                             'size': entry.size,
@@ -6362,10 +6356,7 @@ 

      archiveinfo = {'buildroot_id': buildroot_id}

      archiveinfo['build_id'] = buildinfo['id']

      if metadata_only:

-         if six.PY2:

-             filename = koji.fixEncoding(fileinfo['filename'])

-         else:

-             filename = fileinfo['filename']

+         filename = koji.fixEncoding(fileinfo['filename'])

          archiveinfo['filename'] = filename

          archiveinfo['size'] = fileinfo['filesize']

          archiveinfo['checksum'] = fileinfo['checksum']
@@ -6376,10 +6367,7 @@ 

          archiveinfo['checksum_type'] = koji.CHECKSUM_TYPES[fileinfo['checksum_type']]

          archiveinfo['metadata_only'] = True

      else:

-         if six.PY2:

-             filename = koji.fixEncoding(os.path.basename(filepath))

-         else:

-             filename = os.path.basename(filepath)

+         filename = koji.fixEncoding(os.path.basename(filepath))

          archiveinfo['filename'] = filename

          archiveinfo['size'] = os.path.getsize(filepath)

          # trust values computed on hub (CG_Importer.prep_outputs)
@@ -6501,8 +6489,7 @@ 

      be created.

      """

      fname = os.path.basename(filepath)

-     if six.PY2:

-         fname = koji.fixEncoding(fname)

+     fname = koji.fixEncoding(fname)

      final_path = "%s/%s" % (destdir, fname)

      if os.path.exists(final_path):

          raise koji.GenericError("Error importing archive file, %s already exists" % final_path)
@@ -7687,10 +7674,7 @@ 

      if value is None:

          return value

      try:

-         if six.PY2:

-             return koji.fixEncodingRecurse(json.loads(value))

-         else:

-             return json.loads(value)

+         return koji.fixEncodingRecurse(json.loads(value))

      except Exception:

          if errstr is None:

              if desc is None:
@@ -9691,9 +9675,8 @@ 

          for (cltime, clname, cltext) in zip(fields['changelogtime'], fields['changelogname'],

                                              fields['changelogtext']):

              cldate = datetime.datetime.fromtimestamp(cltime).isoformat(' ')

-             if six.PY2:

-                 clname = koji.fixEncoding(clname)

-                 cltext = koji.fixEncoding(cltext)

+             clname = koji.fixEncoding(clname)

+             cltext = koji.fixEncoding(cltext)

  

              if author and author != clname:

                  continue

file modified
+22 -4
@@ -3063,10 +3063,16 @@ 

          return '%s (%s)' % (method, arch)

  

  CONTROL_CHARS = [chr(i) for i in range(32)]

- NONPRINTABLE_CHARS = six.b(''.join([c for c in CONTROL_CHARS if c not in '\r\n\t']))

+ NONPRINTABLE_CHARS = ''.join([c for c in CONTROL_CHARS if c not in '\r\n\t'])

+ if six.PY3:

+     NONPRINTABLE_CHARS_TABLE = dict.fromkeys(map(ord, NONPRINTABLE_CHARS), None)

  def removeNonprintable(value):

      # expects raw-encoded string, not unicode

-     return value.translate(None, NONPRINTABLE_CHARS)

+     if six.PY2:

+         return value.translate(None, NONPRINTABLE_CHARS)

+     else:

+         return value.translate(NONPRINTABLE_CHARS_TABLE)

+ 

  

  

  def _fix_print(value):
@@ -3089,6 +3095,12 @@ 

      If value is not valid UTF-8 to begin with, assume it is

      encoded in the 'fallback' charset.

      """

+     if six.PY3:

+         if remove_nonprintable:

+             return removeNonprintable(value)

+         else:

+             return value

+ 

      if not value:

          return six.b('')

  
@@ -3115,6 +3127,10 @@ 

  

      Similar behavior to fixEncoding, but recursive

      """

+     if six.PY3 and not remove_nonprintable:

+         # don't bother with fixing in py3

+         return value

+ 

      if isinstance(value, tuple):

          return tuple([fixEncodingRecurse(x, fallback=fallback, remove_nonprintable=remove_nonprintable) for x in value])

      elif isinstance(value, list):
@@ -3126,12 +3142,12 @@ 

              k = fixEncodingRecurse(k, fallback=fallback, remove_nonprintable=remove_nonprintable)

              ret[k] = v

          return ret

-     elif isinstance(value, six.text_type):

+     elif six.PY2 and isinstance(value, six.text_type):

          if remove_nonprintable:

              return removeNonprintable(value.encode('utf8'))

          else:

              return value.encode('utf8')

-     elif isinstance(value, str):

+     elif six.PY2 and isinstance(value, str):

          # value is a str, but may be encoded in utf8 or some

          # other non-ascii charset.  Try to verify it's utf8, and if not,

          # decode it using the fallback encoding.
@@ -3143,6 +3159,8 @@ 

              return removeNonprintable(s)

          else:

              return s

+     elif six.PY3 and isinstance(value, str) and remove_nonprintable:

+         return removeNonprintable(value)

      else:

          return value

  

file modified
+1 -1
@@ -135,7 +135,7 @@ 

              if fd:

                  try:

                      if six.PY3:

-                         os.write(fs, msg.encode('utf-8'))

+                         os.write(fd, msg.encode('utf-8'))

                      else:

                          os.write(fd, msg)

                      os.close(fd)

@@ -100,5 +100,5 @@ 

          self.assertEqual(recipients, ["user@example.com"])

          fn = os.path.join(os.path.dirname(__file__), 'data/calls', 'build_notif_1', 'message.txt')

          with open(fn, 'rb') as fp:

-             msg_expect = fp.read()

+             msg_expect = fp.read().decode()

          self.assertEqual(message, msg_expect)

@@ -18,52 +18,48 @@ 

  

      simple_values = [

          # [ value, fixed ]

-         ['', six.b('')],

-         [u'', six.b('')],

-         [u'góðan daginn', six.b('g\xc3\xb3\xc3\xb0an daginn')],

-         [u'hej', six.b('hej')],

-         [u'zdravstvuite', six.b('zdravstvuite')],

-         [u'céad míle fáilte', six.b('c\xc3\xa9ad m\xc3\xadle f\xc3\xa1ilte')],

-         [u'dobrý den', six.b('dobr\xc3\xbd den')],

-         [u'hylô', six.b('hyl\xc3\xb4')],

-         [u'jó napot', six.b('j\xc3\xb3 napot')],

-         [u'tervehdys', six.b('tervehdys')],

-         [u'olá', six.b('ol\xc3\xa1')],

-         [u'grüezi', six.b('gr\xc3\xbcezi')],

-         [u'dobre dan', six.b('dobre dan')],

-         [u'hello', six.b('hello')],

-         [u'bună ziua', six.b('bun\xc4\x83 ziua')],

-         [u'こんにちは', six.b('\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf')],

-         [u'你好', six.b('\xe4\xbd\xa0\xe5\xa5\xbd')],

-         [u'नमस्कार',  six.b('\xe0\xa4\xa8\xe0\xa4\xae\xe0\xa4\xb8\xe0\xa5\x8d\xe0\xa4\x95\xe0\xa4\xbe\xe0\xa4\xb0')],

-         [u'안녕하세요', six.b('\xec\x95\x88\xeb\x85\x95\xed\x95\x98\xec\x84\xb8\xec\x9a\x94')],

+         ['', ''],

+         [u'', ''],

+         [u'góðan daginn', 'g\xc3\xb3\xc3\xb0an daginn'],

+         [u'hej', 'hej'],

+         [u'zdravstvuite', 'zdravstvuite'],

+         [u'céad míle fáilte', 'c\xc3\xa9ad m\xc3\xadle f\xc3\xa1ilte'],

+         [u'dobrý den', 'dobr\xc3\xbd den'],

+         [u'hylô', 'hyl\xc3\xb4'],

+         [u'jó napot', 'j\xc3\xb3 napot'],

+         [u'tervehdys', 'tervehdys'],

+         [u'olá', 'ol\xc3\xa1'],

+         [u'grüezi', 'gr\xc3\xbcezi'],

+         [u'dobre dan', 'dobre dan'],

+         [u'hello', 'hello'],

+         [u'bună ziua', 'bun\xc4\x83 ziua'],

+         [u'こんにちは', '\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf'],

+         [u'你好', '\xe4\xbd\xa0\xe5\xa5\xbd'],

+         [u'नमस्कार',  '\xe0\xa4\xa8\xe0\xa4\xae\xe0\xa4\xb8\xe0\xa5\x8d\xe0\xa4\x95\xe0\xa4\xbe\xe0\xa4\xb0'],

+         [u'안녕하세요', '\xec\x95\x88\xeb\x85\x95\xed\x95\x98\xec\x84\xb8\xec\x9a\x94'],

      ]

  

      def test_fixEncoding(self):

          """Test the fixEncoding function"""

          for a, b in self.simple_values:

-             self.assertEqual(koji.fixEncoding(a), b)

              self.assertEqual(koji.fixEncoding(b), b)

-             c = a.encode('utf16')

-             self.assertEqual(koji.fixEncoding(c, fallback='utf16'), b)

-             d = a[:-3] + u'\x00\x01' + a[-3:]

-             self.assertEqual(koji.fixEncoding(d, remove_nonprintable=True), b)

+             if six.PY2:

+                 self.assertEqual(koji.fixEncoding(a), b)

+                 c = a.encode('utf16')

+                 self.assertEqual(koji.fixEncoding(c, fallback='utf16'), b)

+                 d = a[:-3] + u'\x00\x01' + a[-3:]

+                 self.assertEqual(koji.fixEncoding(d, remove_nonprintable=True), b)

+             else:

+                 self.assertEqual(koji.fixEncoding(a), a)

  

-     @mock.patch('sys.stdout', new_callable=six.StringIO)

-     def test_fix_print(self, stdout):

+     def test_fix_print(self):

          """Test the _fix_print function"""

-         expected = ''

+         actual, expected = [], []

          for a, b in self.simple_values:

-             if six.PY3:

-                 self.assertEqual(koji._fix_print(b), a)

-             else:

-                 self.assertEqual(koji._fix_print(b), b)

-             print(koji._fix_print(b))

-             if six.PY3:

-                 expected = expected + a + '\n'

-             else:

-                 expected = expected + b + '\n'

-         actual = stdout.getvalue()

+             actual.append(koji._fix_print(b))

+             expected.append(b)

+         expected = '\n'.join(expected)

+         actual = '\n'.join(actual)

          self.assertEqual(actual, expected)

  

      complex_values = [
@@ -73,21 +69,24 @@ 

          [None, None],

          [[], []],

          [{u'a': 'a' , 'b' : {'c': u'c\x00'}},

-          {six.b('a'): six.b('a') , six.b('b') : {six.b('c'):  six.b('c\x00')}}],

+          {'a': 'a' , 'b' : {'c':  'c\x00'}}],

          # iso8859-15 fallback

-         ['g\xf3\xf0an daginn', six.b('g\xc3\xb3\xc3\xb0an daginn')],

+         ['g\xf3\xf0an daginn', 'g\xc3\xb3\xc3\xb0an daginn'],

      ]

  

      nonprint = [

-         ['hello\0world\0', six.b('helloworld')],

-         [u'hello\0world\0', six.b('helloworld')],

-         [[u'hello\0world\0'], [six.b('helloworld')]],

-         [{0: u'hello\0world\0'}, {0: six.b('helloworld')}],

-         [[{0: u'hello\0world\0'}], [{0: six.b('helloworld')}]],

+         ['hello\0world\0', 'helloworld'],

+         [u'hello\0world\0', 'helloworld'],

+         [[u'hello\0world\0'], ['helloworld']],

+         [{0: u'hello\0world\0'}, {0: 'helloworld'}],

+         [[{0: u'hello\0world\0'}], [{0: 'helloworld'}]],

      ]

  

      def test_fixEncodingRecurse(self):

          """Test the fixEncodingRecurse function"""

+         if six.PY3:

+             # don't test for py3

+             return

          for a, b in self.simple_values:

              self.assertEqual(koji.fixEncoding(a), b)

          for a, b in self.complex_values:

file modified
-3
@@ -531,9 +531,6 @@ 

          return value

  

      value = koji.fixEncoding(value)

-     if six.PY3:

-         # it is bytes now, so decode to str

-         value = value.decode()

      return value.replace('&', '&').\

             replace('<', '&lt;').\

             replace('>', '&gt;')

no initial comment

I know this code needs adjustment, but it would help to be clear exactly what we're trying to address. What is the bug here?

I'm not sure that this is the right fix. I'm not sure if ignoring keys is enough. I think there is something more fundamental wrong here.

2 of the 3 places that use FixEncodingRecurse only do so in the py2 case. A number of the fixEncoding cases are also py2 only. I'm wondering if we shouldn't be leaving these strings alone in py3

Ah, reference got lost in PR. It is related to #1282 (reference in last commit).
I was also thinking, that py3 probably solved the problem of fixEncoding's existence. But still not sure about different combinations of hub and client py2/3 versions.

rebased onto 23bdf68

5 years ago

@mikem - I've removed most of the code and tried with bypassing fixEncoding for py3. Not sure about that, look especially at differences in test code. On the other hand it allowed me to remove some py2/3 switch in other parts of code. we can extend if for removeNonprintable if current code makes some sense.

Metadata Update from @mikem:
- Pull-request tagged with: testing-ready

5 years ago

The asymmetry in honoring remove_nonprintable for PY3 seems odd.

You've got an unrelated fix in there. It looks good. I don't know if it's worth pulling it out, but we should at least have an issue to track it.

The asymmetry in honoring remove_nonprintable for PY3 seems odd.

We definitely need to honor remove_nonprintable for fixEncodingRecurse. This option is used by getRPMHeaders and getChangelogEntries. In both cases, this was added to work around problem characters that could not be encoded in xmlrpc. Stripping those chars may not have been the best solution, but we need to preserve it until we have a better one.

This update adds support for remove_nonprintable to fixEncodingRecurse under python3.

https://github.com/mikem23/koji-playground/commits/pagure/pr/1293

The refactor may be slightly overkill here, but I've been meaning to do this ever since DataWalker was added. Less code, more readable.

Alternate solution:

https://github.com/mikem23/koji-playground/commits/pr1293b

A smaller change from where we are. Probably safer

Either solution is good to me, though I think the first option is better simply because it is much easier to understand.

Metadata Update from @jcupova:
- Pull-request tagged with: testing-done

5 years ago

1 new commit added

  • honor remove_nonprintable in fixEncodingRecurse under py3
5 years ago

I've filed #1318 to track the fixEncodingRecurse refactor

Commit 2250dab fixes this pull-request

Pull-Request has been merged by mikem

5 years ago