#578 cli: fix changelog encode for PY3
Merged 6 years ago by mikem. Opened 6 years ago by julian8628.
julian8628/koji changelog-encode  into  master

file modified
+17 -2
@@ -2966,6 +2966,21 @@ 

      # expects raw-encoded string, not unicode

      return value.translate(None, NONPRINTABLE_CHARS)

  

+ 

+ def _fix_print(value):

+     """Fix a string so it is suitable to print

+ 

+     In python2, this means we return a utf8 encoded str

+     In python3, this means we return unicode

+     """

+     if six.PY2 and isinstance(value, six.text_type):

+         return value.encode('utf8')

+     elif six.PY3 and isinstance(value, six.binary_type):

+         return value.decode('utf8')

+     else:

+         return value

+ 

+ 

  def fixEncoding(value, fallback='iso8859-15', remove_nonprintable=False):

      """

      Convert value to a 'str' object encoded as UTF-8.
@@ -2976,8 +2991,8 @@ 

          return six.b('')

  

      if isinstance(value, six.text_type):

-         # value is already unicode, so just convert it

-         # to a utf8-encoded str

+         # value is already unicode(py3: str), so just convert it

+         # to a utf8-encoded str(py3: bytes)

          s = value.encode('utf8')

      else:

          # value is a str, but may be encoded in utf8 or some

file modified
+5 -3
@@ -48,9 +48,11 @@ 

  except ImportError:  # pragma: no cover

      from sha import new as sha1_constructor

  

+ 

  def _changelogDate(cldate):

      return time.strftime('%a %b %d %Y', time.strptime(koji.formatTime(cldate), '%Y-%m-%d %H:%M:%S'))

  

+ 

  def formatChangelog(entries):

      """Format a list of changelog entries (dicts)

      into a string representation."""
@@ -59,9 +61,9 @@ 

          result += """* %s %s

  %s

  

- """ % (_changelogDate(entry['date']), entry['author'].encode("utf-8"),

-        entry['text'].encode("utf-8"))

- 

+ """ % (_changelogDate(entry['date']),

+        koji._fix_print(entry['author']),

+        koji._fix_print(entry['text']))

      return result

  

  DATE_RE = re.compile(r'(\d+)-(\d+)-(\d+)')

@@ -7,6 +7,7 @@ 

  import koji

  import six

  import unittest

+ import mock

  

  class FixEncodingTestCase(unittest.TestCase):

      """Main test case container"""
@@ -44,6 +45,23 @@ 

              d = a[:-3] + u'\x00\x01' + a[-3:]

              self.assertEqual(koji.fixEncoding(d, remove_nonprintable=True), b)

  

+     @mock.patch('sys.stdout', new_callable=six.StringIO)

+     def test_fix_print(self, stdout):

+         """Test the _fix_print function"""

+         expected = ''

+         for a, b in self.simple_values:

+             if six.PY3:

+                 self.assertEqual(koji._fix_print(b), a)

+             else:

+                 self.assertEqual(koji._fix_print(b), b)

+             print(koji._fix_print(b))

+             if six.PY3:

+                 expected = expected + a + '\n'

+             else:

+                 expected = expected + b + '\n'

+         actual = stdout.getvalue()

+         self.assertEqual(actual, expected)

+ 

      complex_values = [

          # [ value, fixed ]

          [{}, {}],

file modified
+34 -6
@@ -1,3 +1,4 @@ 

+ # coding=utf-8

  from __future__ import absolute_import

  import mock

  import unittest
@@ -494,12 +495,39 @@ 

  

      def test_formatChangelog(self):

          """Test formatChangelog function"""

-         entries = {'date': datetime(2017, 10, 10, 12, 34, 56),

-                    'author': 'koji <kojiadmin@localhost.local>',

-                    'text': 'This is a test release'}

-         sample = "* Tue Oct 10 2017 {0}\n{1}\n\n".format(

-                 entries['author'].encode('utf-8'), entries['text'].encode('utf-8'))

-         self.assertEqual(sample, koji.util.formatChangelog((entries,)))

+         data = [

+                 {

+                     'author': 'Happy Koji User <user1@example.com> - 1.1-1',

+                     'date': '2017-10-25 08:00:00',

+                     'date_ts': 1508932800,

+                     'text': '- Line 1\n- Line 2',

+                 },

+                 {

+                     'author': u'Happy \u0138\u014dji \u016cs\u0259\u0155 <user2@example.com>',

+                     'date': '2017-08-28 08:00:00',

+                     'date_ts': 1503921600,

+                     'text': '- some changelog entry',

+                 },

+                 {

+                     'author': 'Koji Admin <admin@example.com> - 1.49-6',

+                     'date': datetime(2017, 10, 10, 12, 34, 56),

+                     'text': '- mass rebuild',

+                 }

+                ]

+         expect = (

+ '''* Wed Oct 25 2017 Happy Koji User <user1@example.com> - 1.1-1

+ - Line 1

+ - Line 2

+ 

+ * Mon Aug 28 2017 Happy ĸōji Ŭsəŕ <user2@example.com>

+ - some changelog entry

+ 

+ * Tue Oct 10 2017 Koji Admin <admin@example.com> - 1.49-6

+ - mass rebuild

+ 

+ ''')

+         result = koji.util.formatChangelog(data)

+         self.assertMultiLineEqual(expect, result)

  

      def test_parseTime(self):

          """Test parseTime function"""

I'm not sure if there is some other reasons to invoke str.encode('utf-8') here, So adding a wrapper to make sure only str be printed here

Why not just use fixEncoding? Your fixPrint seems to do essentially the same thing.

I not sure this updated comment is any more correct.

Why not just use fixEncoding? Your fixPrint seems to do essentially the same thing.

for Python3, fixEncoding will return bytes which will be directly printed as b'\xe4\xbd\xa0\xe5\xa5\xbd'
fixPrint is supposed to make sure the bytes decoded to '你好' to fix above issue.
I think a better choice should be to make sure fixEncoding to always return str, but I'm not sure if it will introduce other problems.

wrote a candidate modifying fixEncoding and fixEncodingRecurse here:
https://pagure.io/fork/julian8628/koji/c/3d55e9d

I cannot replicate #577 with client HEAD (or from koji-1.13.0 release). Can you?

Nevermind, got it

Interestingly, fixEncoding is barely used client side. It's used several places in the hub, web, and builder, but in the client lib the only place is in parse_pom (which itself is mostly used on hub/builder, but is used in the import-archive command).

Point being, there's not a lot of places where fixEncoding runs under python3 right now, pretty much just import-archive --type maven.

So I wonder if import-archive works on py3 as-is, or if it would work better if fixEncoding returned str.

btw, the .encode('utf-8') was added here:

commit e565217a73e82ee1f58c7f170bb8bdec76c66e99
Author: Mike McLean <mikem@redhat.com>
Date:   Tue Dec 8 22:40:05 2009 -0500

    avoid unicode errors involving changelogs (rhbz 545387, patch by dmach)

https://bugzilla.redhat.com/show_bug.cgi?id=545387

Interestingly, fixEncoding is barely used client side. It's used several places in the hub, web, and builder, but in the client lib the only place is in parse_pom (which itself is mostly used on hub/builder, but is used in the import-archive command).
Point being, there's not a lot of places where fixEncoding runs under python3 right now, pretty much just import-archive --type maven.
So I wonder if import-archive works on py3 as-is, or if it would work better if fixEncoding returned str.

Tested import-archive --type maven with current master and https://pagure.io/fork/julian8628/koji/c/3d55e9d

They both work fine.

ping @mikem @tkopecek for reviewing this candidate

When rebased, this breaks one of the newer unit tests. I've made an adjustment to that test here:
https://github.com/mikem23/koji-playground/commits/pagure/pr/578

This function works well enough, but I'm cautious about how we will want to approach this long term.

On my branch, I've renamed the function to _fix_print and added a docstring. I want to discourage clients from using this function for now. I think we'll end up revisiting this as we port more of the code to python3.

Also I removed the if not value case. So far, we're only using this is a place where we expect an actual string, so let's not get ahead of ourselves. We don't know what we want to do with other values yet, if anything.

If that looks ok to you, I can merge it tomorrow.

If that looks ok to you, I can merge it tomorrow.

That's good. I've rebased your commit into this PR

rebased onto 3464ada

6 years ago

1 new commit added

  • fix unit test - test_formatChangelog
6 years ago

Commit 62d4d1d fixes this pull-request

Pull-Request has been merged by mikem

6 years ago