#102 Fix decoding errors on Python 2
Merged 4 years ago by lsedlar. Opened 4 years ago by lsedlar.
lsedlar/compose-utils py27-decoding  into  master

file modified
+25 -4
@@ -14,6 +14,8 @@ 

  import re

  from distutils.version import LooseVersion

  

+ import six

+ 

  import kobo.pkgset

  from kobo.rpmlib import parse_nvra, make_nvr, make_nvra, get_changelogs_from_header

  from kobo.threads import ThreadPool, WorkerThread
@@ -37,6 +39,23 @@ 

      return '{0:.{1}f} {2}'.format(size, prec, chosen)

  

  

def to_utf8(text):
    """Decode a byte string to text; return any other value unchanged.

    Despite the name, the result is decoded text (``unicode`` on Python 2,
    ``str`` on Python 3), not UTF-8 encoded bytes.

    :param text: value to normalise. Byte strings (``str`` on Python 2,
        ``bytes`` on Python 3) are decoded; anything else, including
        already-decoded text and ``None``, is passed through untouched.
    :returns: the decoded text, or ``text`` itself when it is not a byte
        string.
    """
    # On Python 2 ``bytes`` is an alias of ``str``, so this single check
    # covers both interpreter versions without a version switch.
    if not isinstance(text, bytes):
        # Already decoded (or not a string at all), nothing to do...
        return text
    try:
        # UTF-8 is a superset of ASCII, so a separate ASCII attempt is
        # unnecessary.
        return text.decode("utf-8")
    except UnicodeDecodeError:
        # Latin-1 assigns a code point to every possible byte value, so this
        # decode cannot fail and no further fallback is reachable.
        return text.decode("latin-1")

+ 

+ 

  clogs = {}

  

  
@@ -51,7 +70,7 @@ 

          ts = kwargs.pop("ts", None)

          header = kobo.rpmlib.get_rpm_header(file_path, ts=ts)

  

-         self.summary = kobo.rpmlib.get_header_field(header, "summary")

+         self.summary = to_utf8(kobo.rpmlib.get_header_field(header, "summary"))

  

          if self.sourcerpm:

              key = self.sourcerpm
@@ -96,10 +115,10 @@ 

          result = []

          try:

              old_time = old_changelog[0].time

-             old_nvr = LooseVersion(old_changelog[0].name.rsplit(None, 1)[-1])

+             old_nvr = LooseVersion(to_utf8(old_changelog[0].name).rsplit(None, 1)[-1])

              while new_changelog:

                  entry = new_changelog.pop(0)

-                 new_nvr = LooseVersion(entry.name.rsplit(None, 1)[-1])

+                 new_nvr = LooseVersion(to_utf8(entry.name).rsplit(None, 1)[-1])

                  if entry.time < old_time or (

                      entry.time == old_time and new_nvr <= old_nvr

                  ):
@@ -365,7 +384,9 @@ 

  

              data["changelog"] = []

              for i in get_changelog_diff_from_headers(old_package, new_package, max_logs):

-                 data["changelog"].append("* %s %s\n%s" % (i.ctime, i.name, i.text))

+                 data["changelog"].append(

+                     "* %s %s\n%s" % (i.ctime, to_utf8(i.name), to_utf8(i.text))

+                 )

  

              # TODO: comps, system release

              # if rpm.versionCompare(old_package, new_package.header) == -1:

@@ -60,7 +60,7 @@ 

            "dropped_rpms": ["pungi-ostree"],

            "size": 13,

            "size_change": 5,

-           "changelog": ["* Pungi changed.\n* A lot."]

+           "changelog": ["* Pungi changed.\n* A lot.\n* Příliš žluťoučký kůň."]

        }

    ],

    "downgraded_packages": [

@@ -35,6 +35,7 @@ 

  Changelog:

    * Pungi changed.

    * A lot.

+   * Příliš žluťoučký kůň.

  

  

  

@@ -29,6 +29,7 @@ 

  Changelog:

    * Pungi changed.

    * A lot.

+   * Příliš žluťoučký kůň.

  

  

  

file modified
+12 -8
@@ -10,6 +10,8 @@ 

  

  from kobo.rpmlib import ChangelogEntry

  

+ import six

+ 

  from .helpers import get_compose, get_fixture

  

  import productmd.compose
@@ -28,7 +30,7 @@ 

      'old_rpms': ['Dummy-firefox'],

      'common_rpms': ['Dummy-firefox'],

      'rpms': ['Dummy-firefox'],

-     'changelog': ['* Tue Mar 15 2016 Lubomír Sedlář <lubomir.sedlar@gmail.com> - 1:0.1.0-1\n- new version'],

+     "changelog": [u"* Tue Mar 15 2016 Lubomír Sedlář <lubomir.sedlar@gmail.com> - 1:0.1.0-1\n- new version"],

      'added_rpms': [],

      'dropped_rpms': [],

      'nvr': 'Dummy-firefox-1:0.1.0-1',
@@ -60,7 +62,7 @@ 

      'old_rpms': ['cloud-init'],

      'common_rpms': ['cloud-init'],

      'rpms': ['cloud-init'],

-     'changelog': ['* Tue Sep 05 2017 Lubomír Sedlář <lsedlar@redhat.com> - 0.7.9-9.module_f8c7dcdc\n- First release'],

+     "changelog": [u"* Tue Sep 05 2017 Lubomír Sedlář <lsedlar@redhat.com> - 0.7.9-9.module_f8c7dcdc\n- First release"],

      'added_rpms': [],

      'dropped_rpms': [],

      'nvr': 'cloud-init-0.7.9-9.module_f8c7dcdc',
@@ -210,19 +212,21 @@ 

          changelog = ComposeChangelog()

          changelog._get_summary = mock.Mock(return_value=[])

          with open(get_fixture('verbose-full.txt')) as f:

-             expected = f.read().split('\n')

+             expected = six.ensure_text(f.read(), encoding="utf-8").splitlines()

          self.maxDiff = None

-         self.assertEqual(changelog.get_verbose_log(self.data).split('\n'),

-                          expected)

+         self.assertEqual(

+             changelog.get_verbose_log(self.data).splitlines(), expected

+         )

  

      def test_verbose_short(self):

          changelog = ComposeChangelog()

          changelog._get_summary = mock.Mock(return_value=[])

          with open(get_fixture('verbose-short.txt')) as f:

-             expected = f.read().split('\n')

+             expected = six.ensure_text(f.read(), encoding="utf-8").splitlines()

          self.maxDiff = None

-         self.assertEqual(changelog.get_verbose_log(self.data, shorten=True).split('\n'),

-                          expected)

+         self.assertEqual(

+             changelog.get_verbose_log(self.data, shorten=True).splitlines(), expected

+         )

  

  

  class TestCompareChangelogs(unittest.TestCase):

When changelogs contain non-ASCII data, serializing the changelog could fail with a decoding error on Python 2. This case was not exercised by the test suite.

This partially reverts 86c951c, but only for Python 2.

rebased onto 2bdf757e00cd8fdda827bdbc664c35a1975bbdc3

4 years ago

rebased onto f55be30

4 years ago

I didn't manage to run the unit tests, but the change looks OK.

Pull-Request has been merged by lsedlar

4 years ago