#4214 Fix for reading config files when contains UTF-8 chars
Merged 3 months ago by tkopecek. Opened 4 months ago by jcupova.
jcupova/koji issue-4191  into  master

file modified
+6 -1
@@ -25,6 +25,7 @@ 

  from __future__ import absolute_import, division

  

  import base64

+ import codecs

  import datetime

  import errno

  import hashlib
@@ -2339,7 +2340,11 @@ 

          elif strict:

              raise ConfigurationError("Config file %s can't be opened."

                                       % config_file)

-     config.read(cfgs)

+     if six.PY3:

+         config.read(cfgs, encoding="utf8")

Python 3 has utf-8 as a default encoding, so it is worth to add a comment why it is needed here. I assume that it is because that it is not working in case when there is C locale enabled? In such case it would turn to encoding='locale' thus using ascii instead of utf-8.

+     else:

+         for cfg in cfgs:

+             config.readfp(codecs.open(cfg, 'r', 'utf8'))

      return config

  

  

file added
+32
@@ -0,0 +1,32 @@ 

+ import locale

+ from functools import wraps

+ 

+ 

+ class mylocale:

+     """Locale context manager. Resets to user default at exit."""

+ 

+     def __init__(self, value=None):

+         self.value = value

+ 

+     def __enter__(self):

+         locale.setlocale(locale.LC_ALL, locale=self.value)

+ 

+     def __exit__(self, _type, value, traceback):

+         # This resets to user default. Implementing a proper save and restore

+         # for locale is quite tricky, and this serves our testing needs.

+         locale.setlocale(locale.LC_ALL, "")

+         # return false so we don't eat exceptions

+         return False

+ 

+     def __call__(self, func):

+         """When called, act as a decorator"""

+ 

+         @wraps(func)

+         def wrapped(*args, **kwargs):

+             with self:

+                 return func(*args, **kwargs)

+ 

+         return wrapped

+ 

+ 

+ # the end

@@ -0,0 +1,7 @@ 

+ # comment ěščřčž

+ [koji]

+ # key_file = äöüß

+ ; test comment with unicode 🫠

+ # test comment with unicode ☢️

+ setting = hello

+ value = world 🌍

file modified
+31 -11
@@ -3,7 +3,6 @@ 

  import calendar

  from datetime import datetime

  import errno

- import locale

  import logging

  import tempfile

  import threading
@@ -26,6 +25,7 @@ 

  

  import koji

  import koji.util

+ from ..common import mylocale

  

  

  class EnumTestCase(unittest.TestCase):
@@ -272,7 +272,6 @@ 

          mock.patch.stopall()

  

      def test_read_config_files(self):

- 

          # bad config_files

          for files in [0,

                        False,
@@ -298,7 +297,7 @@ 

              self.assertTrue(isinstance(conf,

                                         six.moves.configparser.ConfigParser.__class__))

          self.real_parser_clz.assert_called_once()

-         self.real_parser_clz.return_value.read.assert_called_once_with([files])

+         self.real_parser_clz.return_value.read.assert_called_once_with([files], encoding='utf8')

  

          # list as config_files

          self.reset_mock()
@@ -306,7 +305,7 @@ 

          koji.read_config_files(files)

  

          self.real_parser_clz.assert_called_once()

-         self.real_parser_clz.return_value.read.assert_called_once_with(files)

+         self.real_parser_clz.return_value.read.assert_called_once_with(files, encoding='utf8')

  

          # tuple as config_files

          self.reset_mock()
@@ -369,7 +368,8 @@ 

              ['test1.conf',

               'gooddir/test1-1.conf',

               'gooddir/test1-2.conf',

-              'test2.conf'])

+              'test2.conf'],

+             encoding='utf8')

          self.assertEqual(self.manager.isdir.call_count, 5)

          self.assertEqual(self.manager.isfile.call_count, 6)

          self.assertEqual(self.manager.access.call_count, 4)
@@ -407,6 +407,23 @@ 

          self.assertEqual(listdir_mock.call_count, 2)

  

  

+ class ConfigFileTestCase2(unittest.TestCase):

+     """Additional tests for config file reading functions"""

+ 

+     def setUp(self):

+         self.datadir = os.path.dirname(__file__) + '/data/cfg'

+ 

+     def tearDown(self):

+         mock.patch.stopall()

+ 

+     def test_unicode(self):

+         fn = self.datadir + '/uni1.conf'

+         if not os.path.exists(fn):

+             raise Exception('missing config')

+         with mylocale(value='C'):

+             koji.read_config_files(fn)

+ 

+ 

  class MavenUtilTestCase(unittest.TestCase):

      """Test maven relative functions"""

      maxDiff = None
@@ -750,10 +767,9 @@ 

                  config.read_file(conf_file)

          return config

  

+     @mylocale(('en_US', 'UTF-8'))

      def test_formatChangelog(self):

          """Test formatChangelog function"""

-         # force locale to compare 'expect' value

-         locale.setlocale(locale.LC_ALL, ('en_US', 'UTF-8'))

          data = [

              {

                  'author': 'Happy Koji User <user1@example.com> - 1.1-1',
@@ -787,8 +803,6 @@ 

          result = koji.util.formatChangelog(data)

          self.assertMultiLineEqual(expect, result)

  

-         locale.setlocale(locale.LC_ALL, "")

- 

      def test_parseTime(self):

          """Test parseTime function"""

          now = datetime.now()
@@ -1282,7 +1296,8 @@ 

  

      @patch('koji.util._rmtree')

      def test_rmtree_directory(self, _rmtree):

-         """ Tests that the koji.util._rmtree_nofork function returns nothing when the path is a directory.

+         """ Tests that the koji.util._rmtree_nofork function returns nothing

+         when the path is a directory.

          """

          stat = mock.MagicMock()

          stat.st_dev = 'dev'
@@ -1552,7 +1567,8 @@ 

      @mock.patch('os.unlink')

      @mock.patch('os.waitpid')

      @mock.patch('os._exit')

-     def test_rmtree_parent_logfail(self, _exit, waitpid, unlink, fork, rmtree_nofork, logsend, mkstemp):

+     def test_rmtree_parent_logfail(self, _exit, waitpid, unlink, fork, rmtree_nofork, logsend,

+                                    mkstemp):

          log = self.tempdir + '/rmtree-log.jsonl'

          fd = os.open(log, os.O_RDWR | os.O_CREAT)

          mkstemp.return_value = fd, log
@@ -1844,6 +1860,7 @@ 

                          os.makedirs('%s/a/%s/c/d/%s/e/f/%s/g/h' % (dirname, i, j, k))

  

          sync = threading.Event()

+ 

          def do_rmtree(dirname):

              sync.wait()

              koji.util.rmtree(dirname)
@@ -1870,6 +1887,7 @@ 

                      os.makedirs('%s/a/%s/c/d/%s/e/f/%s/g/h' % (dirname, i, j, k))

  

          sync = threading.Event()

+ 

          def do_rmtree(dirname):

              sync.wait()

              koji.util.rmtree(dirname)
@@ -1895,6 +1913,7 @@ 

                      os.makedirs('%s/a/%s/c/d/%s/e/f/%s/g/h' % (dirname, i, j, k))

  

          sync = multiprocessing.Event()

+ 

          def do_rmtree(dirname):

              sync.wait()

              koji.util.rmtree(dirname)
@@ -2048,6 +2067,7 @@ 

          for inp, out in cases:

              self.assertEqual(koji.util.format_shell_cmd(inp, text_width=40), out)

  

+ 

  class TestExtractBuildTask(unittest.TestCase):

      def test_valid_binfos(self):

          binfos = [

@@ -24,47 +24,47 @@ 

  

  

  CONFIG1 = {

-         'paths': {

-             'default_mounts': '/mnt/archive,/mnt/workdir',

-             'safe_roots': '/mnt/workdir/tmp',

-             'path_subs':

-             '/mnt/archive/prehistory/,/mnt/prehistoric_disk/archive/prehistory',

-         },

-         'path0': {

-             'mountpoint': '/mnt/archive',

-             'path': 'archive.org:/vol/archive',

-             'fstype': 'nfs',

-             'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

-         }}

+     'paths': {

+         'default_mounts': '/mnt/archive,/mnt/workdir',

+         'safe_roots': '/mnt/workdir/tmp',

+         'path_subs':

+         '/mnt/archive/prehistory/,/mnt/prehistoric_disk/archive/prehistory',

+     },

+     'path0': {

+         'mountpoint': '/mnt/archive',

+         'path': 'archive.org:/vol/archive',

+         'fstype': 'nfs',

+         'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

+     }}

  

  

  CONFIG2 = {

-         'paths': {

-             'default_mounts': '/mnt/archive,/mnt/workdir',

-             'safe_roots': '/mnt/workdir/tmp',

-             'path_subs':

-                 '\n'

-                 '/mnt/archive/prehistory/,/mnt/prehistoric_disk/archive/prehistory\n'

-                 '/mnt/archve/workdir,/mnt/workdir\n',

-         },

-         'path0': {

-             'mountpoint': '/mnt/archive',

-             'path': 'archive.org:/vol/archive',

-             'fstype': 'nfs',

-             'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

-         },

-         'path1': {

-             'mountpoint': '/mnt/workdir',

-             'path': 'archive.org:/vol/workdir',

-             'fstype': 'nfs',

-             'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

-         },

-         'path2': {

-             'mountpoint': '/mnt/prehistoric_disk',

-             'path': 'archive.org:/vol/prehistoric_disk',

-             'fstype': 'nfs',

-             'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

-         }}

+     'paths': {

+         'default_mounts': '/mnt/archive,/mnt/workdir',

+         'safe_roots': '/mnt/workdir/tmp',

+         'path_subs':

+             '\n'

+             '/mnt/archive/prehistory/,/mnt/prehistoric_disk/archive/prehistory\n'

+             '/mnt/archve/workdir,/mnt/workdir\n',

+     },

+     'path0': {

+         'mountpoint': '/mnt/archive',

+         'path': 'archive.org:/vol/archive',

+         'fstype': 'nfs',

+         'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

+     },

+     'path1': {

+         'mountpoint': '/mnt/workdir',

+         'path': 'archive.org:/vol/workdir',

+         'fstype': 'nfs',

+         'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

+     },

+     'path2': {

+         'mountpoint': '/mnt/prehistoric_disk',

+         'path': 'archive.org:/vol/prehistoric_disk',

+         'fstype': 'nfs',

+         'options': 'ro,hard,intr,nosuid,nodev,noatime,tcp',

+     }}

  

  

  class FakeConfigParser(object):
@@ -75,7 +75,7 @@ 

          else:

              self.CONFIG = copy.deepcopy(config)

  

-     def read(self, path):

+     def read(self, path, encoding):

          return

  

      def sections(self):
@@ -105,8 +105,7 @@ 

          options.workdir = '/tmp/nonexistentdirectory'

          with self.assertRaises(koji.GenericError) as cm:

              runroot.RunRootTask(123, 'runroot', {}, session, options)

-         self.assertEqual(cm.exception.args[0],

-             "bad config: missing options in path0 section")

+         self.assertEqual(cm.exception.args[0], "bad config: missing options in path0 section")

  

      @mock.patch(CONFIG_PARSER)

      def test_bad_config_absolute_path(self, config_parser):
@@ -119,7 +118,8 @@ 

          with self.assertRaises(koji.GenericError) as cm:

              runroot.RunRootTask(123, 'runroot', {}, session, options)

          self.assertEqual(cm.exception.args[0],

-             "bad config: all paths (default_mounts, safe_roots, path_subs) needs to be absolute: ")

+                          "bad config: all paths (default_mounts, safe_roots, path_subs) "

+                          "needs to be absolute: ")

  

      @mock.patch(CONFIG_PARSER)

      def test_valid_config(self, config_parser):
@@ -186,13 +186,14 @@ 

  

          # valid item

          self.assertEqual(self.t._get_path_params('/mnt/archive', 'rw'),

-             ('archive.org:/vol/archive/', '/mnt/archive', 'nfs', 'rw,hard,intr,nosuid,nodev,noatime,tcp'))

+                          ('archive.org:/vol/archive/', '/mnt/archive', 'nfs',

+                           'rw,hard,intr,nosuid,nodev,noatime,tcp'))

  

      @mock.patch('koji._open_text_file')

      @mock.patch('os.path.isdir')

      @mock.patch('runroot.log_output')

      def test_do_mounts(self, log_output, is_dir, open_mock):

-         log_output.return_value = 0 # successful mount

+         log_output.return_value = 0  # successful mount

  

          # no mounts, don't do anything

          self.t.logger = mock.MagicMock()
@@ -202,8 +203,7 @@ 

          # mountpoint has no absolute_path

          with self.assertRaises(koji.GenericError) as cm:

              self.t.do_mounts('rootdir', [('nfs:nfs', 'relative_path', 'nfs', '')])

-         self.assertEqual(cm.exception.args[0],

-                 "invalid mount point: relative_path")

+         self.assertEqual(cm.exception.args[0], "invalid mount point: relative_path")

  

          # cover missing opts

          self.t.do_mounts('rootdir', [('nfs:nfs', '/mnt/archive', 'nfs', None)])
@@ -212,23 +212,24 @@ 

          log_output.reset_mock()

          mounts = [self.t._get_path_params('/mnt/archive')]

          self.t.do_mounts('rootdir', mounts)

-         log_output.assert_called_once_with(self.session, 'mount',

-                 ['mount', '-t', 'nfs', '-o', 'ro,hard,intr,nosuid,nodev,noatime,tcp',

-                 'archive.org:/vol/archive/', 'rootdir/mnt/archive'],

-                 '/tmp/nonexistentdirectory/tasks/123/123/do_mounts.log',

-                 'tasks/123/123', append=True, logerror=True)

+         log_output.assert_called_once_with(

+             self.session, 'mount',

+             ['mount', '-t', 'nfs', '-o', 'ro,hard,intr,nosuid,nodev,noatime,tcp',

+              'archive.org:/vol/archive/', 'rootdir/mnt/archive'],

+             '/tmp/nonexistentdirectory/tasks/123/123/do_mounts.log', 'tasks/123/123',

+             append=True, logerror=True)

  

          # mount command failed

          log_output.reset_mock()

          log_output.return_value = 1

-         #self.t.undo_mounts = mock.MagicMock()

+         # self.t.undo_mounts = mock.MagicMock()

          mounts = [self.t._get_path_params('/mnt/archive')]

          with self.assertRaises(koji.GenericError) as cm:

              self.t.do_mounts('rootdir', mounts)

          self.assertEqual(cm.exception.args[0],

-             'Unable to mount rootdir/mnt/archive: mount -t nfs -o'

-             ' ro,hard,intr,nosuid,nodev,noatime,tcp archive.org:/vol/archive/'

-             ' rootdir/mnt/archive was killed by signal 1')

+                          'Unable to mount rootdir/mnt/archive: mount -t nfs -o'

+                          ' ro,hard,intr,nosuid,nodev,noatime,tcp archive.org:/vol/archive/'

+                          ' rootdir/mnt/archive was killed by signal 1')

  

          # bind ok

          log_output.return_value = 0
@@ -237,11 +238,12 @@ 

          mount[3] += ',bind'

          is_dir.return_value = True

          self.t.do_mounts('rootdir', [mount])

-         log_output.assert_called_once_with(self.session, 'mount',

-                 ['mount', '-t', 'none', '-o', 'ro,hard,intr,nosuid,nodev,noatime,tcp,bind',

-                 'archive.org:/vol/archive/', 'rootdir/mnt/archive'],

-                 '/tmp/nonexistentdirectory/tasks/123/123/do_mounts.log',

-                 'tasks/123/123', append=True, logerror=True)

+         log_output.assert_called_once_with(

+             self.session, 'mount',

+             ['mount', '-t', 'none', '-o', 'ro,hard,intr,nosuid,nodev,noatime,tcp,bind',

+              'archive.org:/vol/archive/', 'rootdir/mnt/archive'],

+             '/tmp/nonexistentdirectory/tasks/123/123/do_mounts.log', 'tasks/123/123',

+             append=True, logerror=True)

  

          # bind - target doesn't exist

          mount = list(self.t._get_path_params('/mnt/archive'))
@@ -250,7 +252,7 @@ 

          with self.assertRaises(koji.GenericError) as cm:

              self.t.do_mounts('rootdir', [mount])

          self.assertEqual(cm.exception.args[0],

-             "No such directory or mount: archive.org:/vol/archive/")

+                          "No such directory or mount: archive.org:/vol/archive/")

  

          # bg option forbidden

          log_output.reset_mock()
@@ -259,8 +261,7 @@ 

          is_dir.return_value = False

          with self.assertRaises(koji.GenericError) as cm:

              self.t.do_mounts('rootdir', [mount])

-         self.assertEqual(cm.exception.args[0],

-             "bad config: background mount not allowed")

+         self.assertEqual(cm.exception.args[0], "bad config: background mount not allowed")

  

      def test_do_extra_mounts(self):

          self.t.do_mounts = mock.MagicMock()
@@ -288,7 +289,6 @@ 

              self.t.do_extra_mounts('rootdir', ['/mnt/workdir/tmp/../xyz'])

          self.t.do_mounts.assert_not_called()

  

- 

      @mock.patch('koji._open_text_file')

      @mock.patch('runroot.scan_mounts')

      @mock.patch('os.unlink')
@@ -318,10 +318,11 @@ 

          with self.assertRaises(koji.GenericError) as cm:

              self.t.undo_mounts('rootdir')

          self.assertEqual(cm.exception.args[0],

-             'Unable to unmount: mountpoint: error, mount_2: error, mount_1: error')

+                          'Unable to unmount: mountpoint: error, mount_2: error, mount_1: error')

  

          os_unlink.assert_not_called()

  

+ 

  class TestHandler(unittest.TestCase):

      @mock.patch(CONFIG_PARSER)

      def setUp(self, config_parser):
@@ -335,7 +336,7 @@ 

  

          options = mock.MagicMock()

          options.workdir = '/tmp/nonexistentdirectory'

-         #options.topurl = 'http://topurl'

+         # options.topurl = 'http://topurl'

          options.topurls = None

          self.t = runroot.RunRootTask(123, 'runroot', {}, self.session, options)

          self.t.config['default_mounts'] = ['default_mount']
@@ -371,10 +372,9 @@ 

              'id': 1,

          }

          self.session.host.getHost.return_value = {'arches': 'x86_64'}

-         self.t.handler('tag_name', 'noarch', 'command', weight=10.0,

-                 repo_id=1, packages=['rpm_a', 'rpm_b'], new_chroot=True,

-                 mounts=['/mnt/a'], skip_setarch=True,

-                 upload_logs=['log_1', 'log_2'])

+         self.t.handler('tag_name', 'noarch', 'command', weight=10.0, repo_id=1,

+                        packages=['rpm_a', 'rpm_b'], new_chroot=True, mounts=['/mnt/a'],

+                        skip_setarch=True, upload_logs=['log_1', 'log_2'])

  

          # calls

          self.session.host.setTaskWeight.assert_called_once_with(self.t.id, 10.0)
@@ -382,15 +382,18 @@ 

          self.session.getBuildConfig.assert_called_once_with('tag_name')

          self.session.repoInfo.assert_called_once_with(1, strict=True)

          self.session.host.subtask.assert_not_called()

-         runroot.BuildRoot.assert_called_once_with(self.session, self.t.options,

-                 'tag_name', 'x86_64', self.t.id, repo_id=1, setup_dns=True,

-                 internal_dev_setup=None)

+         runroot.BuildRoot.assert_called_once_with(

+             self.session, self.t.options, 'tag_name', 'x86_64', self.t.id, repo_id=1,

+             setup_dns=True, internal_dev_setup=None)

          self.session.host.setBuildRootState.assert_called_once_with(678, 'BUILDING')

          self.br.mock.assert_has_calls([

              mock.call(['--install', 'rpm_a', 'rpm_b']),

-             mock.call(['--new-chroot', '--arch', 'arch', '--chroot', '--', '/bin/sh', '-c', '{ command; } < /dev/null 2>&1 | /usr/bin/tee /builddir/runroot.log; exit ${PIPESTATUS[0]}']),

+             mock.call(['--new-chroot', '--arch', 'arch', '--chroot', '--', '/bin/sh', '-c',

+                        '{ command; } < /dev/null 2>&1 | /usr/bin/tee /builddir/runroot.log; '

+                        'exit ${PIPESTATUS[0]}']),

          ])

-         self.session.host.updateBuildRootList.assert_called_once_with(678, self.br.getPackageList())

+         self.session.host.updateBuildRootList.assert_called_once_with(

+             678, self.br.getPackageList())

          self.t.do_mounts.assert_called_once_with('/rootdir', ['default_mount'])

          self.t.do_extra_mounts.assert_called_once_with('/rootdir', ['/mnt/a'])

          self.t.uploadFile.assert_has_calls([

Python 3 has utf-8 as a default encoding, so it is worth to add a comment why it is needed here. I assume that it is because that it is not working in case when there is C locale enabled? In such case it would turn to encoding='locale' thus using ascii instead of utf-8.

Metadata Update from @tkopecek:
- Pull-request tagged with: testing-ready

4 months ago

Also, some testing configs would be valuable here.

Dropping testing-ready, currently merge conflict with another testing-ready PR.

Metadata Update from @jcupova:
- Pull-request untagged with: testing-ready

4 months ago

Before we go about making these changes, do we have a replicator for #4191 ?

A casual attempt to replicate the complaint ("comment in the config files contains an unicode character"), does not seem to hit an error with the original code.

Metadata Update from @jcupova:
- Pull-request tagged with: testing-ready

3 months ago

Metadata Update from @jcupova:
- Pull-request untagged with: testing-ready

3 months ago

Metadata Update from @mfilip:
- Pull-request tagged with: testing-ready

3 months ago

rebased onto ad221fe

3 months ago

I've replicated it with such comment in web.conf:

# comment ěščřčž
[web]
SiteName = koji

KojiHubURL = http://koji-hub/kojihub
KojiFilesURL = http://localhost:8080/kojifiles
...

I've replicated it with such comment in web.conf:

I'm still unable to do so. What platform? What locale?

Can you replicate it with a plain call to config.read?

I've tried to make a unit test that manifests this and so far I have failed. The situation is complicated by the various places that we adjust locale in the unit tests without resetting it (and the the difficulty in sanely doing such a reset).

Ok, finally got it. almalinuxorg/8-base container. This hits it

import koji
import locale
locale.setlocale(locale.LC_ALL, 'C')
cfg = koji.read_config_files('test.cfg')

I thought I'd tried this permutation before. I must have done something odd.

Metadata Update from @mfilip:
- Pull-request tagged with: testing-done

3 months ago

additional unit test here -- https://pagure.io/fork/mikem/koji/commits/pr4214updates
(fails on rhel8 if I revert the fix)

1 new commit added

  • add unit test
3 months ago

Commit 0a7e76d fixes this pull-request

Pull-Request has been merged by tkopecek

3 months ago