#17 Lookaside: encoding repo name to UTF-8
Closed 8 years ago by lsedlar. Opened 8 years ago by araszka.
araszka/rpkg bz1241059  into  master

file modified
+5 -2
@@ -387,7 +387,6 @@ 

  

      def load_push_url(self):

          """Find the pushurl or url of remote of branch we're on."""

- 

          try:

              url = self.repo.git.config('--get', 'remote.%s.pushurl'

                                         % self.branch_remote)
@@ -397,7 +396,11 @@ 

                                             % self.branch_remote)

              except git.GitCommandError as e:

                  raise rpkgError('Unable to find remote push url: %s' % e)

-         self._push_url = url

+         if isinstance(url, unicode):

+             # GitPython >= 1.0 return unicode. It must be encoded to string.

+             self._push_url = url.encode('utf-8')

+         else:

+             self._push_url = url

  

      @property

      def commithash(self):

@@ -0,0 +1,24 @@ 

+ import os

+ 

+ from . import CommandTestCase

+ 

+ 

+ class CommandPackageNameTestCase(CommandTestCase):

+     def test_name_is_not_unicode(self):

+         self.make_new_git(self.module)

+ 

+         import pyrpkg

+         cmd = pyrpkg.Commands(self.path, self.lookaside, self.lookasidehash,

+                               self.lookaside_cgi, self.gitbaseurl,

+                               self.anongiturl, self.branchre, self.kojiconfig,

+                               self.build_client, self.user, self.dist,

+                               self.target, self.quiet)

+         cmd.clone(self.module, anon=True)

+ 

+         moduledir = os.path.join(self.path, self.module)

+         cmd.path = moduledir

+ 

+         # pycurl can't handle unicode variable

+         # module_name needs to be string

+         self.assertNotEquals(type(cmd.module_name), unicode)

+         self.assertEquals(type(cmd.module_name), str)

no initial comment

I don't like the proposed change:
- Find where the Unicode string first appears and convert it there.
- The sources have unit tests, so write a test to cover this issue.

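I would much prefer to encode only if url is actually a unicode instance. As is, the code would crash if someone has an older GitPython (one that returns strs) and the url actually contains non-ASCII characters. In Python 2, calling .encode() on a byte str first implicitly decodes it with the ASCII codec, which fails on non-ASCII bytes: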

>>> "ščř".encode('utf-8')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 0: ordinal not in range(128)

I have rebased the patch on master and merged it.