#734 hub: add strict behavior in `get_archive_file()` and `list_archive_files()`
Merged 6 years ago by mikem. Opened 6 years ago by julian8628.
julian8628/koji issue/719  into  master

file modified
+47 -24
@@ -24,42 +24,42 @@ 

  

  import base64

  import calendar

- import urlparse

- import koji

- import koji.auth

- import koji.db

- import koji.plugin

- import koji.policy

  import datetime

  import errno

- import logging

  import fcntl

  import fnmatch

- from koji.util import md5_constructor

- from koji.util import sha1_constructor

- from koji.util import dslice

- from koji.util import multi_fnmatch

- from koji.util import safer_move

+ import functools

  import json

+ import logging

  import os

  import re

- import rpm

  import shutil

  import stat

  import subprocess

  import sys

  import tarfile

  import tempfile

- import traceback

  import time

+ import traceback

+ import urlparse

  import xmlrpclib

  import zipfile

- import functools

+ 

+ import rpm

  import six

  

+ import koji

+ import koji.auth

+ import koji.db

+ import koji.plugin

+ import koji.policy

  import koji.xmlrpcplus

  from koji.context import context

- 

+ from koji.util import dslice

+ from koji.util import md5_constructor

+ from koji.util import multi_fnmatch

+ from koji.util import safer_move

+ from koji.util import sha1_constructor

  

  logger = logging.getLogger('koji.hub')

  
@@ -4283,7 +4283,7 @@ 

      archive.close()

      return result

  

- def list_archive_files(archive_id, queryOpts=None):

+ def list_archive_files(archive_id, queryOpts=None, strict=False):

      """

      Get information about the files contained in the archive with the given ID.

      Returns a list of maps with with following keys:
@@ -4291,6 +4291,12 @@ 

      archive_id: id of the archive the file is contained in (integer)

      name: name of the file (string)

      size: uncompressed size of the file (integer)

+ 

+     If strict is True, raise GenericError if:

+       - build btype of this archive belong to is not maven, win or image

+       - archive_type is not that we are able to expand

+ 

+     Regardless of strict, an error will be raised if the archive_id is invalid

      """

      archive_info = get_archive(archive_id, strict=True)

  
@@ -4316,17 +4322,25 @@ 

          file_path = os.path.join(koji.pathinfo.imagebuild(build_info),

                                   archive_info['filename'])

      else:

+         # TODO: support other build types

+         if strict:

+             raise koji.GenericError("Unsupported build type")

          return _applyQueryOpts([], queryOpts)

  

      if archive_type['name'] in ('zip', 'jar'):

-         return _applyQueryOpts(_get_zipfile_list(archive_id, file_path), queryOpts)

+         filelist = _get_zipfile_list(archive_id, file_path)

      elif archive_type['name'] == 'tar':

-         return _applyQueryOpts(_get_tarball_list(archive_id, file_path), queryOpts)

+         filelist = _get_tarball_list(archive_id, file_path)

      else:

-         # XXX support other archive types

-         return _applyQueryOpts([], queryOpts)

+         # TODO: support other archive types

+         if strict:

+             raise koji.GenericError(

+                 "Unsupported archive type: %s" % archive_type['name'])

+         filelist = []

+     return _applyQueryOpts(filelist, queryOpts)

+ 

  

- def get_archive_file(archive_id, filename):

+ def get_archive_file(archive_id, filename, strict=False):

      """

      Get information about a file with the given filename

      contained in the archive with the given ID.
@@ -4335,14 +4349,23 @@ 

      archive_id: id of the archive the file is contained in (integer)

      name: name of the file (string)

      size: uncompressed size of the file (integer)

+ 

+     If strict is True, raise GenericError if:

+       - this file is not found in the archive

+       - build btype of this archive belong to is not maven, win or image

+       - archive_type is not that we are able to expand

+ 

+     Regardless of strict, an error will be raised if the archive_id is invalid

      """

-     files = list_archive_files(archive_id)

+     files = list_archive_files(archive_id, strict=strict)

      for file_info in files:

          if file_info['name'] == filename:

              return file_info

-     #otherwise

+     if strict:

+         raise koji.GenericError('No such file: %s in archive#%s' % (filename, archive_id))

      return None

  

+ 

  def list_task_output(taskID, stat=False, all_volumes=False):

      """List the files generated by the task with the given ID.  This

      will usually include one or more RPMs, and one or more log files.

@@ -0,0 +1,61 @@ 

+ import unittest

+ import mock

+ 

+ import koji

+ import kojihub

+ 

+ FILES = [{'archive_id': 1,

+           'name': 'archive1.zip',

+           'size': 1024},

+          {'archive_id': 1,

+           'name': 'archive2.jar',

+           'size': 4096}]

+ 

+ EMPTY_FILES = []

+ 

+ 

+ class TestGetArchiveFile(unittest.TestCase):

+ 

+     @mock.patch('kojihub.list_archive_files')

+     def test_simple(self, list_archive_files):

+         list_archive_files.return_value = FILES

+ 

+         rv = kojihub.get_archive_file(1, 'archive1.zip')

+         list_archive_files.assert_called_with(1, strict=False)

+         self.assertEqual(rv, FILES[0])

+ 

+         list_archive_files.reset_mock()

+         rv = kojihub.get_archive_file(1, 'archive1.zip', strict=True)

+         list_archive_files.assert_called_with(1, strict=True)

+         self.assertEqual(rv, FILES[0])

+ 

+     @mock.patch('kojihub.list_archive_files')

+     def test_empty_files(self, list_archive_files):

+         list_archive_files.return_value = EMPTY_FILES

+ 

+         rv = kojihub.get_archive_file(1, 'archive1.zip')

+         list_archive_files.assert_called_with(1, strict=False)

+         self.assertIsNone(rv)

+ 

+         list_archive_files.reset_mock()

+         list_archive_files.side_effect = koji.GenericError('error message')

+ 

+         with self.assertRaises(koji.GenericError) as cm:

+             kojihub.get_archive_file(1, 'archive1.zip', strict=True)

+         list_archive_files.assert_called_with(1, strict=True)

+         self.assertEqual(cm.exception.args[0], 'error message')

+ 

+     @mock.patch('kojihub.list_archive_files')

+     def test_non_existing_file(self, list_archive_files):

+         list_archive_files.return_value = FILES

+ 

+         rv = kojihub.get_archive_file(1, 'archive3.xml')

+         list_archive_files.assert_called_with(1, strict=False)

+         self.assertEqual(rv, None)

+ 

+         list_archive_files.reset_mock()

+ 

+         with self.assertRaises(koji.GenericError) as cm:

+             kojihub.get_archive_file(1, 'archive3.xml', strict=True)

+         list_archive_files.assert_called_with(1, strict=True)

+         self.assertEqual(cm.exception.args[0], 'No such file: archive3.xml in archive#1')

@@ -0,0 +1,159 @@ 

+ import unittest

+ 

+ import mock

+ 

+ import koji

+ import kojihub

+ 

+ GET_ARCHIVE_RV = {'id': 1, 'build_id': 2, 'type_id': 3,

+                   'filename': 'somearchive.zip'}

+ GET_ARCHIVE_TYPE_RV = {'id': 3, 'name': 'zip'}

+ GET_BUILD_RV = {'id': 2, 'name': 'somebuild', 'version': '1.2.3',

+                 'release': '1.el6', 'volume_name': 'archive_vol'}

+ 

+ 

+ class TestListArchiveFiles(unittest.TestCase):

+     def setUp(self):

+         self.mm = mock.MagicMock()

+         self.mm.get_image_build = mock.patch('kojihub.get_image_build',

+                                              return_value=None).start()

+         self.mm.get_win_build = mock.patch('kojihub.get_win_build',

+                                            return_value=None).start()

+         self.mm.get_maven_build = mock.patch('kojihub.get_maven_build',

+                                              return_value=None).start()

+         self.mm.get_build = mock.patch('kojihub.get_build',

+                                        return_value=GET_BUILD_RV).start()

+         self.mm.get_archive_type = mock.patch('kojihub.get_archive_type',

+                                               return_value=GET_ARCHIVE_TYPE_RV).start()

+         self.mm.get_archive = mock.patch('kojihub.get_archive',

+                                          return_value=GET_ARCHIVE_RV).start()

+ 

+     def tearDown(self):

+         mock.patch.stopall()

+ 

+     def test_simple(self):

+         rv = kojihub.list_archive_files(1)

+         self.mm.get_archive.assert_called_once_with(1, strict=True)

+         self.mm.get_archive_type.assert_called_once_with(type_id=3,

+                                                          strict=True)

+         self.mm.get_build.assert_called_once_with(2, strict=True)

+         self.mm.get_maven_build.assert_called_once_with(2)

+         self.mm.get_win_build.assert_called_once_with(2)

+         self.mm.get_image_build.assert_called_once_with(2)

+         self.assertListEqual(rv, [])

+ 

+     @mock.patch('kojihub.get_maven_archive',

+                 return_value={'archive_id': 1,

+                               'group_id': 'gid',

+                               'artifact_id': 'aid',

+                               'version': '1.0.0'})

+     @mock.patch('kojihub._get_zipfile_list', return_value=[])

+     def test_simple_strict_empty(self, get_zipfile_list, get_maven_archive):

+         self.mm.get_maven_build.return_value = {'build_id': 2,

+                                                 'group_id': 'gid',

+                                                 'artifact_id': 'aid',

+                                                 'version': '1.0.0'}

+         rv = kojihub.list_archive_files(1, strict=True)

+         self.assertListEqual(rv, [])

+ 

+     def test_simple_strict_bad_btype(self):

+         with self.assertRaises(koji.GenericError) as cm:

+             kojihub.list_archive_files(1, strict=True)

+         self.assertEqual(cm.exception.args[0], "Unsupported build type")

+ 

+     @mock.patch('kojihub.get_maven_archive',

+                 return_value={'archive_id': 1,

+                               'group_id': 'gid',

+                               'artifact_id': 'aid',

+                               'version': '1.0.0'})

+     def test_simple_strict_bad_archive_type(self, get_maven_archive):

+         self.mm.get_archive_type.return_value = {'id': 9, 'name': 'txt'}

+         self.mm.get_maven_build.return_value = {'build_id': 2,

+                                                 'group_id': 'gid',

+                                                 'artifact_id': 'aid',

+                                                 'version': '1.0.0'}

+         with self.assertRaises(koji.GenericError) as cm:

+             kojihub.list_archive_files(1, strict=True)

+         self.assertEqual(cm.exception.args[0], "Unsupported archive type: txt")

+ 

+     @mock.patch('kojihub.get_maven_archive',

+                 return_value={'archive_id': 1,

+                               'group_id': 'gid',

+                               'artifact_id': 'aid',

+                               'version': '1.0.0'})

+     @mock.patch('kojihub._get_zipfile_list', return_value=[{'archive_id': 1,

+                                                             'name': 'file1',

+                                                             'size': 4096,

+                                                             'mtime': 1000},

+                                                            {'archive_id': 1,

+                                                             'name': 'file2',

+                                                             'size': 512000,

+                                                             'mtime': 103420},

+                                                            ])

+     def test_maven_archive(self, get_zipfile_list, get_maven_archive):

+         self.mm.get_maven_build.return_value = {'build_id': 2,

+                                                 'group_id': 'gid',

+                                                 'artifact_id': 'aid',

+                                                 'version': '1.0.0'}

+         rv = kojihub.list_archive_files(1)

+         get_maven_archive.assert_called_once_with(1, strict=True)

+         get_zipfile_list.assert_called_once_with(1,

+                                                  '/mnt/koji/vol/archive_vol/packages'

+                                                  '/somebuild/1.2.3/1.el6/maven/gid/aid/1.0.0/somearchive.zip')

+         self.assertListEqual(rv, [

+             {'archive_id': 1, 'mtime': 1000, 'name': 'file1', 'size': 4096},

+             {'archive_id': 1, 'mtime': 103420, 'name': 'file2',

+              'size': 512000}])

+ 

+     @mock.patch('kojihub.get_win_archive', return_value={'archive_id': 1,

+                                                          'relpath': 'rpath',

+                                                          'platform': 'all',

+                                                          'version': 'src'})

+     @mock.patch('kojihub._get_zipfile_list', return_value=[{'archive_id': 1,

+                                                             'name': 'file1',

+                                                             'size': 4096,

+                                                             'mtime': 1000},

+                                                            {'archive_id': 1,

+                                                             'name': 'file2',

+                                                             'size': 512000,

+                                                             'mtime': 103420},

+                                                            ])

+     def test_win_archive(self, get_zipfile_list, get_win_archive):

+         self.mm.get_win_build.return_value = {'build_id': 2,

+                                               'platform': 'all'}

+         rv = kojihub.list_archive_files(1)

+         get_win_archive.assert_called_once_with(1, strict=True)

+         get_zipfile_list.assert_called_once_with(1,

+                                                  '/mnt/koji/vol/archive_vol/packages'

+                                                  '/somebuild/1.2.3/1.el6/win/rpath/somearchive.zip')

+         self.assertListEqual(rv, [

+             {'archive_id': 1, 'mtime': 1000, 'name': 'file1', 'size': 4096},

+             {'archive_id': 1, 'mtime': 103420, 'name': 'file2',

+              'size': 512000}])

+ 

+     @mock.patch('kojihub.get_image_archive', return_value={'archive_id': 1,

+                                                            'arch': 'noarch'})

+     @mock.patch('kojihub._get_tarball_list', return_value=[{'archive_id': 1,

+                                                             'name': 'file1',

+                                                             'size': 4096,

+                                                             'mtime': 1000,

+                                                             'mode': '0755',

+                                                             'user': 1000,

+                                                             'group': 1000},

+                                                            {'archive_id': 1,

+                                                             'name': 'file2',

+                                                             'size': 512000,

+                                                             'mtime': 103420,

+                                                             'mode': '0644',

+                                                             'user': 1001,

+                                                             'group': 1001}

+                                                            ])

+     def test_image_archive(self, get_tarball_list, get_image_archive):

+         self.mm.get_archive_type.return_value = {'id': 3, 'name': 'tar'}

+         self.mm.get_image_build.return_value = {'build_id': 2}

+         rv = kojihub.list_archive_files(1, queryOpts={'countOnly': True})

+         get_image_archive.assert_called_once_with(1, strict=True)

+         get_tarball_list.assert_called_once_with(1,

+                                                  '/mnt/koji/vol/archive_vol/packages'

+                                                  '/somebuild/1.2.3/1.el6/images/somearchive.zip')

+         self.assertEqual(rv, 2)

fixes: #719

Changing the behavior to raise GenericError when no result returns might bring some unexpected problems. So here I just add strict argument like other apis

rebased onto abadd42c2a7d90d34136f668de8b2e150bcfa0fb

6 years ago

TODO: need to add test case for list_archive_files

1 new commit added

  • hub: unittest for list_archive_files()
6 years ago

unit test done.
@mikem @tkopecek @jcupova could you help review this?

If strict is used, previous line will throw GenericError, so this seems to be unreachable block.

If there are some files, but filename doesn't match, it will not end here (nevertheless, return default None at the end of method). I would kill this whole subblock (else/else) and just return None as it is in original code.

If strict is used, previous line will throw GenericError, so this seems to be unreachable block.

If there're one or more than one item returned from files = list_archive_files(archive_id, strict=strict), but no one matched the filename the GenericError will be thrown here, like the test case test_non_existing_file()

If there are some files, but filename doesn't match, it will not end here (nevertheless, return default None at the end of method). I would kill this whole subblock (else/else) and just return None as it is in original code.

Ah yes, the last else block is not necessary. Thanks to point it out.

Sorry, I wasn't clear in original comment. What I'm suggesting is simplification to (no change of logic):

    for file_info in files:
        if file_info['name'] == filename:
            return file_info
    if strict:
        raise koji.GenericError('No such file: %s in archive#%s' % (filename, archive_id))
    return None

rebased onto 6c663e07c4731dc7c8bb1ba835bd444001711e8b

6 years ago

Sorry, I wasn't clear in original comment. What I'm suggesting is simplification to (no change of logic):

Updated :wine_glass:

Can you file an issue for the TODO that you added?

In the final else: case in list_archive_files() we are applying a filter to a list that we know to be empty. Perhaps this can be streamlined. Or am I missing something?

nitpick: why escape a single quote when easily avoid it by using double quotes?

long line and grammar a touch awkward, maybe:

If strict is True, raise GenericError if:
  - there are no files found for the archive
  - the archive is not a type we are able to expand

Might also be worth adding this note too:

Regardless of strict, an error will be raised if the archive_id is invalid

So this is going beyond issue #719, which only asks for getArchiveFile to raise errors when the requested filename does not exist in the archive. I'm not convinced that the strict option is really needed in list_archive_files().

On one hand, it's just optional, but... I mean, technically if we are asked to list the files in an empty archive, then [] is the correct answer and probably shouldn't be an error.

In the final else: case in list_archive_files() we are applying a filter to a list that we know to be empty. Perhaps this can be streamlined. Or am I missing something?

how about https://pagure.io/fork/julian8628/koji/c/a990887a6ebcbbb95d97842f6527133f3f589ce8?branch=issue%2F719-2

rebased onto 98237e5dd20570443ac21bb537e87dd83b870c53

6 years ago

So this is going beyond issue #719, which only asks for getArchiveFile to raise errors when the requested filename does not exist in the archive. I'm not convinced that the strict option is really needed in list_archive_files().
On one hand, it's just optional, but... I mean, technically if we are asked to list the files in an empty archive, then [] is the correct answer and probably shouldn't be an error.

I made list_archive_files() more strict to throw an error when it's an empty archive, since when calling getArchiveFile(archive_id, filename, strict=True) the error message for empty archive and for non-existing filename in archive filelist should be different. Putting the empty result checking in list_archive_files() seems more straight, and it goes with the same way as issue #721. These two issues are the first time to bring strict argument to list* APIs. I guess way could think them again about if it's meaningful to throw errors when result is an empty list.

My original idea is, to avoid change the default behavior and to make it unnecessary to let client check the result again, this option might be helpful.

In the final else: case in list_archive_files() we are applying a filter to a list that we know to be empty. Perhaps this can be streamlined. Or am I missing something?

how about https://pagure.io/fork/julian8628/koji/c/a990887a6ebcbbb95d97842f6527133f3f589ce8?branch=issue%2F719-2

Yes, that looks better. Except, I don't think we should error on the empty archive case.

Sure, with strict, we can error on btypes or archivetypes that we can't handle. However, if we get to the end and we have an actual filelist from an actual archive that happens to be empty, then this is not an error condition, even with strict on.

I made list_archive_files() more strict to throw an error when it's an empty archive, since when calling getArchiveFile(archive_id, filename, strict=True) the error message for empty archive and for non-existing filename in archive filelist should be different.

There should be no error message for empty archives at all. It is simply not an error. It is correct information, and we have no reason to assume that the archive shouldn't be empty.

Putting the empty result checking in list_archive_files() seems more straight, and it goes with the same way as issue #721. These two issues are the first time to bring strict argument to list* APIs. I guess way could think them again about if it's meaningful to throw errors when result is an empty list.

For calls like get_build, where the input is expected to determine a single valid build, the strict option is a way for the caller to ensure they have a sane value. That doesn't really apply for empty archives here.

My original idea is, to avoid change the default behavior and to make it unnecessary to let client check the result again, this option might be helpful.

Let's put it in a different light. The strict options are generally a convenience for the caller, so that they don't have to put a separate assertion in their own code. They could always just implement strict themselves.

So, in what cases would a caller want to raise an error simply because an archive has no files?

In general, I think the change to get_archive_file is fine, but that the value of the rest is less clear and out of scope of the issue. I'm tempted to say split this up.

Sure, with strict, we can error on btypes or archivetypes that we can't handle. However, if we get to the end and we have an actual filelist from an actual archive that happens to be empty, then this is not an error condition, even with strict on.

Yes, it makes sense. Updated.
Should I move the changes in list_archive_files to a new PR?

rebased onto ef89efe

6 years ago

Should I move the changes in list_archive_files to a new PR?

I think it's ok now. Going to merge with minor docstring tweak

Commit c344d85 fixes this pull-request

Pull-Request has been merged by mikem

6 years ago