#2254 openRemoteFile retries and checks downloaded content
Merged 2 years ago by tkopecek. Opened 2 years ago by tkopecek.
tkopecek/koji issue2130  into  master

file modified
-1
@@ -1148,7 +1148,6 @@ 

          opts = dict([(k, getattr(self.options, k)) for k in ('topurl', 'topdir')])

          opts['tempdir'] = self.workdir

          with koji.openRemoteFile(relpath, **opts) as fo:

-             koji.check_rpm_file(fo)

              h = koji.get_rpm_header(fo)

          if not koji.get_header_field(h, 'sourcepackage'):

              raise koji.BuildError("%s is not a source package" % srpm)

file modified
+50 -8
@@ -55,6 +55,8 @@ 

  import six.moves.configparser

  import six.moves.http_client

  import six.moves.urllib

+ from requests.adapters import HTTPAdapter

+ from requests.packages.urllib3.util.retry import Retry

  from six.moves import range, zip

  

  from koji.xmlrpcplus import Fault, dumps, getparser, loads, xmlrpc_client
@@ -1748,6 +1750,50 @@ 

      return rv

  

  

+ def request_with_retry(retries=3, backoff_factor=0.3,

+                        status_forcelist=(500, 502, 504, 408, 429), session=None):

+     # stolen from https://www.peterbe.com/plog/best-practice-with-retries-with-requests

+     session = session or requests.Session()

+     retry = Retry(total=retries, read=retries, connect=retries,

+                   backoff_factor=backoff_factor,

+                   status_forcelist=status_forcelist)

+     adapter = HTTPAdapter(max_retries=retry)

+     session.mount('http://', adapter)

+     session.mount('https://', adapter)

+     return session

+ 

+ 

+ def downloadFile(url, path=None, fo=None):

+     """download remote file.

+ 

+     :param str url: URL to download from

+     :param str path: relative path where to save the file

+     :param FileObject fo: if specified path will not be used (only for filetype

+                           detection)

+     """

+ 

+     if not fo:

+         fo = open(path, "w+b")

+ 

+     resp = request_with_retry().get(url, stream=True)

+     try:

+         for chunk in resp.iter_content(chunk_size=8192):

+             fo.write(chunk)

+     finally:

+         resp.close()

+     if resp.headers.get('Content-Length') and fo.tell() != int(resp.headers['Content-Length']):

+         raise GenericError("Downloaded file %s doesn't match expected size (%s vs %s)" %

+                            (url, fo.tell(), resp.headers['Content-Length']))

+     fo.seek(0)

+     if path and path.endswith('.rpm'):

+         # if it is an rpm run basic checks (assume that anything ending with the suffix,

+         # but not being rpm is suspicious anyway)

+         try:

+             check_rpm_file(fo)

+         except Exception as ex:

+             raise GenericError("Downloaded rpm %s is corrupted:\n%s" % (url, str(ex)))

+ 

+ 

  def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None):

      """Open a file on the main server (read-only)

  
@@ -1756,13 +1802,7 @@ 

      if topurl:

          url = "%s/%s" % (topurl, relpath)

          fo = tempfile.TemporaryFile(dir=tempdir)

-         resp = requests.get(url, stream=True)

-         try:

-             for chunk in resp.iter_content(chunk_size=8192):

-                 fo.write(chunk)

-         finally:

-             resp.close()

-         fo.seek(0)

+         downloadFile(url, path=relpath, fo=fo)

      elif topdir:

          fn = "%s/%s" % (topdir, relpath)

          fo = open(fn)
@@ -1790,13 +1830,15 @@ 

      """Check that the open file appears to be an rpm"""

      # TODO: trap exception and raise something with more infomation

      ts = rpm.TransactionSet()

+     # for basic validity we can ignore sigs as there needn't be public keys installed

+     ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES)

      try:

          hdr = ts.hdrFromFdno(fo.fileno())

      except rpm.error as ex:

          raise GenericError("rpm's header can't be extracted: %s (rpm error: %s)" %

                             (fo.name, ', '.join(ex.args)))

      try:

-         rpm.TransactionSet().hdrCheck(hdr.unload())

+         ts.hdrCheck(hdr.unload())

      except rpm.error as ex:

          raise GenericError("rpm's header can't be checked: %s (rpm error: %s)" %

                             (fo.name, ', '.join(ex.args)))

file modified
+3 -10
@@ -28,7 +28,6 @@ 

  import signal

  import time

  

- import requests

  import six.moves.xmlrpc_client

  from six.moves import range

  
@@ -495,15 +494,9 @@ 

                  return fn

              self.logger.debug("Downloading %s", relpath)

              url = "%s/%s" % (self.options.topurl, relpath)

-             resp = requests.get(url, stream=True)

-             try:

-                 if not os.path.exists(os.path.dirname(fn)):

-                     os.makedirs(os.path.dirname(fn))

-                 with open(fn, 'wb') as fdst:

-                     for chunk in resp.iter_content(chunk_size=8192):

-                         fdst.write(chunk)

-             finally:

-                 resp.close()

+             if not os.path.exists(os.path.dirname(fn)):

+                 os.makedirs(os.path.dirname(fn))

+             koji.downloadFile(url, path=fn)

          else:

              fn = "%s/%s" % (self.options.topdir, relpath)

          return fn

@@ -161,6 +161,56 @@ 

              for m in mocks:

                  m.assert_not_called()

  

+         for m in mocks:

+             m.reset_mock()

+ 

+         # downloaded size is larger than content-length

+         with requests_mock.Mocker() as m_requests:

+             text = 'random content'

+             m_requests.register_uri('GET', url, text=text,

+                     headers = {'Content-Length': "3"})

+             m_TemporaryFile.return_value.tell.return_value = len(text)

+             with self.assertRaises(koji.GenericError):

+                 koji.openRemoteFile(path, topurl=topurl)

+             m_TemporaryFile.assert_called_once()

+             m_TemporaryFile.return_value.tell.assert_called()

+             m_open.assert_not_called()

+ 

+         for m in mocks:

+             m.reset_mock()

+ 

+         # downloaded size is shorter than content-length

+         with requests_mock.Mocker() as m_requests:

+             text = 'random content'

+             m_requests.register_uri('GET', url, text=text,

+                     headers = {'Content-Length': "100"})

+             m_TemporaryFile.return_value.tell.return_value = len(text)

+             with self.assertRaises(koji.GenericError):

+                 koji.openRemoteFile(path, topurl=topurl)

+             m_TemporaryFile.assert_called_once()

+             m_TemporaryFile.return_value.tell.assert_called()

+             m_open.assert_not_called()

+ 

+     def test_openRemoteFile_valid_rpm(self):

+         # downloaded file is correct rpm

+         with requests_mock.Mocker() as m_requests:

+             topurl = 'http://example.com/koji'

+             path = 'tests/test_lib/data/rpms/test-src-1-1.fc24.src.rpm'

+             url = os.path.join(topurl, path)

+             m_requests.register_uri('GET', url, body=open(path, 'rb'))

+             #with self.assertRaises(koji.GenericError):

+             koji.openRemoteFile(path, topurl=topurl)

+ 

+     def test_openRemoteFile_invalid_rpm(self):

+         # downloaded file is correct rpm

+         with requests_mock.Mocker() as m_requests:

+             topurl = 'http://example.com/koji'

+             path = 'file.rpm'

+             url = os.path.join(topurl, path)

+             m_requests.register_uri('GET', url, text='123')

+             with self.assertRaises(koji.GenericError):

+                 koji.openRemoteFile(path, topurl=topurl)

+ 

      def test_joinpath_bad(self):

          bad_joins = [

              ['/foo', '../bar'],

Metadata Update from @tkopecek:
- Pull-request tagged with: testing-ready

2 years ago

2 new commits added

  • lib: check consistency of rpm from openRemoteFile
  • lib: retry openRemoteFile and check size
2 years ago

rebased onto eb3788a63741c072db8d8e69465ff96dc0d9c55f

2 years ago

please see also ticket #2282 , tasks.py/localPath() suffers from the same issues as openRemoteFile()

1 new commit added

  • use downloadFile for localPath and openRemoteFile
2 years ago

rebased onto a408e9dc046c57bc18175257ba5ac3166ec5df4d

2 years ago

Right, I've forced them to use same downloadFile

OK, I am "stuck" at 1.20.1 level, because it's what's on the Fedora builders :-)

Metadata Update from @julian8628:
- Pull-request untagged with: testing-ready

2 years ago

Metadata Update from @tkopecek:
- Pull-request tagged with: testing-ready

2 years ago

rebased onto ac1d21a

2 years ago

Metadata Update from @mfilip:
- Pull-request tagged with: testing-done

2 years ago

pretty please pagure-ci rebuild

2 years ago

1 new commit added

  • fix flake8 issues
2 years ago

Commit 7eacf6b fixes this pull-request

Pull-Request has been merged by tkopecek

2 years ago

first catch on buildvm-s390x-08

2020-06-05 18:38:43,933 [INFO] koji.TaskManager: Attempting to take task 45461806
2020-06-05 18:38:47,814 [INFO] koji.TaskManager: pids: {45461806: 2563043}
2020-06-05 18:38:47,919 [INFO] koji.TaskManager: open task: {'waiting': None, 'id': 45461806, 'weight': 1.5}
2020-06-05 18:38:57,683 [WARNING] koji.TaskManager: TRACEBACK: Traceback (most recent call last):
  File "/usr/lib/python3.7/site-packages/koji/daemon.py", line 1294, in runTask
    response = (handler.run(),)
  File "/usr/lib/python3.7/site-packages/koji/tasks.py", line 312, in run
    return koji.util.call_with_argcheck(self.handler, self.params, self.opts)
  File "/usr/lib/python3.7/site-packages/koji/util.py", line 263, in call_with_argcheck
    return func(*args, **kwargs)
  File "/usr/sbin/kojid", line 1338, in handler
    fn = self.localPath("work/%s" % pkg)
  File "/usr/lib/python3.7/site-packages/koji/tasks.py", line 482, in localPath
    koji.downloadFile(url, path=fn)
  File "/usr/lib/python3.7/site-packages/koji/__init__.py", line 1706, in downloadFile
    (url, fo.tell(), resp.headers['Content-Length']))
koji.GenericError: Downloaded file http://kojipkgs-cache01.s390.fedoraproject.org/work/tasks/1775/45461775/espresso-4.1.2-4.fc33.src.rpm doesn't match expected size (22837386 vs 22934903)

2020-06-05 18:39:07,146 [INFO] koji.TaskManager: pids: {45461806: 2563043}
2020-06-05 18:39:07,255 [INFO] koji.TaskManager: Task 45461806 (pid 2563043) exited with status 0
2020-06-05 18:39:07,263 [INFO] koji.TaskManager: Expiring subsession 102577734 (task 45461806)

The good news is that the user gets a accurate information about why the build failed.