From ac1d21ab1018966d765bc18639460ead6b96bf2a Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Jun 02 2020 07:58:13 +0000 Subject: [PATCH 1/5] lib: retry openRemoteFile and check size --- diff --git a/koji/__init__.py b/koji/__init__.py index 4dc8c7d..bf39ea9 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -55,6 +55,8 @@ import six import six.moves.configparser import six.moves.http_client import six.moves.urllib +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry from six.moves import range, zip from koji.xmlrpcplus import Fault, dumps, getparser, loads, xmlrpc_client @@ -1748,6 +1750,18 @@ def format_exc_plus(): return rv +def request_with_retry(retries=3, backoff_factor=0.3, + status_forcelist=(500, 502, 504, 408, 429), session=None): + # stolen from https://www.peterbe.com/plog/best-practice-with-retries-with-requests + session = session or requests.Session() + retry = Retry(total=retries, read=retries, connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + return session + def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None): """Open a file on the main server (read-only) @@ -1756,12 +1770,15 @@ def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None): if topurl: url = "%s/%s" % (topurl, relpath) fo = tempfile.TemporaryFile(dir=tempdir) - resp = requests.get(url, stream=True) + resp = request_with_retry().get(url, stream=True) try: for chunk in resp.iter_content(chunk_size=8192): fo.write(chunk) finally: resp.close() + if resp.headers.get('Content-Length') and fo.tell() != int(resp.headers['Content-Length']): + raise GenericError("Downloaded file %s doesn't match expected size (%s vs %s)" % + (url, fo.tell(), resp.headers['Content-Length'])) fo.seek(0) elif topdir: fn = "%s/%s" % (topdir, relpath) diff --git a/tests/test_lib/test_utils.py b/tests/test_lib/test_utils.py index c2b2e21..7ec1b22 100644 --- a/tests/test_lib/test_utils.py +++ b/tests/test_lib/test_utils.py @@ -161,6 +161,38 @@ class MiscFunctionTestCase(unittest.TestCase): for m in mocks: m.assert_not_called() + for m in mocks: + m.reset_mock() + + # downloaded size is larger than content-length + with requests_mock.Mocker() as m_requests: + text = 'random content' + m_requests.register_uri('GET', url, text=text, + headers = {'Content-Length': "3"}) + m_TemporaryFile.return_value.tell.return_value = len(text) + # using neither + with self.assertRaises(koji.GenericError): + koji.openRemoteFile(path, topurl=topurl) + m_TemporaryFile.assert_called_once() + m_TemporaryFile.return_value.tell.assert_called() + m_open.assert_not_called() + + for m in mocks: + m.reset_mock() + + # downloaded size is shorter than content-length + with requests_mock.Mocker() as m_requests: + text = 'random content' + m_requests.register_uri('GET', url, text=text, + headers = {'Content-Length': "100"}) + m_TemporaryFile.return_value.tell.return_value = len(text) + # using neither + with self.assertRaises(koji.GenericError): + koji.openRemoteFile(path, topurl=topurl) + m_TemporaryFile.assert_called_once() + m_TemporaryFile.return_value.tell.assert_called() + m_open.assert_not_called() + def test_joinpath_bad(self): bad_joins = [ ['/foo', '../bar'], From 95f72a6983b030622f7c03a3c3e0c9b19ad69465 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Jun 02 2020 07:58:13 +0000 Subject: [PATCH 2/5] lib: check consistency of rpm from openRemoteFile Fixes: https://pagure.io/koji/issue/2130 --- diff --git a/builder/kojid b/builder/kojid index d450e05..acd0c99 100755 --- a/builder/kojid +++ b/builder/kojid @@ -1148,7 +1148,6 @@ class BuildTask(BaseTaskHandler): opts = dict([(k, getattr(self.options, k)) for k in ('topurl', 'topdir')]) opts['tempdir'] = self.workdir with koji.openRemoteFile(relpath, **opts) as fo: - koji.check_rpm_file(fo) h = koji.get_rpm_header(fo) if not koji.get_header_field(h, 'sourcepackage'): raise koji.BuildError("%s is not a source package" % srpm) diff --git a/koji/__init__.py b/koji/__init__.py index bf39ea9..6f247f5 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -1751,17 +1751,18 @@ def format_exc_plus(): def request_with_retry(retries=3, backoff_factor=0.3, - status_forcelist=(500, 502, 504, 408, 429), session=None): + status_forcelist=(500, 502, 504, 408, 429), session=None): # stolen from https://www.peterbe.com/plog/best-practice-with-retries-with-requests session = session or requests.Session() retry = Retry(total=retries, read=retries, connect=retries, - backoff_factor=backoff_factor, - status_forcelist=status_forcelist) + backoff_factor=backoff_factor, + status_forcelist=status_forcelist) adapter = HTTPAdapter(max_retries=retry) session.mount('http://', adapter) session.mount('https://', adapter) return session + def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None): """Open a file on the main server (read-only) @@ -1780,6 +1781,13 @@ def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None): raise GenericError("Downloaded file %s doesn't match expected size (%s vs %s)" % (url, fo.tell(), resp.headers['Content-Length'])) fo.seek(0) + if relpath.endswith('.rpm'): + # if it is an rpm run basic checks (assume that anything ending with the suffix, + # but not being rpm is suspicious anyway) + try: + check_rpm_file(fo) + except Exception as ex: + raise GenericError("Downloaded rpm %s is corrupted:\n%s" % (url, str(ex))) elif topdir: fn = "%s/%s" % (topdir, relpath) fo = open(fn) diff --git a/tests/test_lib/test_utils.py b/tests/test_lib/test_utils.py index 7ec1b22..c252ee0 100644 --- a/tests/test_lib/test_utils.py +++ b/tests/test_lib/test_utils.py @@ -170,7 +170,6 @@ class MiscFunctionTestCase(unittest.TestCase): m_requests.register_uri('GET', url, text=text, headers = {'Content-Length': "3"}) m_TemporaryFile.return_value.tell.return_value = len(text) - # using neither with self.assertRaises(koji.GenericError): koji.openRemoteFile(path, topurl=topurl) m_TemporaryFile.assert_called_once() @@ -186,13 +185,32 @@ class MiscFunctionTestCase(unittest.TestCase): m_requests.register_uri('GET', url, text=text, headers = {'Content-Length': "100"}) m_TemporaryFile.return_value.tell.return_value = len(text) - # using neither with self.assertRaises(koji.GenericError): koji.openRemoteFile(path, topurl=topurl) m_TemporaryFile.assert_called_once() m_TemporaryFile.return_value.tell.assert_called() m_open.assert_not_called() + def test_openRemoteFile_valid_rpm(self): + # downloaded file is correct rpm + with requests_mock.Mocker() as m_requests: + topurl = 'http://example.com/koji' + path = 'tests/test_lib/data/rpms/test-src-1-1.fc24.src.rpm' + url = os.path.join(topurl, path) + m_requests.register_uri('GET', url, body=open(path, 'rb')) + #with self.assertRaises(koji.GenericError): + koji.openRemoteFile(path, topurl=topurl) + + def test_openRemoteFile_invalid_rpm(self): + # downloaded file is correct rpm + with requests_mock.Mocker() as m_requests: + topurl = 'http://example.com/koji' + path = 'file.rpm' + url = os.path.join(topurl, path) + m_requests.register_uri('GET', url, text='123') + with self.assertRaises(koji.GenericError): + koji.openRemoteFile(path, topurl=topurl) + def test_joinpath_bad(self): bad_joins = [ ['/foo', '../bar'], From e07e03e75199d979e1e7419baee139931579dbcf Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Jun 02 2020 07:58:13 +0000 Subject: [PATCH 3/5] use downloadFile for localPath and openRemoteFile --- diff --git a/koji/__init__.py b/koji/__init__.py index 6f247f5..0ded965 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -1763,6 +1763,36 @@ def request_with_retry(retries=3, backoff_factor=0.3, return session +def downloadFile(url, path=None, fo=None): + """download remote file. + + :param str url: URL to download from + :param str path: relative path where to save the file + :param FileObject fo: if specified path will not be used (only for filetype + detection) + """ + + if not fo: + fo = open(path, "wb") + + resp = request_with_retry().get(url, stream=True) + try: + for chunk in resp.iter_content(chunk_size=8192): + fo.write(chunk) + finally: + resp.close() + if resp.headers.get('Content-Length') and fo.tell() != int(resp.headers['Content-Length']): + raise GenericError("Downloaded file %s doesn't match expected size (%s vs %s)" % + (url, fo.tell(), resp.headers['Content-Length'])) + fo.seek(0) + if path and path.endswith('.rpm'): + # if it is an rpm run basic checks (assume that anything ending with the suffix, + # but not being rpm is suspicious anyway) + try: + check_rpm_file(fo) + except Exception as ex: + raise GenericError("Downloaded rpm %s is corrupted:\n%s" % (url, str(ex))) + def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None): """Open a file on the main server (read-only) @@ -1771,23 +1801,7 @@ def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None): if topurl: url = "%s/%s" % (topurl, relpath) fo = tempfile.TemporaryFile(dir=tempdir) - resp = request_with_retry().get(url, stream=True) - try: - for chunk in resp.iter_content(chunk_size=8192): - fo.write(chunk) - finally: - resp.close() - if resp.headers.get('Content-Length') and fo.tell() != int(resp.headers['Content-Length']): - raise GenericError("Downloaded file %s doesn't match expected size (%s vs %s)" % - (url, fo.tell(), resp.headers['Content-Length'])) - fo.seek(0) - if relpath.endswith('.rpm'): - # if it is an rpm run basic checks (assume that anything ending with the suffix, - # but not being rpm is suspicious anyway) - try: - check_rpm_file(fo) - except Exception as ex: - raise GenericError("Downloaded rpm %s is corrupted:\n%s" % (url, str(ex))) + downloadFile(url, path=relpath, fo=fo) elif topdir: fn = "%s/%s" % (topdir, relpath) fo = open(fn) diff --git a/koji/tasks.py b/koji/tasks.py index 33583a6..b8ed344 100644 --- a/koji/tasks.py +++ b/koji/tasks.py @@ -495,15 +495,9 @@ class BaseTaskHandler(object): return fn self.logger.debug("Downloading %s", relpath) url = "%s/%s" % (self.options.topurl, relpath) - resp = requests.get(url, stream=True) - try: - if not os.path.exists(os.path.dirname(fn)): - os.makedirs(os.path.dirname(fn)) - with open(fn, 'wb') as fdst: - for chunk in resp.iter_content(chunk_size=8192): - fdst.write(chunk) - finally: - resp.close() + if not os.path.exists(os.path.dirname(fn)): + os.makedirs(os.path.dirname(fn)) + koji.downloadFile(url, path=fn) else: fn = "%s/%s" % (self.options.topdir, relpath) return fn From 4c2bf80c0de3fd0f3fe441e604089dfb122b58bb Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Jun 02 2020 08:43:22 +0000 Subject: [PATCH 4/5] fix checking signed files Rpms which are signed with unknown key would raise an error. Ignore checking of signing key validity. --- diff --git a/koji/__init__.py b/koji/__init__.py index 0ded965..970fc16 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -1773,7 +1773,7 @@ def downloadFile(url, path=None, fo=None): """ if not fo: - fo = open(path, "wb") + fo = open(path, "w+b") resp = request_with_retry().get(url, stream=True) try: @@ -1829,13 +1829,15 @@ def _check_rpm_file(fo): """Check that the open file appears to be an rpm""" # TODO: trap exception and raise something with more infomation ts = rpm.TransactionSet() + # for basic validity we can ignore sigs as there needn't be public keys installed + ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES) try: hdr = ts.hdrFromFdno(fo.fileno()) except rpm.error as ex: raise GenericError("rpm's header can't be extracted: %s (rpm error: %s)" % (fo.name, ', '.join(ex.args))) try: - rpm.TransactionSet().hdrCheck(hdr.unload()) + ts.hdrCheck(hdr.unload()) except rpm.error as ex: raise GenericError("rpm's header can't be checked: %s (rpm error: %s)" % (fo.name, ', '.join(ex.args))) From ea0750a89ff06207be80cb91df6a80d663c0b622 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Jun 05 2020 08:53:30 +0000 Subject: [PATCH 5/5] fix flake8 issues --- diff --git a/koji/__init__.py b/koji/__init__.py index 970fc16..39d01a5 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -1793,6 +1793,7 @@ def downloadFile(url, path=None, fo=None): except Exception as ex: raise GenericError("Downloaded rpm %s is corrupted:\n%s" % (url, str(ex))) + def openRemoteFile(relpath, topurl=None, topdir=None, tempdir=None): """Open a file on the main server (read-only) diff --git a/koji/tasks.py b/koji/tasks.py index b8ed344..678b513 100644 --- a/koji/tasks.py +++ b/koji/tasks.py @@ -28,7 +28,6 @@ import random import signal import time -import requests import six.moves.xmlrpc_client from six.moves import range