From 51cc2ec85b880ca3bb8cb3be29429e089557b4f0 Mon Sep 17 00:00:00 2001 From: Jan Kaluza Date: Nov 22 2018 07:35:51 +0000 Subject: Fetch just container.yaml and content_sets.yml instead of cloning whole dist-git repo. Currently, Freshmaker is cloning the git repository of every image it is going to rebuild to find out the content of container.yaml and content_sets.yml to decide whether it needs to generate Pulp composes itself or if the OSBS can do it iself. Cloning whole repository is slow and downloads unnecessary files. In this commit, new `get_distgit_files` method is introduce which uses "git archive" instead of "git clone" to get only particular files which are needed by Freshmaker from the dist-git repository. This is much faster and also does not consume any extra resources. --- diff --git a/freshmaker/lightblue.py b/freshmaker/lightblue.py index 5977596..dfdc5e3 100644 --- a/freshmaker/lightblue.py +++ b/freshmaker/lightblue.py @@ -37,7 +37,7 @@ from six.moves.urllib.parse import urlparse import concurrent.futures from freshmaker import log, conf from freshmaker.kojiservice import koji_service -from freshmaker.utils import sorted_by_nvr, clone_distgit_repo, temp_dir +from freshmaker.utils import sorted_by_nvr, get_distgit_files import koji @@ -360,50 +360,48 @@ class ContainerImage(dict): namespace = "rpms" name = repository - prefix = "freshmaker-%s-%s-%s" % (namespace, name, commit) - with temp_dir(prefix=prefix) as repodir: - try: - clone_distgit_repo(namespace, name, repodir, commit=commit, - ssh=False, logger=log) - except OSError as e: - self.log_error("Error while cloning dist-git repo: %s" % e) - return data - - content_sets_path = os.path.join(repodir, "content_sets.yml") - if not os.path.exists(content_sets_path): - log.debug("%s: Should generate Pulp repo, %s does not exist.", - nvr, content_sets_path) - data["generate_pulp_repos"] = True - return data + try: + files = get_distgit_files( + namespace, name, commit, ["content_sets.yml", "container.yaml"], + ssh=False, logger=log) + except OSError as e: + self.log_error("Error while fetching dist-git repo files: %s" % e) + return data - try: - with open(content_sets_path, 'r') as f: - content_sets_yaml = yaml.load(f) - except Exception as err: - log.exception(err) - data["generate_pulp_repos"] = True - return data - - for content_sets in content_sets_yaml.values(): - data["content_sets"] += content_sets - - container_path = os.path.join(repodir, "container.yaml") - if not os.path.exists(container_path): - log.debug("%s: Should generate Pulp repo, %s does not exist.", - nvr, container_path) - data["generate_pulp_repos"] = True - return data - - with open(container_path, 'r') as f: - container_yaml = yaml.load(f) - - if ("compose" not in container_yaml or - "pulp_repos" not in container_yaml["compose"] or - not container_yaml["compose"]["pulp_repos"]): - log.debug("%s: Should generate Pulp repo, pulp_repos not " - "enabled in %s.", nvr, container_path) - data["generate_pulp_repos"] = True - return data + content_sets_data = files["content_sets.yml"] + container_data = files["container.yaml"] + + if content_sets_data is None: + log.debug("%s: Should generate Pulp repo, content_sets.yml does " + "not exist.", nvr) + data["generate_pulp_repos"] = True + return data + + try: + content_sets_yaml = yaml.load(content_sets_data) + except Exception as err: + log.exception(err) + data["generate_pulp_repos"] = True + return data + + for content_sets in content_sets_yaml.values(): + data["content_sets"] += content_sets + + if container_data is None: + log.debug("%s: Should generate Pulp repo, container.yaml does not " + "exist.", nvr) + data["generate_pulp_repos"] = True + return data + + container_yaml = yaml.load(container_data) + + if ("compose" not in container_yaml or + "pulp_repos" not in container_yaml["compose"] or + not container_yaml["compose"]["pulp_repos"]): + log.debug("%s: Should generate Pulp repo, pulp_repos not " + "enabled in containery.yaml.", nvr) + data["generate_pulp_repos"] = True + return data # This is workaround until OSBS-5919 is fixed. if "arches" in self and self["arches"] == "x86_64": diff --git a/freshmaker/utils.py b/freshmaker/utils.py index a1218ce..e6e15ed 100644 --- a/freshmaker/utils.py +++ b/freshmaker/utils.py @@ -32,6 +32,8 @@ import tempfile import time import koji import kobo.rpmlib +import tarfile +import io from freshmaker import conf, app, log from freshmaker.types import ArtifactType @@ -206,9 +208,19 @@ def clone_repo(url, dest, branch='master', logger=None, commit=None): return dest -def clone_distgit_repo(namespace, name, dest, branch='master', ssh=True, - user=None, logger=None, commit=None): - """clone a git repo""" +def get_distgit_url(namespace, name, ssh, user): + """ + Returns the dist-git repository URL. + + :param str namespace: Namespace in which the repository is located, for + example "rpms", "containers", "modules", ... + :param str name: Name of the repository inside the namespace. + :param bool ssh: If True, SSH auth will be used when fetching the files. + :param str user: If set, overrides the default user for SSH auth. + + :rtype: str + :return: The dist-git repository URL. + """ if ssh: if user is None: if hasattr(conf, 'git_user'): @@ -220,10 +232,66 @@ def clone_distgit_repo(namespace, name, dest, branch='master', ssh=True, repo_url = conf.git_base_url repo_url = os.path.join(repo_url, namespace, name) + return repo_url + + +def clone_distgit_repo(namespace, name, dest, branch='master', ssh=True, + user=None, logger=None, commit=None): + """clone a git repo""" + repo_url = get_distgit_url(namespace, name, ssh, user) return clone_repo(repo_url, dest, branch=branch, logger=logger, commit=commit) +def get_distgit_files( + namespace, name, commit_or_branch, files, ssh=True, user=None, + logger=None): + """ + Fetches the `files` from dist-git repository defined by `namespace`, + `name` and `commit_or_branch` and returns them. + + This is much faster than cloning the dist-git repository and should be + preferred method to get the files from dist-git in case the full clone + of repository is not needed. + + :param str namespace: Namespace in which the repository is located, for + example "rpms", "containers", "modules", ... + :param str name: Name of the repository inside the namespace. + :param str commit_or_branch: Commit hash or branch name. + :param list files: List of strings defining the files to fetch. + :param bool ssh: If True, SSH auth will be used when fetching the files. + :param str user: If set, overrides the default user for SSH auth. + :param freshmaker.log logger: Logger instance. + + :rtype: dict + :return: Dictionary with file name as key and file content as value. + If the file does not exist in a dist-git repo, None is used as value. + """ + repo_url = get_distgit_url(namespace, name, ssh, user) + + # Use the "git archive" to get the files in tarball and then extract + # them and return in dict. We need to go file by file, because the + # "git archive" would fail completely in case any file does not exist + # in the git repo. + ret = {} + for f in files: + try: + cmd = ['git', 'archive', '--remote=%s' % repo_url, + commit_or_branch, f] + tar_data = _run_command(cmd, logger=logger, return_output=True) + tar_bytes = io.BytesIO(tar_data) + tar = tarfile.open(fileobj=tar_bytes) + for member in tar.getmembers(): + ret[member.name] = tar.extractfile(member).read() + except OSError as e: + if "path not found" in str(e): + ret[os.path.basename(f)] = None + else: + raise + + return ret + + def add_empty_commit(repo, msg="bump", author=None, logger=None): """Commit an empty commit to repo""" if author is None: diff --git a/tests/test_lightblue.py b/tests/test_lightblue.py index e5b25c1..ff37bbe 100644 --- a/tests/test_lightblue.py +++ b/tests/test_lightblue.py @@ -23,7 +23,7 @@ import json import six -from mock import call, patch, Mock, mock_open +from mock import call, patch, Mock from six.moves import http_client import freshmaker @@ -147,32 +147,33 @@ class TestGetAdditionalDataFromDistGit(helpers.FreshmakerTestCase): self.patcher = helpers.Patcher( "freshmaker.lightblue.") - self.clone_distgit_repo = self.patcher.patch("clone_distgit_repo") - self.path_exists = self.patcher.patch("os.path.exists") - self.patched_open = self.patcher.patch("open", create=True) + self.get_distgit_files = self.patcher.patch("get_distgit_files") + self.get_distgit_files.return_value = { + "content_sets.yml": None, + "container.yaml": None, + } def tearDown(self): super(TestGetAdditionalDataFromDistGit, self).tearDown() self.patcher.unpatch_all() def test_generate(self): - self.path_exists.return_value = True - self.patched_open.side_effect = [ - mock_open(read_data="x86_64:\n - content_set").return_value, - mock_open(read_data="compose:\n pulp_repos: True").return_value] + self.get_distgit_files.return_value = { + "content_sets.yml": "x86_64:\n - content_set", + "container.yaml": "compose:\n pulp_repos: True", + } image = ContainerImage.create({"brew": {"build": "nvr"}}) ret = image._get_additional_data_from_distgit( "rpms/foo-docker", "branch", "commit") self.assertEqual(ret["generate_pulp_repos"], False) - self.clone_distgit_repo.assert_called_once_with( - 'rpms', 'foo-docker', - helpers.AnyStringWith('freshmaker-rpms-foo-docker'), - commit='commit', logger=log, ssh=False) + self.get_distgit_files.assert_called_once_with( + 'rpms', 'foo-docker', "commit", + ["content_sets.yml", "container.yaml"], logger=log, ssh=False) def test_generate_os_error(self): - self.clone_distgit_repo.side_effect = OSError( + self.get_distgit_files.side_effect = OSError( "Got an error (128) from git: fatal: reference is not a tree: " "4d42e2009cec70d871c65de821396cd750d523f1") @@ -183,35 +184,33 @@ class TestGetAdditionalDataFromDistGit(helpers.FreshmakerTestCase): self.assertEqual( image["error"], - "Error while cloning dist-git repo: Got an error (128) from git: " + "Error while fetching dist-git repo files: Got an error (128) from git: " "fatal: reference is not a tree: 4d42e2009cec70d871c65de821396cd750d523f1") - self.clone_distgit_repo.assert_called_once_with( - 'rpms', 'foo-docker', - helpers.AnyStringWith('freshmaker-rpms-foo-docker'), - commit='commit', logger=log, ssh=False) + self.get_distgit_files.assert_called_once_with( + 'rpms', 'foo-docker', 'commit', + ["content_sets.yml", "container.yaml"], logger=log, ssh=False) def test_generate_no_namespace(self): - self.path_exists.return_value = True - self.patched_open.side_effect = [ - mock_open(read_data="x86_64:\n - content_set").return_value, - mock_open(read_data="compose:\n pulp_repos: True").return_value] + self.get_distgit_files.return_value = { + "content_sets.yml": "x86_64:\n - content_set", + "container.yaml": "compose:\n pulp_repos: True", + } image = ContainerImage.create({"brew": {"build": "nvr"}}) ret = image._get_additional_data_from_distgit( "foo-docker", "branch", "commit") self.assertEqual(ret["generate_pulp_repos"], False) - self.clone_distgit_repo.assert_called_once_with( - 'rpms', 'foo-docker', - helpers.AnyStringWith('freshmaker-rpms-foo-docker'), - commit='commit', logger=log, ssh=False) + self.get_distgit_files.assert_called_once_with( + 'rpms', 'foo-docker', "commit", + ["content_sets.yml", "container.yaml"], logger=log, ssh=False) def test_generate_no_pulp_repos(self): - self.path_exists.return_value = True - self.patched_open.side_effect = [ - mock_open(read_data="x86_64:\n - content_set").return_value, - mock_open(read_data="compose:\n pulp_repos_x: True").return_value] + self.get_distgit_files.return_value = { + "content_sets.yml": "x86_64:\n - content_set", + "container.yaml": "compose:\n pulp_repos_x: True", + } image = ContainerImage.create({"brew": {"build": "nvr"}}) ret = image._get_additional_data_from_distgit( @@ -219,10 +218,10 @@ class TestGetAdditionalDataFromDistGit(helpers.FreshmakerTestCase): self.assertEqual(ret["generate_pulp_repos"], True) def test_generate_pulp_repos_false(self): - self.path_exists.return_value = True - self.patched_open.side_effect = [ - mock_open(read_data="x86_64:\n - content_set").return_value, - mock_open(read_data="compose:\n pulp_repos: False").return_value] + self.get_distgit_files.return_value = { + "content_sets.yml": "x86_64:\n - content_set", + "container.yaml": "compose:\n pulp_repos: False", + } image = ContainerImage.create({"brew": {"build": "nvr"}}) ret = image._get_additional_data_from_distgit( @@ -230,9 +229,10 @@ class TestGetAdditionalDataFromDistGit(helpers.FreshmakerTestCase): self.assertEqual(ret["generate_pulp_repos"], True) def test_generate_no_content_sets_yml(self): - def mocked_path_exists(path): - return not path.endswith("content_sets.yml") - self.path_exists.side_effect = mocked_path_exists + self.get_distgit_files.return_value = { + "content_sets.yml": None, + "container.yaml": "compose:\n pulp_repos: False", + } image = ContainerImage.create({"brew": {"build": "nvr"}}) ret = image._get_additional_data_from_distgit( @@ -240,12 +240,10 @@ class TestGetAdditionalDataFromDistGit(helpers.FreshmakerTestCase): self.assertEqual(ret["generate_pulp_repos"], True) def test_generate_no_container_yaml(self): - def mocked_path_exists(path): - return not path.endswith("container.yaml") - self.path_exists.side_effect = mocked_path_exists - self.patched_open.side_effect = [ - mock_open(read_data="x86_64:\n - content_set").return_value, - mock_open(read_data="compose:\n pulp_repos: False").return_value] + self.get_distgit_files.return_value = { + "content_sets.yml": "x86_64:\n - content_set", + "container.yaml": None, + } image = ContainerImage.create({"brew": {"build": "nvr"}}) ret = image._get_additional_data_from_distgit( @@ -260,7 +258,11 @@ class TestContainerImageObject(helpers.FreshmakerTestCase): self.patcher = helpers.Patcher( 'freshmaker.lightblue.') - self.clone_distgit_repo = self.patcher.patch("clone_distgit_repo") + self.get_distgit_files = self.patcher.patch("get_distgit_files") + self.get_distgit_files.return_value = { + "content_sets.yml": None, + "container.yaml": None, + } self.dummy_image = ContainerImage.create({ '_id': '1233829', @@ -516,7 +518,11 @@ class TestQueryEntityFromLightBlue(helpers.FreshmakerTestCase): self.patcher = helpers.Patcher( 'freshmaker.lightblue.') - self.clone_distgit_repo = self.patcher.patch("clone_distgit_repo") + self.get_distgit_files = self.patcher.patch("get_distgit_files") + self.get_distgit_files.return_value = { + "content_sets.yml": None, + "container.yaml": None, + } self.fake_server_url = 'lightblue.localhost' self.fake_cert_file = 'path/to/cert'