#70 [frontend][dist-git] import task only once
Merged 6 years ago by clime. Opened 6 years ago by praiskup.

@@ -3,132 +3,132 @@

      "_description": "url-source test",

      "_expected_outcome": "success",

  

-     "branch": "f23",

+     "branches": ["f23"],

      "project": "example",

      "source_json": "{\"url\": \"http://localhost:5000/static/example-1.0.5-1.fc23.src.rpm\"}",

      "source_type": 1,

-     "task_id": "1-f23",

+     "task_id": "1",

      "user": "clime"

  },

  {

      "_description": "url-source test for a different branch",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"url\": \"http://localhost:5000/static/example-1.0.5-1.fc23.src.rpm\"}",

      "source_type": 1,

-     "task_id": "2-master",

+     "task_id": "2",

      "user": "clime"

  },

  {

      "_description": "upload-source test",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"tmp\": \"tmp2hagml\", \"pkg\": \"example-1.0.5-1.fc23.src.rpm\"}",

      "source_type": 2,

-     "task_id": "3-master",

+     "task_id": "3",

      "user": "clime"

  },

  {

      "_description": "tito-source test",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"tito_test\": true, \"git_dir\": \"\", \"git_url\": \"git@localhost:example.git\", \"git_branch\": \"\"}",

      "source_type": 3,

-     "task_id": "4-master",

+     "task_id": "4",

      "user": "clime"

  },

  {

      "_description": "MockSCM-source test",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"scm_branch\": \"\", \"scm_url\": \"https://github.com/clime/example.git\", \"scm_type\": \"git\", \"spec\": \"example.spec\"}",

      "source_type": 4,

-     "task_id": "5-master",

+     "task_id": "5",

      "user": "clime"

  },

  {

      "_description": "PyPI-source test",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"pypi_package_version\": \"\", \"pypi_package_name\": \"motionpaint\", \"python_versions\": []}",

      "source_type": 5,

-     "task_id": "6-master",

+     "task_id": "6",

      "user": "clime"

  },

  {

      "_description": "MockSCM-source test for a spec file inside a subdirectory",

      "_expected_outcome": "success",

  

-     "branch": "f23",

+     "branches": ["f23"],

      "project": "example",

      "source_json": "{\"scm_branch\": \"\", \"scm_url\": \"https://github.com/clime/example2.git\", \"scm_type\": \"git\", \"spec\": \"subpkg/example.spec\"}",

      "source_type": 4,

-     "task_id": "7-f23",

+     "task_id": "7",

      "user": "clime"

  },

  {

      "_description": "tito-source test for a git repo subdirectory",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"tito_test\": true, \"git_dir\": \"subpkg\", \"git_url\": \"git@localhost:example2.git\", \"git_branch\": \"\"}",

      "source_type": 3,

-     "task_id": "8-master",

+     "task_id": "8",

      "user": "clime"

  },

  {

      "_description": "tito-source test for a different-than-master branch",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"tito_test\": true, \"git_dir\": \"subpkg\", \"git_url\": \"git@localhost:example2.git\", \"git_branch\": \"somebranch\"}",

      "source_type": 3,

-     "task_id": "9-master",

+     "task_id": "9",

      "user": "clime"

  },

  {

      "_description": "RubyGem import test",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"gem_name\": \"A_123\"}",

      "source_type": 6,

-     "task_id": "10-master",

+     "task_id": "10",

      "user": "clime"

  },

  {

      "_description": "Dist-git import test",

      "_expected_outcome": "success",

  

-     "branch": "master",

+     "branches": ["master"],

      "project": "example",

      "source_json": "{\"clone_url\": \"https://src.fedoraproject.org/git/rpms/389-admin-console.git\", \"branch\": \"f25\" }",

      "source_type": 7,

-     "task_id": "11-master",

+     "task_id": "11",

      "user": "clime"

  },

  {

      "_description": "Mageia macros tests",

      "_expected_outcome": "success",

  

-     "branch": "mga6",

+     "branches": ["mga6"],

      "project": "example",

      "source_json": "{\"url\": \"http://localhost:5000/static/packagekit-1.1.3-2.mga6.src.rpm\"}",

      "source_type": 1,

-     "task_id": "12-mga6",

+     "task_id": "12",

      "user": "clime"

  }

  ]

@@ -23,8 +23,7 @@

          outsize=`jq '. | length' $OUT`

          for (( i = 0; i < $outsize; i++ )); do

              mkdir $MYTMPDIR && cd $MYTMPDIR

-             read repo_name git_hash pkg_name task_id <<< `jq ".[$i] | .repo_name, .git_hash, .pkg_name, .task_id" $OUT`

-             branch=`echo ${task_id//\"} | cut -d- -f2`

+             read branch repo_name git_hash pkg_name task_id <<< `jq ".[$i] | .branch, .repo_name, .git_hash, .pkg_name, .task_id" $OUT`

              if [[ git_hash != null ]]; then

                  rlLog "-------------- TASK: ${task_id//\"} --------------"

                  rlRun "git clone http://localhost/cgit/${repo_name}.git" 0

@@ -45,7 +45,7 @@

          self.task_id = None

          self.user = None

          self.project = None

-         self.branch = None

+         self.branches = []

  

          self.source_type = None

          self.source_json = None
@@ -53,7 +53,7 @@

  

          self.package_name = None

          self.package_version = None

-         self.git_hash = None

+         self.git_hashes = {}

  

          # For SRPM_LINK and SRPM_UPLOAD

          self.package_url = None
@@ -100,7 +100,7 @@

          task.user = dict_data["user"]

          task.project = dict_data["project"]

  

-         task.branch = dict_data["branch"]

+         task.branches = dict_data["branches"]

          task.source_type = dict_data["source_type"]

          task.source_json = dict_data["source_json"]

          task.source_data = json.loads(dict_data["source_json"])
@@ -142,13 +142,21 @@

  

          return task

  

-     def get_dict_for_frontend(self):

+     def get_dict_for_frontend(self, branch):

+         if not branch in self.git_hashes or not self.git_hashes[branch]:

+             return {

+                 "task_id": self.task_id,

+                 "error": "Could not import this branch.",

+                 "branch": branch,

+             }

+ 

          return {

              "task_id": self.task_id,

              "pkg_name": self.package_name,

              "pkg_version": self.package_version,

              "repo_name": self.reponame,

-             "git_hash": self.git_hash

+             "git_hash": self.git_hashes[branch],

+             "branch": branch,

          }

  

  
@@ -618,15 +626,17 @@

                  log.info("Package already exists...continuing")

              else:

                  raise e

-         try:

-             cmd = ["/usr/share/dist-git/mkbranch", task.branch, task.reponame]

-             check_output(cmd, stderr=subprocess.STDOUT)

-         except CalledProcessError as e:

-             log.error("cmd: {}, rc: {}, msg: {}".format(cmd, e.returncode, e.output))

-             if e.returncode == 128:

-                 log.info("Branch already exists...continuing")

-             else:

-                 raise e

+ 

+         for branch in task.branches:

+             try:

+                 cmd = ["/usr/share/dist-git/mkbranch", branch, task.reponame]

+                 check_output(cmd, stderr=subprocess.STDOUT)

+             except CalledProcessError as e:

+                 log.error("cmd: {}, rc: {}, msg: {}".format(cmd, e.returncode, e.output))

+                 if e.returncode == 128:

+                     log.info("Branch already exists...continuing")

+                 else:

+                     raise e

  

      def post_back(self, data_dict):

          """
@@ -661,22 +671,18 @@

                  provider = MockScmProvider(task, fetched_srpm_path, self.opts)

                  tarball_path, spec_path, task.package_name, task.package_version = provider.get_sources()

                  self.before_git_import(task)

-                 task.git_hash = self.distgit_import(task, tarball_path, spec_path)

+                 task.git_hashes = self.distgit_import(task, tarball_path, spec_path)

                  self.after_git_import()

              else:

                  provider = SourceProvider(task, fetched_srpm_path, self.opts)

                  provider.get_srpm()

                  task.package_name, task.package_version = self.pkg_name_evr(fetched_srpm_path)

                  self.before_git_import(task)

-                 task.git_hash = self.git_import_srpm(task, fetched_srpm_path)

+                 task.git_hashes = self.git_import_srpm(task, fetched_srpm_path)

                  self.after_git_import()

  

-             log.debug("sending a response - success")

-             self.post_back(task.get_dict_for_frontend())

- 

          except CoprDistGitException as e:

              log.exception("Exception raised during srpm import:")

-             self.post_back_safe({"task_id": task.task_id, "error": e.strtype})

          except Exception as e:

              log.exception("Unexpected error")

              self.post_back_safe({"task_id": task.task_id, "error": "unknown"})
@@ -684,7 +690,12 @@

          finally:

              provider.cleanup()

              shutil.rmtree(tmp_root, ignore_errors=True)

-             self.teardown_per_task_logging(per_task_log_handler)

+ 

+         log.info("sending a responses for branches {0}".format(', '.join(task.branches)))

+         for branch in task.branches:

+             self.post_back_safe(task.get_dict_for_frontend(branch))

+         self.teardown_per_task_logging(per_task_log_handler)

+ 

  

      def setup_per_task_logging(self, task):

          # Avoid putting logs into subdirectories when dist git branch name

@@ -133,4 +133,8 @@

              cp, "dist-git", "mock_scm_chroot", "fedora-rawhide-x86_64"

          )

  

+         opts.git_base_url = _get_conf(

+             cp, "dist-git", "git_base_url", "/var/lib/dist-git/git/%(module)s"

+         )

+ 

          return opts

file modified
+138 -68
@@ -7,11 +7,11 @@

  import traceback

  import types

  import grp

+ import subprocess

  

  # pyrpkg uses os.getlogin(). It requires tty which is unavailable when we run this script as a daemon

  # very dirty solution for now

  import pwd

- from dist_git.exceptions import PackageImportException

  

  os.getlogin = lambda: pwd.getpwuid(os.getuid())[0]

  # monkey patch end
@@ -21,7 +21,6 @@

  

  log = logging.getLogger(__name__)

  

- 

  def my_upload_fabric(opts):

      def my_upload(repo_dir, reponame, abs_filename, filehash):

          """
@@ -63,7 +62,6 @@

      :param result: shared dict from Manager().dict()

      """

  

-     git_base_url = "/var/lib/dist-git/git/%(module)s"

      repo_dir = os.path.join(tmp_dir, task.package_name)

      log.debug("repo_dir: {}".format(repo_dir))

      # use rpkg for importing the source rpm
@@ -71,7 +69,7 @@

                          lookaside="",

                          lookasidehash="md5",

                          lookaside_cgi="",

-                         gitbaseurl=git_base_url,

+                         gitbaseurl=opts.git_base_url,

                          anongiturl="",

                          branchre="",

                          kojiconfig="",
@@ -86,35 +84,90 @@

      # I also add one parameter "repo_dir" to that function with this hack

      commands.lookasidecache.upload = types.MethodType(my_upload_fabric(opts), repo_dir)

      try:

-         log.info("clone the pkg repository into tmp directory")

-         commands.clone(module, tmp_dir, task.branch)

-         log.info("import the source rpm into git and save filenames of sources")

-         upload_files = commands.import_srpm(src_filepath)

+         log.info("clone the '{0}' pkg repository into tmp directory".format(module))

+         commands.clone(module, tmp_dir)

      except:

-         log.exception("Failed to import the source rpm: {}".format(src_filepath))

+         log.exception("Failed to clone the repo.")

          return

  

-     log.info("save the source files into lookaside cache")

-     oldpath = os.getcwd()

-     os.chdir(repo_dir) # we need to be in repo_dir for the following to work

-     try:

-         commands.upload(upload_files, replace=True)

-     except:

-         log.exception("Error during source uploading")

-         return

+     message = "import_srpm"

+     committed = set()

+     for branch in task.branches:

+         result[branch] = False # failure by default

+         log.info("checkout '{0}' branch".format(branch))

+         commands.switch_branch(branch)

+ 

+         oldpath = os.getcwd()

+         try:

+             os.chdir(repo_dir) # we need to be in repo_dir for the following to work

+             if not committed:

+                 log.info("save the files from {0} into lookaside cache".format(src_filepath))

+                 upload_files = commands.import_srpm(src_filepath)

+                 commands.upload(upload_files, replace=True)

+                 try:

+                     commands.commit(message)

+                 except Exception:

+                     log.exception("error during commit (probably nothing to commit), ignored")

+             else:

+                 sync_actual_branch(committed, branch, message)

+ 

+             # Mark the branch as committed.

+             committed.add(branch)

+         except:

+             log.exception("Error during upload, commit or merge.")

+             continue

+         finally:

+             os.chdir(oldpath)

+ 

+         log.debug("git push")

+         message = "import_srpm"

+         try:

+             commands.push()

+             log.debug("commit and push done")

+         except rpkgError:

+             log.exception("error during push, ignored")

+ 

+         try:

+             # Ensure that nothing was lost.

+             commands.check_repo(all_pushed=True)

+             # 'commands.commithash' is "cached" property, we need to re-fresh

+             # the actual commit id..

+             commands.load_commit()

+             result[branch] = commands.commithash

+         except:

+             pass

+ 

+ 

+ def sync_actual_branch(committed_branches, actual_branch, message):

+     """

+     Reset the 'actual_branch' contents to contents of all branches in

+     already 'committed_branches' (param is set of strings).  But if possible,

+     try to fast-forward merge only to minimize the git payload and to keep the

+     git history as flatten as possible across all branches.

+     Before calling this method, ensure that you are in the git directory and the

+     'actual_branch' is checked out.

+     """

+     for branch in committed_branches:

+         # Try to fast-forward merge against any other already pushed branch.

+         # Note that if the branch is already there then merge request is no-op.

+         if not subprocess.call(['git', 'merge', branch, '--ff-only']):

+             log.debug("merged '{0}' fast forward into '{1}' or noop".format(branch, actual_branch))

+             return

  

-     os.chdir(oldpath)

+     # No --fast-forward merge possible -> reset to the first available one.

+     branch = next(iter(committed_branches))

+     log.debug("resetting branch '{0}' to contents of '{1}'".format(actual_branch, branch))

+     subprocess.check_call(['git', 'read-tree', '-m', '-u', branch])

  

-     log.debug("git push")

-     message = "import_srpm"

-     try:

-         commands.commit(message)

-         commands.push()

-         log.debug("commit and push done")

-     except rpkgError:

-         log.exception("error during commit and push, ignored")

+     # Get the AuthorDate from the original commit, to have consistent feeling.

+     date = subprocess.check_output(['git', 'show', branch, '-q', '--format=%ai'])

  

-     result["hash"] = commands.commithash

+     if subprocess.call(['git', 'diff', '--cached', '--exit-code']):

+         # There's something to commit.

+         subprocess.check_call(['git', 'commit', '--no-verify', '-m', message,

+             '--date', date])

+     else:

+         log.debug("nothing to commit into branch '{0}'".format(actual_branch))

  

  

  def do_git_srpm_import(opts, src_filepath, task, tmp_dir):
@@ -127,14 +180,9 @@

                     args=(opts, src_filepath, task, tmp_dir, result_dict))

      proc.start()

      proc.join()

-     if result_dict.get("hash") is None:

-         raise PackageImportException("Failed to import the source rpm: {}".format(src_filepath))

- 

-     return str(result_dict["hash"])

+     return result_dict

  

  def do_distgit_import(opts, tarball_path, spec_path, task, tmp_dir):

-     git_base_url = "/var/lib/dist-git/git/%(module)s"

- 

      repo_dir = os.path.join(tmp_dir, task.package_name)

      log.debug("repo_dir: {}".format(repo_dir))

  
@@ -143,7 +191,7 @@

                          lookaside="",

                          lookasidehash="md5",

                          lookaside_cgi="",

-                         gitbaseurl=git_base_url,

+                         gitbaseurl=opts.git_base_url,

                          anongiturl="",

                          branchre="",

                          kojiconfig="",
@@ -160,41 +208,63 @@

      # I also add one parameter "repo_dir" to that function with this hack

      commands.lookasidecache.upload = types.MethodType(my_upload_fabric(opts), repo_dir)

  

+     result = {}

+ 

      try:

          log.info("clone the pkg repository into tmp directory")

-         commands.clone(module, tmp_dir, task.branch)

- 

-         shutil.copy(spec_path, repo_dir)

- 

-         for f in ('.gitignore', 'sources'):

-             if not os.path.exists(repo_dir+"/"+f):

-                 open(repo_dir+"/"+f, 'w').close()

- 

-         commands.repo.index.add(('.gitignore', 'sources', os.path.basename(spec_path)))

+         commands.clone(module, tmp_dir)

      except:

          log.exception("Failed to clone the Git repository and add files.")

-         return

- 

-     oldpath = os.getcwd()

-     log.info(repo_dir)

- 

-     os.chdir(repo_dir) # we need to be in repo_dir for the following to work

-     try:

-         log.info("save the source files into lookaside cache")

-         commands.upload([tarball_path], replace=True)

-     except:

-         log.exception("Error during source uploading")

-         return

- 

-     os.chdir(oldpath)

- 

-     log.debug("git push")

-     message = "import_srpm"

-     try:

-         commands.commit(message)

-         commands.push()

-         log.debug("commit and push done")

-     except rpkgError:

-         log.exception("error during commit and push, ignored")

- 

-     return commands.commithash

+         return result

+ 

+     message = "import_mock_scm"

+     committed = set()

+     for branch in task.branches:

+         result[branch] = False # failure by default

+         log.info("checkout '{0}' branch".format(branch))

+         commands.switch_branch(branch)

+ 

+         if not committed:

+             shutil.copy(spec_path, repo_dir)

+             for f in ('.gitignore', 'sources'):

+                 if not os.path.exists(repo_dir+"/"+f):

+                     open(repo_dir+"/"+f, 'w').close()

+             commands.repo.index.add(('.gitignore', 'sources', os.path.basename(spec_path)))

+ 

+         oldpath = os.getcwd()

+         try:

+             os.chdir(repo_dir) # we need to be in repo_dir for the following to work

+             log.info("Working in: {0}".format(repo_dir))

+ 

+             if not committed:

+                 log.info("save the source files into lookaside cache")

+                 commands.upload([tarball_path], replace=True)

+                 try:

+                     commands.commit(message)

+                 except Exception:

+                     # Probably nothing to be committed.  We historically ignore issues here.

+                     log.exception("error during commit (probably nothing to commit), ignored")

+             else:

+                 sync_actual_branch(committed, branch, message)

+ 

+             # Mark the branch as committed.

+             committed.add(branch)

+ 

+         except:

+             log.exception("Error during source uploading, merge, or commit")

+             continue

+         finally:

+             os.chdir(oldpath)

+ 

+ 

+         log.debug("git push")

+         try:

+             commands.push()

+             log.debug("commit and push done")

+         except rpkgError:

+             log.exception("error during push, ignored")

+ 

+         commands.load_commit()

+         result[branch] = commands.commithash

+ 

+     return result

file modified
+3 -1
@@ -1,1 +1,3 @@

- PYTHONPATH=./src: python2 -B -m pytest --cov-report term-missing --cov ./dist_git tests $@

+ #! /bin/sh

+ 

+ PYTHONPATH=./src: python2 -B -m pytest --cov-report term-missing --cov ./dist_git tests "$@"

@@ -0,0 +1,1 @@

+ empty git repo

@@ -0,0 +1,54 @@

+ #! /bin/sh

+ 

+ : ${dummy_version="$(date +"%Y%m%d_%H%M")"}

+ 

+ spec=qiuck-package.spec

+ tarball=tarball.tar.gz

+ 

+ cat > "$spec" <<EOF

+ Name:		quick-package

+ Version:	$dummy_version

+ Release:	0%{?dist}

+ Summary:	dummy package

+ 

+ Group:		NONE

+ License:	GPL

+ URL:		http://example.com/

+ 

+ Source0:	$tarball

+ 

+ %{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}

+ 

+ %description

+ 

+ 

+ %prep

+ 

+ 

+ %build

+ 

+ 

+ %install

+ rm -rf \$RPM_BUILD_ROOT

+ mkdir -p \$RPM_BUILD_ROOT/%{_pkgdocdir}

+ xz -d %{SOURCE0} --stdout > \$RPM_BUILD_ROOT/%{_pkgdocdir}/README

+ 

+ 

+ %files

+ %doc %{_pkgdocdir}/README

+ 

+ %changelog

+ * Thu Jun 05 2014 Pavel Raiskup <praiskup@redhat.com> - $dummy_version-1

+ - does nothing!

+ EOF

+ 

+ echo "nothing special here" > README

+ tar czf "$tarball" README

+ 

+ rpmbuild --define "_sourcedir $PWD" \

+     --define "_rpmdir $PWD" \

+     --define "_specdir $PWD" \

+     --define "_builddir $PWD" \

+     --define "_srcrpmdir $PWD" \

+     --define "dist %{nil}" \

+     -bs "$spec"

@@ -0,0 +1,294 @@

+ # coding: utf-8

+ 

+ import tempfile

+ import pytest

+ import shutil

+ import time

+ from bunch import Bunch

+ import os

+ from subprocess import call, check_output

+ import mock

+ 

+ from dist_git.srpm_import import actual_do_git_srpm_import, do_distgit_import

+ 

+ def scriptdir():

+     return os.path.dirname(os.path.realpath(__file__))

+ 

+ @pytest.yield_fixture

+ def mc_group():

+     with mock.patch("os.setgid") as handle:

+         yield handle

+ 

+ 

+ @pytest.yield_fixture(scope='module')

+ def tmpdir():

+     tdir = tempfile.mkdtemp()

+     print("working in " + tdir)

+     os.chdir(tdir)

+     yield tdir

+     shutil.rmtree(tdir)

+ 

+ 

+ @pytest.yield_fixture

+ def git_repos(tmpdir):

+     dirname = 'git_repos'

+     os.mkdir(dirname)

+     yield os.path.join(tmpdir, dirname)

+     shutil.rmtree('git_repos')

+ 

+ @pytest.yield_fixture

+ def origin(git_repos):

+     print("creating origin")

+     modulename = 'bob/blah/quick-package'

+     abspath = os.path.join(git_repos, modulename)

+     assert 0 == os.system(

+     """ set -e

+     mkdir -p {module}

+     git init --bare {module}

+     git clone {module} work

+     cd work

+     echo 'empty git repo' > README

+     git add README

+     git commit -m 'initial commit'

+     git push

+     """.format(module=abspath)

+     )

+     yield os.path.join(git_repos, modulename)

+     shutil.rmtree('work')

+ 

+ @pytest.yield_fixture

+ def branches(origin):

+ 

+     branches = ['f20', 'epel7', 'el6', 'fedora/26', 'rhel-7']

+     middle_branches = ['el6', 'fedora/26']

+     border_branches = ['f20', 'epel7', 'rhel-7']

+ 

+     assert 0 == os.system(

+     """ set -e

+     cd work

+     for branch in {branches}

+     do

+         git branch $branch

+         git checkout $branch

+         git push origin $branch

+     done

+     """.format(branches=' '.join(branches))

+     )

+ 

+     yield (origin, branches, middle_branches, border_branches)

+ 

+ @pytest.yield_fixture

+ def lookaside(tmpdir):

+     assert 0 == os.system(

+     """ set -e

+     mkdir lookaside

+     """)

+     yield True

+     shutil.rmtree('lookaside')

+ 

+ 

+ 

+ @pytest.yield_fixture

+ def opts_basic(tmpdir, lookaside):

+     class _:

+         pass

+     opts = _()

+     opts.lookaside_location = os.path.join(tmpdir, 'lookaside')

+     opts.git_base_url = os.path.join(tmpdir, 'git_repos/%(module)s')

+     yield opts

+ 

+ 

+ def trivial_spec_and_tarball(version):

+     if version in trivial_spec_and_tarball.map:

+         return trivial_spec_and_tarball.map[version]

+ 

+     where = os.path.join(os.getcwd(), 'raw_files', str(version))

+ 

+     assert 0 == os.system(""" set -e

+     mkdir -p {where}

+     cd {where}

+     export dummy_version={version}

+     {script_dir}/generate_qiuck_package --nosrpm

+     pwd

+     find

+     """.format(where=where, version=version, script_dir=scriptdir()))

+ 

+     result = (

+         os.path.join(where, './qiuck-package.spec'),

+         os.path.join(where, 'tarball.tar.gz'),

+         Bunch({

+             'package_name': 'quick-package',

+             'user': 'bob',

+             'project':'blah',

+         })

+     )

+     trivial_spec_and_tarball.map[version] = result

+     return result

+ 

+ trivial_spec_and_tarball.map = {}

+ 

+ 

+ def trivial_srpm(version):

+     if version in trivial_srpm.map:

+         return trivial_srpm.map[version]

+ 

+     assert 0 == os.system( """ set -e

+     mkdir -p srpm_dir && cd srpm_dir

+     export dummy_version={version}

+     {script_dir}/generate_qiuck_package

+     """.format(script_dir=scriptdir(), version=version)

+     )

+     srpm_path = os.path.join(os.getcwd(), 'srpm_dir',

+                              'quick-package-{0}-0.src.rpm'.format(version))

+     result = (

+         srpm_path,

+         Bunch({

+             'package_name': 'quick-package',

+             'user': 'bob',

+             'project':'blah',

+         })

+     )

+ 

+     trivial_srpm.map[version] = result

+     return result

+ 

+ trivial_srpm.map = {}

+ 

+ 

+ def branch_hash(directory, branch):

+     cmd = 'set -e && cd {0} && git rev-parse {1}'.format(directory, branch)

+     return check_output(cmd, shell=True)

+ 

+ 

+ def compare_branches(branches, remote, local=None, result_hash=None):

+     sample_hash = branch_hash(remote, branches[0])

+ 

+     for branch in branches[1:]:

+         remote_hash = branch_hash(remote, branch)

+         assert remote_hash

+         assert remote_hash == sample_hash

+ 

+         if local:

+             branch_hash(local, branch) == sample_hash

+         if result_hash:

+             assert result_hash[branch] in sample_hash

+ 

+     return sample_hash

+ 

+ 

+ @pytest.fixture

+ def initial_commit_everywhere(request, branches, mc_group, opts_basic):

+     origin, all_branches, _, _ = branches

+ 

+     # Commit first version and compare remote side with local side.

+     result = request.instance.commit_to_branches(all_branches, opts_basic, 1)

+     init_hash = compare_branches(all_branches, origin, result_hash=result)

+ 

+     return (branches, opts_basic, init_hash)

+ 

+ 

+ class TestSrpmImport(object):

+     import_type = 'srpm'

+ 

+     def commit_to_branches(self, to_branches, opts, version):

+         workdir = os.path.join(os.getcwd(), 'workdir-for-import')

+         os.mkdir(workdir)

+ 

+         if self.import_type == 'srpm':

+             srpm, task = trivial_srpm(version)

+         else:

+             spec, tarball, task = trivial_spec_and_tarball(version)

+ 

+         task.branches = to_branches

+         result = {}

+ 

+         if self.import_type == 'srpm':

+             actual_do_git_srpm_import(opts, srpm, task, workdir, result)

+         else:

+             result = do_distgit_import(opts, tarball, spec, task, workdir)

+ 

+         shutil.rmtree(workdir)

+         return result

+ 

+     def setup_method(self, method):

+         trivial_srpm.map = {}

+         trivial_spec_and_tarball.map = {}

+ 

+     def test_merged_everything(self, initial_commit_everywhere):

+         branches, opts, v1_hash = initial_commit_everywhere

+         origin, all_branches, middle_branches, border_branches = branches

+ 

+         result = self.commit_to_branches(border_branches, opts, 2)

+         v2_hash = compare_branches(border_branches, origin, result_hash=result)

+         unchanged = compare_branches(middle_branches, origin)

+         assert unchanged == v1_hash

+         assert v2_hash != v1_hash

+ 

+         # And commit again to all branches.  Because we first committed to

+         # border branches, the "merge" algorithm is able to "fast forward" merge

+         # also middle branches.... So all branches should have the same hash

+         # now.

+         result = self.commit_to_branches(all_branches, opts, 3)

+         v3_hash = compare_branches(all_branches, origin, result_hash=result)

+         assert v3_hash != v1_hash

+         assert v3_hash != v2_hash

+ 

+     def test_diverge_middle_branches(self, initial_commit_everywhere):

+         branches, opts, v1_hash = initial_commit_everywhere

+         origin, all_branches, middle_branches, border_branches = branches

+ 

+         result = self.commit_to_branches(middle_branches, opts, 2)

+         v2_hash = compare_branches(middle_branches, origin, result_hash=result)

+         unchanged = compare_branches(border_branches, origin)

+         assert unchanged == v1_hash

+         assert v2_hash != v1_hash

+ 

+         # This means that (a) first we commit second patch to 'border_branches',

+         # and then we commit third patch to 'middle_branches'.

+         result = self.commit_to_branches(all_branches, opts, 3)

+         v3_hash_a = compare_branches(middle_branches, origin, result_hash=result)

+         v3_hash_b = compare_branches(border_branches, origin, result_hash=result)

+ 

+         assert v3_hash_a != v3_hash_b

+         assert v3_hash_a != v2_hash

+         assert v3_hash_b != v2_hash

+         assert v3_hash_a != v1_hash

+         assert v3_hash_b != v1_hash

+ 

+     def test_no_op_1(self, initial_commit_everywhere):

+         branches, opts, v1_hash = initial_commit_everywhere

+         origin, all_branches, middle_branches, border_branches = branches

+ 

+         # This imports the same srpm, which means nothing should change in the

+         # git repository.

+         result = self.commit_to_branches(all_branches, opts, 1)

+         unchanged = compare_branches(all_branches, origin, result_hash=result)

+         # Hash is unchanged, while still the 'result' above is fine!

+         assert unchanged == v1_hash

+ 

+         result = self.commit_to_branches(border_branches, opts, 2)

+         v2_hash_a = compare_branches(border_branches, origin, result_hash=result)

+         assert v2_hash_a != v1_hash

+ 

+         # This should sync the remaining branches.  It also means that

+         # border_branches are importing the same version, thus nothing happens

+         # to them.

+         result = self.commit_to_branches(all_branches, opts, 2)

+         v2_hash_b = compare_branches(all_branches, origin, result_hash=result)

+         assert v2_hash_a == v2_hash_b

+ 

+         result = self.commit_to_branches(middle_branches, opts, 3)

+         v3_hash_a = compare_branches(middle_branches, origin, result_hash=result)

+         assert v3_hash_a != v2_hash_a

+ 

+         # Get different timestamp to get different hash.

+         time.sleep(1)

+ 

+         result = self.commit_to_branches(all_branches, opts, 3)

+         v3_hash_b = compare_branches(border_branches, origin, result_hash=result)

+         assert v3_hash_a != v3_hash_b

+         assert v3_hash_a != v2_hash_a

+ 

+ 

+ class TestRawImport(TestSrpmImport):

+     import_type = 'raw'

@@ -130,7 +130,7 @@

              "user": self.USER_NAME,

              "project": self.PROJECT_NAME,

  

-             "branch": self.BRANCH,

+             "branches": [ self.BRANCH ],

              "source_type": SourceType.SRPM_LINK,

              "source_json": json.dumps({"url": "http://example.com/pkg.src.rpm"})

          }
@@ -139,7 +139,7 @@

              "user": self.USER_NAME,

              "project": self.PROJECT_NAME,

  

-             "branch": self.BRANCH,

+             "branches": [ self.BRANCH ],

              "source_type": SourceType.SRPM_UPLOAD,

              "source_json": json.dumps({"tmp": "tmp_2", "pkg": "pkg_2.src.rpm"})

          }
@@ -179,7 +179,7 @@

          task = self.dgi.try_to_obtain_new_tasks()[0]

          assert task.task_id == self.task_data_1["task_id"]

          assert task.user == self.USER_NAME

-         assert task.branch == self.BRANCH

+         assert self.BRANCH in task.branches

          assert task.package_url == "http://example.com/pkg.src.rpm"

  

      def test_try_to_obtain_ok_2(self, mc_get):
@@ -187,7 +187,7 @@

          task = self.dgi.try_to_obtain_new_tasks()[0]

          assert task.task_id == self.task_data_2["task_id"]

          assert task.user == self.USER_NAME

-         assert task.branch == self.BRANCH

+         assert self.BRANCH in task.branches

          assert task.package_url == "http://front/tmp/tmp_2/pkg_2.src.rpm"

  

      def test_try_to_obtain_new_task_unknown_source_type(self, mc_get):
@@ -313,12 +313,12 @@

              setattr(self.dgi, name, mc)

  

          mc_methods["pkg_name_evr"].return_value = self.PACKAGE_NAME, self.PACKAGE_VERSION

-         mc_methods["git_import_srpm"].return_value = self.FILE_HASH

+         mc_methods["git_import_srpm"].return_value = {'f22': self.FILE_HASH}

  

          self.dgi.do_import(self.task_1)

          assert self.task_1.package_name == self.PACKAGE_NAME

          assert self.task_1.package_version == self.PACKAGE_VERSION

-         assert self.task_1.git_hash == self.FILE_HASH

+         assert self.task_1.git_hashes['f22'] == self.FILE_HASH

  

          for name in internal_methods:

              mc = mc_methods[name]

@@ -75,11 +75,12 @@

      @classmethod

      def get_build_importing_queue(cls):

          """

-         Returns BuildChroots which are waiting to be uploaded to dist git

+         Returns Builds which are waiting to be uploaded to dist git

          """

-         query = (models.BuildChroot.query.join(models.Build)

+         query = (models.Build.query.join(models.BuildChroot)

                   .filter(models.Build.canceled == false())

-                  .filter(models.BuildChroot.status == helpers.StatusEnum("importing")))

+                  .filter(models.BuildChroot.status == helpers.StatusEnum("importing"))

+                 )

          query = query.order_by(models.BuildChroot.build_id.asc())

          return query

  
@@ -643,21 +644,17 @@

      terminal_states = {StatusEnum("failed"), StatusEnum("succeeded"), StatusEnum("canceled")}

  

      @classmethod

-     def get_chroots_from_dist_git_task_id(cls, task_id):

+     def get_buildchroots_by_build_id_and_branch(cls, build_id, branch):

          """

-         Returns a list of BuildChroots identified with task_id

-         task_id consists of a name of git branch + build id

-         Example: 42-f22 -> build id 42, chroots fedora-22-*

+         Returns a list of BuildChroots identified by build_id and dist-git

+         branch name.

          """

-         build_id, branch = task_id.split("-", 1)

-         build = cls.get_by_id(build_id).one()

-         build_chroots = build.build_chroots

- 

-         # What chroots this branch is for?

-         branch_chroots = models.DistGitBranch.query.get(branch).chroots

-         branch_chroots = [x.name for x in branch_chroots]

- 

-         return [ch for ch in build_chroots if ch.name in branch_chroots]

+         return (

+             models.BuildChroot.query

+             .join(models.MockChroot)

+             .filter(models.BuildChroot.build_id==build_id)

+             .filter(models.MockChroot.distgit_branch_name==branch)

+         ).all()

  

  

      @classmethod

@@ -559,6 +559,17 @@

              return self.repos.split()

  

      @property

+     def import_task_id(self):

+         return str(self.id)

+ 

+     @property

+     def import_log_url(self):

+         if app.config["COPR_DIST_GIT_LOGS_URL"]:

+             return "{}/{}.log".format(app.config["COPR_DIST_GIT_LOGS_URL"],

+                                       self.import_task_id.replace('/', '_'))

+         return None

+ 

+     @property

      def result_dir_name(self):

          # We can remove this ugly condition after migrating Copr to new machines

          # It is throw-back from era before dist-git
@@ -958,10 +969,6 @@

          return "{}-{}".format(self.build_id, self.name)

  

      @property

-     def import_task_id(self):

-         return "{}-{}".format(self.build_id, self.mock_chroot.distgit_branch_name)

- 

-     @property

      def dist_git_url(self):

          if app.config["DIST_GIT_URL"]:

              if self.state == "forked":
@@ -975,13 +982,6 @@

          return None

  

      @property

-     def import_log_url(self):

-         if app.config["COPR_DIST_GIT_LOGS_URL"]:

-             return "{}/{}.log".format(app.config["COPR_DIST_GIT_LOGS_URL"],

-                                       self.import_task_id.replace('/', '_'))

-         return None

- 

-     @property

      def result_dir_url(self):

          return "/".join([app.config["BACKEND_BASE_URL"],

                           u"results",

@@ -178,9 +178,9 @@

                  {% endif %}

                </td>

                <td>

-                 {% if chroot.import_log_url %}

-                     <a href="{{chroot.import_log_url}}">

-                       {{ chroot.import_task_id }}.log

+                 {% if chroot.build.import_log_url %}

+                     <a href="{{chroot.build.import_log_url}}">

+                       {{ chroot.build.import_task_id }}.log

                      </a>

                  {% else %}

                    -

@@ -34,15 +34,18 @@

          builds_for_import = BuildsLogic.get_build_importing_queue().filter(models.Build.is_background == true()).limit(30)

  

      for task in builds_for_import:

-         copr = task.build.copr

+         copr = task.copr

+         branches = set()

+         for b_ch in task.build_chroots:

+             branches.add(b_ch.mock_chroot.distgit_branch_name)

  

          task_dict = {

              "task_id": task.import_task_id,

              "user": copr.owner_name, # TODO: user -> owner

-             "project": task.build.copr.name,

-             "branch": task.mock_chroot.distgit_branch_name,

-             "source_type": task.build.source_type,

-             "source_json": task.build.source_json,

+             "project": task.copr.name,

+             "branches": list(branches),

+             "source_type": task.source_type,

+             "source_json": task.source_json,

          }

          if task_dict not in builds_list:

              builds_list.append(task_dict)
@@ -66,10 +69,11 @@

      """

      result = {"updated": False}

  

-     if "task_id" in flask.request.json:

+     if "task_id" in flask.request.json and 'branch' in flask.request.json:

          app.logger.debug(flask.request.data)

          task_id = flask.request.json["task_id"]

-         build_chroots = BuildsLogic.get_chroots_from_dist_git_task_id(task_id)

+         branch = flask.request.json["branch"]

+         build_chroots = BuildsLogic.get_buildchroots_by_build_id_and_branch(task_id, branch)

          build = build_chroots[0].build

  

          # Is it OK?

@@ -72,7 +72,7 @@

      def test_build_queue_1(self, f_users, f_coprs, f_mock_chroots, f_builds, f_db):

          self.db.session.commit()

          data = BuildsLogic.get_build_importing_queue().all()

-         assert len(data) == 5

+         assert len(data) == 3

  

      def test_build_queue_2(self, f_users, f_coprs, f_mock_chroots, f_db):

          self.db.session.commit()

From now, each build request generates only one source import
request for dist-git machine; but the source is imported into all
affected dist-git branches at once.

Previously, for N chroots enabled in one build, the build request
was imported into dist-git N-times. For the SRPM_LINK or
SRPM_UPLOAD it was really expensive burden -- especially for large
RPMs (for each chroot/branch, the src.rpm was downloaded,
extracted and "uploaded" to lookaside cache). Similarly other
import methods (e.g. MOCK_SCM or GIT_AND_TITO) resulted for a
O(N) cloning/downloading. Fetching source N-times from
third-party providers was also sort of DoS'ing.

This also fixes serious race condition for imports: Because each
chroot was imported by separate worker before -- e.g. for
GIT_AND_TITO the git clones were done with slight time-shifts.
This in turn caused a risk that each chroot/branch could be
imported from completely different code-base.

Resolves #70, https://bugzilla.redhat.com/show_bug.cgi?id=1384609

rebased

6 years ago

rebased

6 years ago

rebased

6 years ago

Why not just use list here and append to it in the loop?

the name of the method does not fit names of its parameters.

Here, join with BuildChroot should no longer be necessary. You can order by models.Build.id below and the build chroot attributes being directly in 'task' are not needed in backend_general.py:dist_git_importing_queue. Doesn't it generate basically the same task into the queue several times? You switched order here but that does not really make any difference.

This whole code is strange. You use 'uploaded' in two different context here and in sync_with_branches. Why not simply checkout each branch and import, upload and commit to it as it was before? 'my_upload_fabric' takes care of not uploading the same thing twice or you can also do it here e.g. by keeping track of 'upload_files' and not uploading twice the same files. That way it would be much easier to check that the code works.

Please, look at the provided comments.

Intentionally using dictionary (hash table) to avoid duplicates with good time complexity.

I still have to filter by StatusEnum("importing").. so the join is needed, or? :) I'll try to have a look deeply .. Simply, if there's at least BuildChroot.status == importing, generate only one task into the task queue.

You could have used set and convert that into list in the end. That would much better communicate the intention.

Right, now 'importing' should be actually a state of build, not state of chroot with the logic you implemented. Well, we mainly need to ensure there are not duplicated tasks in the queue.

This whole code is strange. You use 'uploaded' in two different context here
and in sync_with_branches

That's the same context. sync_with_branches syncs branch with uploaded_branches, while it marks one additional branch as "uploaded".

Why not simply checkout each branch and import, upload and commit to it as it was before?

(1) Because of simplicity and performance -> re-importing of huge SRPMs (gigabytes) costs a lot of IO (i face this issue internally where we don't have huge IO throughput on dist-git side)
(2) I thought that the source is first uploaded, and then refused serverside ... but in copr case, it is not real "upload" ... nevermind, even counting the md5sum is useless when we know it is not needed
(3) because we want to have 'fast-forward' merges as long as it is possible (to keep the git storage rather compact (storage) and synoptical). This code guarantees that all branches in dist-git have the same git hash if that's possible.

you can also do it here e.g. by keeping track of 'upload_files' and not uploading twice the same files.

We can, but I'd rather keep the git "clean".

That way it would be much easier to check that the code works.

Hmm, testing requires both frontend and distgit up&running ... , but you can try dev-copr.devel.redhat.com to test that it works ... (Red Hat private service).

OK, the list is not duplicated, dunno why ... :/

Ok, actually, I guess, the order matters here. Sry.

But there is the order_by, what do you propose? I guess I need a small hint now ...

I meant that models.Build.query.join(models.BuildChroot) is different from models.BuildChroot.query.join(models.Build) then if the records are not duplicated. This is quite unexpected for me but if it works, then this part is ok.

I changed the name and the implementation (now the set of build chroots is taken directly from database).

rebased

6 years ago

Thanks for the review! Please take another look ... except from your points, I fixed bugs on dist-git side; so failure in 'git commit' is actually ignored.

Yeah, well I would like you to also provide tests for this. This was just quick pre-review and not sure if i won't have more comments (although the code looks pretty well already). See this: https://pagure.io/copr/copr/blob/master/f/beaker-tests/Regression/dist-git/tests/batch1 <- this needs to pass. And I would also like to see the tests covering that crazy merging ('read-tree') logic. Thank you.

You rely on later merging here for adding .gitignore, sources, and spec if a branch has been committed before? That's a very hackish solution. This whole thing is likely to contain mistakes and it's almost impossible for other programmers to develop upon this code.

It would be much better to do committed.union(sync_with_branches(committed, branch, message)) so that it is clear that 'committed' is being changed on this line. Otherwise, a reader needs to know wider context to understand this function, which is not a nice feature of it. The whole 'committed' construct is... worth reconsidering (rewriting), however. You obviously enjoy crazy hacks but I don't really.

Argh, this is long shot for me ... but I agree that I should give it a try.

Well, for the first pass -- first branch -- "everything is imported" ... and for other branches nothing else then --fast-forward merge (or reset if --fast-forward merge fails) to exactly the same git content of already committed branch might happen.

It is questionable what to do if the first (full import) branch fails ... I implemented it so the branch is skipped and the full import is tried against next branch.

Though yeah, I can fix more concrete remarks. I was tempted to revert 565eeb9 a lot -- that's huge hack (code duplication, not running in VM, duplicating mock code...), but that commit really speeds up "something" (even if we merged this PR eventually) -- so we should probably concentrate on de-duplication, somehow - but that's for another PR.

:) I'll keep the 'committed' argument read-only then, and I'll do committed.add(branch) after calling sync_with_branches (duplicated). Doing some a.x(b(x(c))) is not readable either.

Ad "crazy hack"/"hackish" things ... this is a bit personal POV, so I don't take this personally, I'm sure that all the code will be "re-factored out" soon anyways...

The point here is that you don't need to merge at all. You would have the same
performance with simply checking out each branch, creating files there,
uploading (if not uploaded before) and committing. That's just one boolean
'uploaded' = True/False. Also that code would be much easier to read and much
easier to maintain.

I know - that would be a bit easier code, but wrong code:

  • only 'merge --fast-forward' guarantees you the same git hash in all
    branches which are mirrored. This is good not only from readability POV, but
    it also saves a lot of space in git repository (those data can't be garbage
    collected, so it is payload forever...).

  • you propose "uploading" just once... But 'commands.upload()' (or something
    similar) is actually needed to have the 'sources' and '.gitignore'
    automatically edited by rpkg. OTOH, with 'git merge' you have this for free
    (actually 'git merge --fast-forward' is almost a no-op).

You claim that the code is not maintainable, but that's not true -- there's
one 'fast merge' git command, and about 3 commands for the cases fast forward
merge is not possible. That's way easier, natural and effective code. I'll
provide the test cases for it to avoid breakage in future.

The point here is that you don't need to merge at all. You would have the same
performance with simply checking out each branch, creating files there,
uploading (if not uploaded before) and committing. That's just one boolean
'uploaded' = True/False. Also that code would be much easier to read and much
easier to maintain.

I know - that would be a bit easier code, but wrong code:

only 'merge --fast-forward' guarantees you the same git hash in all
branches which are mirrored. This is good not only from readability POV, but
it also saves a lot of space in git repository (those data can't be garbage
collected, so it is payload forever...).

you propose "uploading" just once... But 'commands.upload()' (or something
similar) is actually needed to have the 'sources' and '.gitignore'
automatically edited by rpkg. OTOH, with 'git merge' you have this for free
(actually 'git merge --fast-forward' is almost a no-op).

You can read the generated content and then populate sources and .gitignore with it for the next branch. The result would be still easier to read than what you have written.

You claim that the code is not maintainable, but that's not true -- there's
one 'fast merge' git command, and about 3 commands for the cases fast forward
merge is not possible. That's way easier, natural and effective code. I'll
provide the test cases for it to avoid breakage in future.

Please, ensure that it works and I'll be ok with it.

Btw. sorry that I deleted the comment you reacted on. I got better understanding of the code you have written and didn't really want to object any further.

rebased

6 years ago

I've added the tests for srpm import for now.

rebased

6 years ago

Both "srpm" and "distgit" import functions are now tested.

That's nice but I don't see the Regression test fixed.

rebased

6 years ago

@clime wrote:

dist-git/tests/batch1 <- this needs to pass

I'm lost. The test (before my changes) hangs with:

[Errno 2] No such file or directory: '/tmp/copr/beaker-tests/Regression/dist-git/tests/batch1/build-tasks.json'
---------------
Could not load action-tasks.json from data directory /tmp/copr/beaker-tests/Regression/dist-git/tests/batch1
[Errno 2] No such file or directory: '/tmp/copr/beaker-tests/Regression/dist-git/tests/batch1/action-tasks.json'
---------------
 * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)

I don't see how this command could end immediately with exit status 0:

rlRun "/usr/share/copr/mocks/frontend/app.py $TESTPATH $TESTPATH/static" 0

What am I missing here? The app.py seems to be indefinitely listening server app...

I don't think it hangs. It is probably building the initial docker image. You can also check copr-dist-git logs or copr-dist-git service to know exactly what is happening.

rebased

6 years ago

I accidentally (for the first attempt) ran the runtest.sh from a pretty old copr git hash; this led to a pretty old copr-dist-git installation, which in turn caused some permission issues when I switched to the correct git hash and rebuilt a newer copr-dist-git.

The tests should work now. Thanks for the help!

Pull-Request has been merged by clime

6 years ago