#647 Checking a repo configuration before 'git push' with a git hook script
Merged a year ago by onosek. Opened a year ago by onosek.
onosek/rpkg git_hook_2  into  master

file modified
+2 -1
@@ -11,7 +11,8 @@ 

          python3-libmodulemd \

          python3-setuptools \

          rpmlint \

-         rpm-build

+         rpm-build \

+         rpmdevtools

  # development packages that are not part of packages' specfile \

  RUN dnf -y install \

          python3-tox \

file modified
+151 -3
@@ -24,9 +24,11 @@ 

  import re

  import shlex

  import shutil

+ import stat

  import subprocess

  import sys

  import tempfile

+ import textwrap

  import time

  from itertools import groupby

  from multiprocessing.dummy import Pool as ThreadPool
@@ -926,7 +928,7 @@ 

  

          if specs:

              # Prefer the spec matching the directory name

-             self._spec = os.path.basename(self.layout.specdir) + '.spec'

+             self._spec = os.path.basename(self.layout.root_dir) + '.spec'

              if specs != [self._spec]:

                  if self._spec not in specs:

                      self._spec = specs[0]
@@ -1626,6 +1628,7 @@ 

  

          if not bare_dir:

              self._add_git_excludes(os.path.join(path, git_dir))

+             self._add_git_pre_push_hook(os.path.join(path, git_dir))

  

          return

  
@@ -1711,6 +1714,7 @@ 

  

                  # Add excludes

                  self._add_git_excludes(branch_path)

+                 self._add_git_pre_push_hook(branch_path)

              except (git.GitCommandError, OSError) as e:

                  raise rpkgError('Could not locally clone %s from %s: %s'

                                  % (branch, repo_path, e))
@@ -1768,11 +1772,51 @@ 

          for item in self.git_excludes:

              git_excludes.add(item)

          if not os.path.exists(git_excludes_path):

-             # prepare ".git/info" directory if is missing

+             # prepare ".git/info" directory if it is missing

              os.makedirs(os.path.dirname(git_excludes_path))

          git_excludes.write()

          self.log.debug('Git-excludes patterns were added into %s' % git_excludes_path)

  

+     def _add_git_pre_push_hook(self, conf_dir):

+         """

+         Create pre-push hook script and write it in the location:

+         <repository_directory>/.git/hooks/pre-push

+         This hook script is run right before the 'git push' command.

+         The script contains one command - 'pre-push-check' that checks

+         for possible user mistakes.

+         """

+         tool_name = os.path.basename(sys.argv[0])  # rhpkg|fedpkg|...

+         hook_content = textwrap.dedent("""

+             #!/bin/bash

+ 

+             _remote="$1"

+             _url="$2"

+ 

+             exit_code=0

+             while read -r _local_ref local_sha _remote_ref _remote_sha

+             do

+                 command -v {0} >/dev/null 2>&1 || {{ echo >&2 "Warning: '{0}' is missing, \\

+             pre-push check is omitted. See .git/hooks/pre-push"; exit 0; }}

+                 {0} pre-push-check "$local_sha"

+                 ret_code=$?

+                 if [ $ret_code -ne 0 ] && [ $exit_code -eq 0 ]; then

+                     exit_code=$ret_code

+                 fi

+             done

+ 

+             exit $exit_code

+         """).strip().format(tool_name)

+         git_pre_push_hook_path = os.path.join(conf_dir, '.git/hooks/pre-push')

+         if not os.path.exists(os.path.dirname(git_pre_push_hook_path)):

+             # prepare ".git/hooks" directory if it is missing

+             os.makedirs(os.path.dirname(git_pre_push_hook_path))

+         with open(git_pre_push_hook_path, 'w') as hook_file:

+             hook_file.writelines(hook_content)

+         # set script's permissions as executable

+         file_stat = os.stat(git_pre_push_hook_path)

+         os.chmod(git_pre_push_hook_path, file_stat.st_mode | stat.S_IEXEC)

+         self.log.debug('Pre-push hook script was added into %s' % git_pre_push_hook_path)

+ 

      def commit(self, message=None, file=None, files=[], signoff=False):

          """Commit changes to a repository (optionally found at path)

  
@@ -2133,7 +2177,7 @@ 

  

          return patches_not_tracked

  

-     def push(self, force=False, extra_config=None):

+     def push(self, force=False, no_verify=False, extra_config=None):

          """Push changes to the remote repository"""

          self.check_repo(is_dirty=False, all_pushed=False)

  
@@ -2157,6 +2201,8 @@ 

          cmd.append('push')

          if force:

              cmd += ['-f']

+         if no_verify:

+             cmd += ['--no-verify']

          if self.quiet:

              cmd.append('-q')

          self._run_command(cmd, cwd=self.path)
@@ -4364,3 +4410,105 @@ 

                                  % (self.repo_name))

  

          return self._repo_name, version, release

+ 

+     def pre_push_check(self, ref):

+         show_hint = ('Hint: this check (pre-push hook script) can be bypassed by adding '

+                      'the argument \'--no-verify\' argument to the push command.')

+         try:

+             commit = self.repo.commit(ref)

+         except Exception:

+             self.log.error('Wrong reference to a commit: \'{0}\''.format(ref))

+             sys.exit(1)

+ 

+         try:

+             # Assume, that specfile names are same in the active branch

+             # and in the pushed branch (git checkout f37 && git push origin rawhide)

+             # in this case 'f37' is active branch and 'rawhide' is pushed branch.

+             specfile_path_absolute = os.path.join(self.layout.specdir, self.spec)

+             # convert to relative path

+             specfile_path = os.path.relpath(specfile_path_absolute, start=self.path)

+             spec_content = self.repo.git.cat_file("-p", "{0}:{1}".format(ref, specfile_path))

+         except Exception:

+             # It might be the case of an empty commit

+             self.log.warning('Specfile doesn\'t exist. Push operation continues.')

+             return

+ 

+         # load specfile content from pushed branch and save it into a temporary file

+         with tempfile.NamedTemporaryFile(mode="w+") as temporary_spec:

+             temporary_spec.write(spec_content)

+             temporary_spec.flush()

+             # get all source files from the specfile (including patches)

+             cmd = ('spectool', '-l', temporary_spec.name)

+             ret, stdout, _ = self._run_command(cmd, return_text=True, return_stdout=True)

+             if ret != 0:

+                 self.log.error('Command \'{0}\' failed. Push operation '

+                                'was cancelled.'.format(' '.join(cmd)))

+                 self.log.warning(show_hint)

+                 sys.exit(2)

+ 

+         source_files = []

+         # extract source files from the spectool's output

+         for line in stdout.split('\n'):

+             file_location = re.sub(r'(?:Source|Patch)\d+\s*:\s*(\w+)', r'\1', line, re.IGNORECASE)

+             if file_location:

+                 # find out the format of the source file path. From URL use just the file name.

+                 # We want to keep hierarchy of the files if possible

+                 res = urllib.parse.urlparse(file_location)

+                 if res.scheme and res.netloc:

+                     source_files.append(os.path.basename(res.path))

+                 else:

+                     source_files.append(file_location)

+ 

+         if not len(source_files):

+             self.log.warning('No source files found in the specfile \'{0}\'. '

+                              'Push operation continues.'.format(specfile_path))

+ 

+         try:

+             sources_file_path_absolute = self.sources_filename

+             # convert to relative path

+             sources_file_path = os.path.relpath(sources_file_path_absolute, start=self.path)

+             sources_file_content = self.repo.git.cat_file(

+                 '-p', '{0}:{1}'.format(ref, sources_file_path))

+         except Exception:

+             self.log.warning('\'sources\' file doesn\'t exist. Push operation continues.')

+             # NOTE: check doesn't fail when 'sources' file doesn't exist. Just skips the rest.

+             # it might be the case of the push without 'sources' = retiring the repository

+             return

+ 

+         # load 'sources' file content from pushed branch and save it into a temporary file

+         with tempfile.NamedTemporaryFile(mode="w+") as temporary_sources_file:

+             temporary_sources_file.write(sources_file_content)

+             temporary_sources_file.flush()

+             # parse 'sources' files content

+             sourcesf = SourcesFile(temporary_sources_file.name, self.source_entry_type)

+         sourcesf_entries = set(item.file for item in sourcesf.entries)

+ 

+         # list of all files (their relative paths) in the commit

+         repo_entries = set(item.path for item in commit.tree.traverse() if item.type != "tree")

+ 

+         # check whether every source file is either listed in the 'sources' file or tracked in git

+         for source_file in source_files:

+             listed = source_file in sourcesf_entries

+             tracked = source_file in repo_entries

+             if not (listed or tracked):

+                 self.log.error('Source file \'{0}\' was neither listed in the \'sources\' file '

+                                'nor tracked in git. '

+                                'Push operation was cancelled'.format(source_file))

+                 self.log.warning(show_hint)

+                 sys.exit(3)

+ 

+         # verify all file entries in 'sources' were uploaded to the lookaside cache

+         for entry in sourcesf.entries:

+             filename = entry.file

+             hash = entry.hash

+             file_exists_in_lookaside = self.lookasidecache.remote_file_exists(

+                 self.ns_repo_name if self.lookaside_namespaced else self.repo_name,

+                 filename,

+                 hash)

+             if not file_exists_in_lookaside:

+                 self.log.error('Source file (or tarball) \'{}\' wasn\'t uploaded to the lookaside '

+                                'cache. Push operation was cancelled.'.format(filename))

+                 self.log.warning(show_hint)

+                 sys.exit(4)

+ 

+         return 0  # The push operation continues

file modified
+52 -1
@@ -448,6 +448,7 @@ 

          self.register_new()

          self.register_new_sources()

          self.register_patch()

+         self.register_pre_push_check()

          self.register_prep()

          self.register_pull()

          self.register_push()
@@ -1375,6 +1376,10 @@ 

          push_parser = self.subparsers.add_parser(

              'push', help='Push changes to remote repository')

          push_parser.add_argument('--force', '-f', help='Force push', action='store_true')

+         push_parser.add_argument(

+             '--no-verify',

+             help='Bypass the pre-push hook script. No check of the branch will prevent the push.',

+             action='store_true')

          push_parser.set_defaults(command=self.push)

  

      def register_remote(self):
@@ -1785,6 +1790,47 @@ 

          self.container_build_setup_parser.set_defaults(

              command=self.container_build_setup)

  

+     def register_pre_push_check(self):

+         """Register command line parser for subcommand pre_push_check

+ 

+         .. versionadded:: 1.44

+         """

+ 

+         help_msg = 'Check whether the eventual "git push" command could proceed'

+         description = textwrap.dedent('''

+             Performs few checks of the repository before pushing changes.

+             "git push" command itself is not part of this check.

+ 

+             Checks include:

+             * parse specfile for source files and verifies it is noted also

+               in the 'sources' file.

+             * verifies, that all source files from 'sources' file were uploaded

+               to the lookaside cache.

+ 

+             Checks can be performed manually by executing 'pre-push-check' command.

+             But originally, it is designed to be executed automatically right before

+             'git push' command is started. Every time the 'git push' command

+             is executed, the 'pre-push' hook script runs first.

+ 

+             Path: <repository_directory>/.git/hooks/pre-push

+ 

+             This hook script is created after a new repository is cloned. Previously

+             created repositories don't contain this hook script.

+ 

+             To disable these checks, remove the hook script.

+         ''')

+ 

+         pre_push_check_parser = self.subparsers.add_parser(

+             'pre-push-check',

+             formatter_class=argparse.RawDescriptionHelpFormatter,

+             help=help_msg,

+             description=description)

+         pre_push_check_parser.add_argument(

+             "ref", default='HEAD', nargs="?",

+             help='Reference to the commit that will be checked'

+                  'Accepts hash or reference for example \'refs/heads/f37\'')

+         pre_push_check_parser.set_defaults(command=self.pre_push_check)

+ 

      # All the command functions go here

      def usage(self):

          self.parser.print_help()
@@ -2759,7 +2805,9 @@ 

              extra_config = {

                  'credential.helper': ' '.join(utils.find_me() + ['gitcred']),

                  'credential.useHttpPath': 'true'}

-         self.cmd.push(getattr(self.args, 'force', False), extra_config)

+         self.cmd.push(getattr(self.args, 'force', False),

+                       getattr(self.args, 'no_verify', False),

+                       extra_config)

  

      def remote(self):

          """Handle command remote"""
@@ -2996,3 +3044,6 @@ 

              self.user = self.args.user

          else:

              self.user = getpass.getuser()

+ 

+     def pre_push_check(self):

+         self.cmd.pre_push_check(self.args.ref)

file modified
+2 -2
@@ -209,8 +209,8 @@ 

      Method doesn't check whether files exist.

      :param file_path: relative or absolute path to the file

      :type file_path: str

-     :param file_path: relative or absolute path to the directory

-     :type file_path: str

+     :param dir_path: relative or absolute path to the directory

+     :type dir_path: str

      :return: file path relative to the dictionary if the file is inside

          of the directory otherwise None

      :rtype: str or None

file modified
+4 -2
@@ -69,16 +69,18 @@ 

          subprocess.check_call(['git', 'clone', 'file://%s' % moduledir],

                                cwd=cloneroot, stdout=subprocess.PIPE,

                                stderr=subprocess.PIPE)

-         clonedir = os.path.join(cloneroot, module.split('/')[-1])

+         module_basename = os.path.basename(module)

+         clonedir = os.path.join(cloneroot, module_basename)

          open(os.path.join(clonedir, '.gitignore'), 'w').close()

          open(os.path.join(clonedir, 'sources'), 'w').close()

+         open(os.path.join(clonedir, module_basename+'.spec'), 'w').close()

          subprocess.check_call(['git', 'config', 'user.name', 'tester'],

                                cwd=clonedir,

                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

          subprocess.check_call(['git', 'config', 'user.email', 'tester@example.com'],

                                cwd=clonedir,

                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

-         subprocess.check_call(['git', 'add', '.gitignore', 'sources'],

+         subprocess.check_call(['git', 'add', '.gitignore', 'sources', module_basename+'.spec'],

                                cwd=clonedir, stdout=subprocess.PIPE,

                                stderr=subprocess.PIPE)

          subprocess.check_call(['git', 'commit', '-m',

@@ -0,0 +1,106 @@ 

+ # -*- coding: utf-8 -*-

+ 

+ import os

+ import subprocess

+ 

+ import git

+ 

+ import pyrpkg

+ from pyrpkg.sources import SourcesFile

+ 

+ try:

+     from unittest.mock import patch

+ except ImportError:

+     from mock import patch

+ 

+ from . import CommandTestCase

+ 

+ SPECFILE_TEMPLATE = """Name:           test

+ Version:        1.0

+ Release:        1.0

+ Summary:        test

+ 

+ Group:          Applications/System

+ License:        GPLv2+

+ 

+ %s

+ 

+ %%description

+ Test

+ 

+ %%install

+ rm -f $RPM_BUILD_ROOT%%{_sysconfdir}/"""

+ 

+ 

+ class TestPrePushCheck(CommandTestCase):

+ 

+     def setUp(self):

+         super(TestPrePushCheck, self).setUp()

+ 

+         self.make_new_git(self.module)

+ 

+         moduledir = os.path.join(self.gitroot, self.module)

+         subprocess.check_call(['git', 'clone', 'file://%s' % moduledir],

+                               cwd=self.path, stdout=subprocess.PIPE,

+                               stderr=subprocess.PIPE)

+ 

+         self.cloned_dir = os.path.join(self.path, self.module)

+         self.cmd = pyrpkg.Commands(self.cloned_dir, self.lookaside,

+                                    self.lookasidehash,

+                                    self.lookaside_cgi, self.gitbaseurl,

+                                    self.anongiturl, self.branchre,

+                                    self.kojiprofile,

+                                    self.build_client, self.user, self.dist,

+                                    self.target, self.quiet)

+ 

+         os.chdir(self.cloned_dir)

+ 

+     @patch('pyrpkg.log.error')

+     def test_push_is_blocked_by_untracked_patches(self, log_error):

+         """

+         Check that newly committed files are either tracked in git or listed

+         in 'sources' file.

+         """

+         # Track SPEC and a.patch in git

+         spec_file = self.module+".spec"

+         with open(spec_file, 'w') as f:

+             f.write(SPECFILE_TEMPLATE % '''Patch0: a.patch

+ Patch1: b.patch

+ Patch2: c.patch

+ Patch3: d.patch

+ ''')

+ 

+         for patch_file in ('a.patch', 'b.patch', 'c.patch', 'd.patch'):

+             with open(patch_file, 'w') as f:

+                 f.write(patch_file)

+ 

+         # Track c.patch in sources

+         sources_file = SourcesFile(self.cmd.sources_filename,

+                                    self.cmd.source_entry_type)

+         file_hash = self.cmd.lookasidecache.hash_file('c.patch')

+         sources_file.add_entry(self.cmd.lookasidehash, 'c.patch', file_hash)

+         sources_file.write()

+ 

+         self.cmd.repo.index.add([spec_file, 'a.patch', 'sources'])

+         self.cmd.repo.index.commit('add SPEC and patches')

+ 

+         with self.assertRaises(SystemExit) as exc:

+             self.cmd.pre_push_check("HEAD")

+ 

+         self.assertEqual(exc.exception.code, 3)

+         log_error.assert_called_once_with("Source file 'b.patch' was neither listed in the "

+                                           "'sources' file nor tracked in git. Push operation "

+                                           "was cancelled")

+ 

+         # Verify added files are committed but not pushed to origin

+         local_repo = git.Repo(self.cloned_dir)

+         git_tree = local_repo.head.commit.tree

+         self.assertTrue('a.patch' in git_tree)

+         self.assertTrue('b.patch' not in git_tree)

+         self.assertTrue('c.patch' not in git_tree)

+         self.assertTrue('d.patch' not in git_tree)

+ 

+         sources_content = local_repo.git.show('master:sources').strip()

+         with open('sources', 'r') as f:

+             expected_sources_content = f.read().strip()

+         self.assertEqual(expected_sources_content, sources_content)

file modified
+2 -2
@@ -66,7 +66,7 @@ 

  

          # Now, change directory to parent and test the push

          os.chdir(self.path)

-         cmd.push()

+         cmd.push(no_verify=True)

  

  

  class TestPushWithPatches(CommandTestCase):
@@ -119,7 +119,7 @@ 

          self.assertEqual(['b.patch', 'd.patch'], untracked_patches)

  

      def test_push_not_blocked_by_untracked_patches(self):

-         self.cmd.push()

+         self.cmd.push(no_verify=True)

  

          # Verify added files are pushed to origin

          origin_repo_path = self.cmd.repo.git.config(

This check should prevent unwanted pushing of incorrect
configuration. When a 'git push' command is executed, the git hook
'pre-push' script is activated. Checks available:
1. Is tarball/source file added to the 'sources' file?
2. Are files from 'sources' file uploaded into the lookaside cache?

JIRA: RHELCMP-10415
Fixes: https://pagure.io/fedpkg/issue/491
Relates: https://pagure.io/releng/issue/9955

Signed-off-by: Ondrej Nosek onosek@redhat.com

Unlike the first attempt (#643), this one executes functionality in Python, unlike the original bash solution.
fedpkg pre-push-check, which is executed in the git hook script can be even executed separately.

rebased onto 7c598d7cf20b968c7a44e44e4c970852c58518c9

a year ago

Why run the check for each pushed branch if neither the branch name nor the commit sha? It will repeatedly check whatever is currently checked out.

Maybe the pre-push-check should receive local_sha as argument, and use git to verify file contents at that revision?

The use case for that is that it's possible to check out a branch locally, but push a different branch. If I do git switch f37 and git push origin rawhide, I would expect the check to still verify the commit being pushed, not what I have checked out.

I don't know if this assumption is safe. Take fedora-release as an example. No source is in lookaside.

Could the check instead verify that each listed source file is either listed in sources, or committed in git?

This might make sense for patches too.

Or see python-rpm-generators -- neither of the RPM sources is in the lookaside cache, but there is a tarball in a lookaside cache for the CI tests.

rebased onto da276a082c67782bf3e6a8e1e2a6f3043072d9dc

a year ago

Thanks for your inputs. The new version check takes one argument with a git reference.
Also checks whether a source file (or patch) is either tracked or listed in 'sources'.
Still, there is still one assumption used: specfile name is determined in the local branch instead of a pushed branch (hopefully main specfile name is rarely changed). Do you think it is worth making it 100% for the price duplicating the functionality?

Still, there is still one assumption used: specfile name is determined in the local branch instead of a pushed branch (hopefully main specfile name is rarely changed).

You mean the spec file name is determined from whatever is currently checked out, which may not be what is pushed? That's not a problem in my opinion. It should be so rare that disabling the check should be sufficient in such case.

That might be a good improvement: if the hook denies the push, the output could suggest that it can be disabled by passing --no-verify option to push command (xpkg push should learn to pass that option to git).

How about prefixing the unused variables with underscore? ShellCheck complains about them, but I think they have value as documentation for the hook interface.

ShellCheck also wants read -r to be used to not mangle backslashes (which should not really be a problem here).

The $local_sha in the the pre-push-check call should be quoted.

The exit in the loop will cause only the first branch to be checked. I think it's fine to abort on the first problematic branch, but all branches should be checked.

@lsedlar: Actually I have just remembered the case that might not be so rare. When rhpkg operates from the master branch, which is empty.
Determining the specfile is a functionality, that was changed a few times in history and I didn't want to duplicate it.

Why tuple? I would expect set.

Why tuple? I would expect set.

Can be there some duplicities?

Actually I have just remembered the case that might not be so rare. When rhpkg operates from the master branch, which is empty.

True, that might happen. I can easily imagine making changes in multiple branches and then pushing them all in one command or while the last modified branch is checked out. But why would someone check out unused master branch?

I still think it's okay to bail out in such case. The functionality is supposed to be a convenience for users. They can explicitly opt out, or they can do weird stuff that makes it not work correctly.

I completely agree with not duplicating loading spec file from work tree and from a specific commit.

Why tuple? I would expect set.

Can be there some duplicities?

I don't think so. The order is not significant either.

rebased onto 4b6f969c67861cb1edb35121420f55d85bc8d64f

a year ago

recent updates:

  • ShellCheck fixes
  • bash script exits with overall return code after checking all branches
  • --no-verify argument to the "xpkg push" command
  • show the hint about the possibility of bypassing the pre-push check when the check fails
  • minor fix in regular expression for extracting sources and patches
  • tuple-->set; more suitable structure for items
  • use sys.exit(ret_code) instead of return to affect '$?' in bash

additional functionality:
hook script executes checking with: "<x_pkg command> pre-push-check"
<x_pkg command> is hard-coded in the hook script during the clone of the repository. When a user pushes the changes manually after <x_pkg command> is missing, it would fail. Therefore there is a check for the presence of the <x_pkg command> in the system.

rebased onto cf95ca2

a year ago

Unit tests were added.

Pull-Request has been merged by onosek

a year ago