From 2e64a15058b9484fd17047fa5667aac6c1034954 Mon Sep 17 00:00:00 2001 From: Pavel Raiskup Date: Apr 19 2021 06:38:40 +0000 Subject: dist-git: optimize cgit cache file generator Start generating the CGIT configuration file (to be included into /etc/cgitrc) on our own, and mimic the 'cgit --scan-path=...' output. We previously relied on the full 'cgit --scan-path=...' functionality, but that always traversed through all the dist git repositories (and Fedora Copr has huge number of repositories, so this was too expensvie). So newly, the 'cgit_pkg_list' is replaced with 'copr-dist-git-refresh-cgit' that can do both the "full scan" (without arguments, executed by cron) and incremental run - with given sub-directory argument (executed by importer daemon for particular package). While I'm on it, I'm fixing the configuration for this feature and documentation - and the new script is able to read the dist-git and copr-dist-git configuration. I also noticed that the test-cases related to cgit were entirely broken, so those are fixed, too. The new 'copr-dist-git-refresh-cgit' is installed into the monthly cron job, to do periodic cgit fixups. The incremental (per-package) 'cgit-dist-git-refresh-cgit' run is done immediately after the new package is imported, and is executed only iff the package was freshly created. This is to avoid unnecessary executions. Fixes: #397 Fixes: #1218 Fixes: #824 --- diff --git a/dist-git/conf/copr-dist-git.conf.example b/dist-git/conf/copr-dist-git.conf.example index a327b36..74c5163 100644 --- a/dist-git/conf/copr-dist-git.conf.example +++ b/dist-git/conf/copr-dist-git.conf.example @@ -10,3 +10,14 @@ frontend_auth=backend_password_from_fe_config log_dir=/tmp/copr-dist-git per_task_log_dir=/var/lib/copr-dist-git/per-task-logs/ + +# Copr administrator is responsible for referencing this file in /etc/cgitrc as +# include=CGIT_CACHE_FILE. This file is automatically updated anytime a new +# package is imported into dist-git (plus periodically by a monthly cron job). +#cgit_cache_file=/var/cache/cgit/repo-configuration.rc + +# Intermediate file created when generating cgit_cache_file +#cgit_cache_list_file=/var/cache/cgit/repo-subdirs.list + +# Lock file used to atomically work with cgit_cache_file +#cgit_cache_lock_file=/var/cache/cgit/copr-repo.lock diff --git a/dist-git/conf/cron.monthly/copr-dist-git b/dist-git/conf/cron.monthly/copr-dist-git index e46d5c2..675c718 100644 --- a/dist-git/conf/cron.monthly/copr-dist-git +++ b/dist-git/conf/cron.monthly/copr-dist-git @@ -1,3 +1,7 @@ #!/usr/bin/sh runuser -c 'find /var/lib/copr-dist-git/per-task-logs -name *.log -mtime +30 -delete' - copr-dist-git + +# From time to time assure that the CGIT caches are consistent, and that the +# ownership of cache files is correct (run this as root). +/usr/bin/copr-dist-git-refresh-cgit diff --git a/dist-git/copr-dist-git.spec b/dist-git/copr-dist-git.spec index 2871603..f4e963b 100644 --- a/dist-git/copr-dist-git.spec +++ b/dist-git/copr-dist-git.spec @@ -27,6 +27,8 @@ BuildRequires: python3-setproctitle Recommends: logrotate Requires: systemd Requires: httpd +Requires: coreutils +Requires: crudini Requires: dist-git Requires: python3-copr-common Requires: python3-requests diff --git a/dist-git/copr_dist_git/bin/cgit_pkg_list b/dist-git/copr_dist_git/bin/cgit_pkg_list deleted file mode 100755 index e853bf5..0000000 --- a/dist-git/copr_dist_git/bin/cgit_pkg_list +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -# This simple script lists out the current pkgs git repos to a file. -# This speeds up cgit as it doesn't have to recurse into all dirs -# Looking for git repos. - -destination=/var/lib/copr-dist-git/cgit_pkg_list - -if [ -n "$1" ] -then - destination=$1 -fi - -newfile=`mktemp` - -# TODO: we should read the 'SetEnv GIT_PROJECT_ROOT /var/lib/dist-git/git' -# instead of hardcoding it here -cd /var/lib/dist-git/git - -# We expect //.git directories. -find -maxdepth 3 -mindepth 3 -type d -printf '%P\n' > "$newfile" -cp -fZ "$newfile" "$destination" -rm "$newfile" -chmod 644 "$destination" diff --git a/dist-git/copr_dist_git/helpers.py b/dist-git/copr_dist_git/helpers.py index 5f47b99..3a1ae77 100644 --- a/dist-git/copr_dist_git/helpers.py +++ b/dist-git/copr_dist_git/helpers.py @@ -116,8 +116,18 @@ class ConfigReader(object): cp, "dist-git", "pool_busy_sleep_time", 0.5, mode="float" ) - opts.cgit_pkg_list_location = _get_conf( - cp, "dist-git", "cgit_pkg_list_location", "/var/lib/copr-dist-git/cgit_pkg_list" + # The Copr administrator is responsible for configuring /etc/cgitrc to + # include this file. + opts.cgit_cache_file = _get_conf( + cp, "dist-git", "cgit_cache_file", "/var/cache/cgit/repo-configuration.rc" + ) + + opts.cgit_cache_list_file = _get_conf( + cp, "dist-git", "cgit_cache_list_file", "/var/cache/cgit/repo-subdirs.list" + ) + + opts.cgit_cache_lock_file = _get_conf( + cp, "dist-git", "cgit_cache_lock_file", "/var/cache/cgit/copr-repo.lock" ) opts.lookaside_location = _get_conf( diff --git a/dist-git/copr_dist_git/package_import.py b/dist-git/copr_dist_git/package_import.py index 2bd0b1f..892d27b 100644 --- a/dist-git/copr_dist_git/package_import.py +++ b/dist-git/copr_dist_git/package_import.py @@ -88,12 +88,14 @@ def sync_branch(new_branch, branch_commits, message): log.debug("nothing to commit into branch '{0}'".format(new_branch)) -def refresh_cgit_listing(opts): +def refresh_cgit_listing(reponame=None): """ Refresh cgit repository list. See cgit docs for more information. """ try: - cmd = ["/usr/share/copr/dist_git/bin/cgit_pkg_list", opts.cgit_pkg_list_location] + cmd = ["copr-dist-git-refresh-cgit"] + if reponame: + cmd += [reponame] subprocess.check_output(cmd, stderr=subprocess.STDOUT, encoding='utf-8') except OSError as e: log.error(str(e)) @@ -109,9 +111,11 @@ def setup_git_repo(reponame, branches): :param str branches: branch names to be created inside that repo """ log.info("make sure repos exist: {}".format(reponame)) + brand_new_package = False try: cmd = ["/usr/share/dist-git/setup_git_package", reponame] subprocess.check_output(cmd, stderr=subprocess.STDOUT, encoding='utf-8') + brand_new_package = True except subprocess.CalledProcessError as e: log.error("cmd: {}, rc: {}, msg: {}" .format(cmd, e.returncode, e.output.strip())) @@ -120,6 +124,9 @@ def setup_git_repo(reponame, branches): else: raise PackageImportException(e.output) + if brand_new_package: + refresh_cgit_listing(reponame) + for branch in branches: try: cmd = ["/usr/share/dist-git/mkbranch", branch, reponame] @@ -239,7 +246,6 @@ def import_package(opts, namespace, branches, srpm_path, pkg_name): os.chdir(oldpath) shutil.rmtree(repo_dir) - refresh_cgit_listing(opts) return munch.Munch( branch_commits=branch_commits, diff --git a/dist-git/run/copr-dist-git-config b/dist-git/run/copr-dist-git-config new file mode 100755 index 0000000..0effe86 --- /dev/null +++ b/dist-git/run/copr-dist-git-config @@ -0,0 +1,16 @@ +#! /usr/bin/python3 + +""" +Read the Copr DistGit configuration file, and print the desired config option to +stdout. Alternative to crudini parser, though we want to read also the defaults +from ConfigReader. +""" + +import sys +from copr_dist_git.helpers import ConfigReader + +if __name__ == "__main__": + reader = ConfigReader() + opts = reader.read() + requested_option = sys.argv[1] + print(opts[requested_option]) diff --git a/dist-git/run/copr-dist-git-refresh-cgit b/dist-git/run/copr-dist-git-refresh-cgit new file mode 100755 index 0000000..066e9db --- /dev/null +++ b/dist-git/run/copr-dist-git-refresh-cgit @@ -0,0 +1,106 @@ +#!/usr/bin/sh + +# This script generates a "cgit_cache_file" configuration file to be included +# into the CGIT config /etc/cgitrc. We execute this as 'copr-dist-git' user +# from copr-dist-git service, and as root (full scan) from cron job. + +cachefile=$(copr-dist-git-config cgit_cache_file) +listfile=$(copr-dist-git-config cgit_cache_list_file) +lockfile=$(copr-dist-git-config cgit_cache_lock_file) +reposdir=$(crudini --get /etc/dist-git/dist-git.conf dist-git gitroot_dir) +ownership=copr-dist-git:apache +subdir=$1 + +single_repo () +{ + # Note that we used to use /var/www/cgi-bin/cgit --scan-path=/..../ to + # generate the output, but since our directories are too large and deep + # nowadays it is too expensive, and cgit isn't anyhow optimized -- when + # --scan-path argument is used the full directory scan is done, and the + # project-list= argument in CGIT_CONFIG file is entirely ignored (see the + # difference between scan_projects() and scan_tree()): + # https://git.zx2c4.com/cgit/tree/cgit.c?id=bd6f5683f6cde4212364354b3139c1d521f40f39#n1001 + subpath=$1 + + case $subpath in + *.git) ;; + *) subpath=$subpath.git ;; + esac +cat < "$newlistfile" +fi + +( + # !! Commands executed under lock, make this block FAST !! + set -e + flock 9 + + if test -n "$subdir"; then + # only one repo is being added, catenate to the existing config files + echo "$subdir" >> "$listfile" + single_repo "$subdir" >> "$cachefile" + exit 0 + fi + + # Generating the new cache file shouldn't take terribly long, though just in + # case -- generate it in a separate file name to not break the background + # httpd cgit cgi processes serving users. + newcachefile=$(tempfile_from "$cachefile") + + # Some background copr-dist-git-workers might change the $listfile while + # we were traversing the directory tree (see above). So we can not just + # rely on the "$newlistfile" - so we rather use both the old and new list + # files. The file doesn't exist for the first run, so ignore failures. + cat "$listfile" >> "$newlistfile" || : + sort < "$newlistfile" | uniq | tee "$listfile" | \ + while read -r line; do + single_repo "$line" + done >> "$newcachefile" + + cp -fZ "$newcachefile" "$cachefile" + rm "$newcachefile" "$newlistfile" + chmod 644 "$cachefile" + chown "$ownership" "$cachefile" "$listfile" + +) 9>"$lockfile" + +# fix lock file so copr-dist-git user can lock as well +case $(stat -c '%U:%G' "$lockfile"):$(id -u -n) in + "$ownership:root") ;; + *) chown "$ownership" "$lockfile" +esac diff --git a/dist-git/tests/test_package_import.py b/dist-git/tests/test_package_import.py index e699ffb..7a2dafb 100644 --- a/dist-git/tests/test_package_import.py +++ b/dist-git/tests/test_package_import.py @@ -8,7 +8,12 @@ from munch import Munch from base import Base -from copr_dist_git.package_import import my_upload_fabric, import_package, setup_git_repo +from copr_dist_git.package_import import ( + import_package, + my_upload_fabric, + refresh_cgit_listing, + setup_git_repo, +) from unittest import mock from unittest.mock import MagicMock @@ -116,14 +121,21 @@ class TestPackageImport(Base): reponame = 'foo' branches = ['f25', 'f26'] setup_git_repo(reponame, branches) - assert mc_subprocess_check_output.has_calls([ - mock.call(['/usr/share/dist-git/setup_git_package', 'foo']), - mock.call(['/usr/share/dist-git/mkbranch', 'f25', 'foo']), - mock.call(['/usr/share/dist-git/mkbranch', 'f26', 'foo']), - ]) - - def refresh_cgit_listing(self, mc_subprocess_check_output): - refresh_cgit_listing(self.opts) - assert mc_subprocess_check_output.has_calls([ - mock.call(["/usr/share/copr/dist_git/bin/cgit_pkg_list", self.opts.cgit_pkg_list_location]) - ]) + mc_subprocess_check_output.assert_has_calls([ + mock.call(['/usr/share/dist-git/setup_git_package', 'foo'], + stderr=-2, encoding='utf-8'), + mock.call(['/usr/share/dist-git/mkbranch', 'f25', 'foo'], + stderr=-2, encoding='utf-8'), + mock.call(['/usr/share/dist-git/mkbranch', 'f26', 'foo'], + stderr=-2, encoding='utf-8'), + mock.call(['copr-dist-git-refresh-cgit', 'foo'], + stderr=-2, encoding='utf-8'), + ], any_order=True) + + + def test_refresh_cgit_listing(self, mc_subprocess_check_output): + refresh_cgit_listing() + mc_subprocess_check_output.assert_has_calls([ + mock.call(['copr-dist-git-refresh-cgit'], + stderr=-2, encoding='utf-8'), + ], any_order=True)