From de850738b2dadd8898dcdd19f97a2a1f4a48fea3 Mon Sep 17 00:00:00 2001 From: Slavek Kabrda Date: Nov 15 2018 07:17:07 +0000 Subject: Allow running 'git gc' explicitly after every object-adding git operation --- diff --git a/doc/configuration.rst b/doc/configuration.rst index 2467079..4355b16 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -1383,6 +1383,31 @@ information. Defaults to: ``None`` +GIT_GARBAGE_COLLECT +~~~~~~~~~~~~~~~~~~~ + +This configuration key allows for explicit running of ``git gc --auto`` +after every operation that adds new objects to any git repository - +that is, after pushing and merging. The reason for having this functionality +in Pagure is that gc is not guaranteed to be run by git after every +object-adding operation. + +The garbage collection run by Pagure will respect git settings, so you +can tweak ``gc.auto`` and ``gc.autoPackLimit`` to your liking +and that will have an immediate effect on the task that runs the garbage +collection. These values can be configured system-wide in ``/etc/gitconfig``. +See https://git-scm.com/docs/git-gc#git-gc---auto for more details. + +This is especially useful if repositories are stored on NFS (or similar +network storage), where file metadata access is expensive - having unpacked +objects in repositories requires *a lot* of metadata reads. + +Note that the garbage collection is only run on repos that are not on +repoSpanner. 
+ +Defaults to: ``False`` + + CELERY_CONFIG ~~~~~~~~~~~~~ diff --git a/pagure/default_config.py b/pagure/default_config.py index 940eb68..960e509 100644 --- a/pagure/default_config.py +++ b/pagure/default_config.py @@ -206,6 +206,13 @@ GL_RC = None GL_BINDIR = None +# Whether or not to run "git gc --auto" after every change to a project +# This will only run for projects not on repospanner and respects the +# git configuration in effect (e.g. gc.auto and gc.autoPackLimit) +# See https://git-scm.com/docs/git-gc#git-gc---auto for more details +GIT_GARBAGE_COLLECT = False + + # SMTP settings SMTP_SERVER = "localhost" SMTP_PORT = 25 diff --git a/pagure/hooks/default.py b/pagure/hooks/default.py index 1623126..b492b1f 100644 --- a/pagure/hooks/default.py +++ b/pagure/hooks/default.py @@ -275,6 +275,11 @@ class DefaultRunner(BaseRunner): parent.namespace, parent.user.user if parent.is_fork else None, ) + if not project.is_on_repospanner and \ + _config.get("GIT_GARBAGE_COLLECT", False): + pagure.lib.tasks.git_garbage_collect.delay( + project.repopath("main") + ) class Default(BaseHook): diff --git a/pagure/lib/tasks.py b/pagure/lib/tasks.py index 0977ca4..feb6b6e 100644 --- a/pagure/lib/tasks.py +++ b/pagure/lib/tasks.py @@ -1151,3 +1151,15 @@ def pull_request_ready_branch(self, session, namespace, name, user): ) del (branches[pr.branch_from]) return {"new_branch": branches, "branch_w_pr": branches_pr} + + +@conn.task(queue=pagure_config.get("MEDIUM_CELERY_QUEUE", None), bind=True) +@pagure_task +def git_garbage_collect(self, session, repopath): + # libgit2 doesn't support "git gc" and probably never will: + # https://github.com/libgit2/libgit2/issues/3247 + _log.info("Running 'git gc --auto' for repo %s", repopath) + subprocess.check_output( + ["git", "gc", "--auto", "-q"], + cwd=repopath, + )