From a87742c3921b3e6d2a41151ca68e665d694495e3 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Chibon Date: Mar 27 2019 14:37:47 +0000 Subject: Replace calls to pygit2.clone_repository by calls to git clone directly pygit2.clone_repository() is leaking file descriptors on large git repo making pagure generate "Too many open files" errors while it runs. This leads to certain task no longer working, especially the tasks around the tickets or PRs repositories of large project, lowering significantly their interest. With this commit we're now shelling out to git directly the cloning action, this should fix the leak of file descriptors and thus allow pagure to go back to its expected behaviour. Fixes https://pagure.io/pagure/issue/4277 Signed-off-by: Pierre-Yves Chibon --- diff --git a/pagure/lib/git.py b/pagure/lib/git.py index 66d607e..bc10fcb 100644 --- a/pagure/lib/git.py +++ b/pagure/lib/git.py @@ -928,7 +928,7 @@ class TemporaryClone(object): return None if not os.path.exists(self._origpath): return None - pygit2.clone_repository(self._origpath, self.repopath) + PagureRepo.clone(self._origpath, self.repopath) # Because for whatever reason, one pygit2.Repository is not # equal to another.... The pygit2.Repository returned from # pygit2.clone_repository does not have the "branches" attribute. diff --git a/pagure/lib/repo.py b/pagure/lib/repo.py index 968c077..0fe3404 100644 --- a/pagure/lib/repo.py +++ b/pagure/lib/repo.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ - (c) 2015-2017 - Copyright Red Hat Inc + (c) 2015-2019 - Copyright Red Hat Inc Authors: Pierre-Yves Chibon @@ -10,6 +10,7 @@ from __future__ import print_function, unicode_literals, absolute_import import logging +import subprocess import pygit2 @@ -27,6 +28,24 @@ def get_pygit2_version(): return tuple([int(i) for i in pygit2.__version__.split(".")]) +def run_command(command): + _log.info("Running command: %s", command) + try: + out = subprocess.check_output(command, stderr=subprocess.STDOUT) + _log.info(" command ran successfully") + _log.debug("Output: %s" % out) + except subprocess.CalledProcessError as err: + _log.debug( + "Command FAILED: {cmd} returned code {code} with the " + "following output: {output}".format( + cmd=err.cmd, code=err.returncode, output=err.output + ) + ) + raise pagure.exceptions.PagureException( + "Did not manage to rebase this pull-request" + ) + + class PagureRepo(pygit2.Repository): """ An utility class allowing to go around pygit2's inability to be stable. @@ -34,6 +53,28 @@ class PagureRepo(pygit2.Repository): """ @staticmethod + def clone(path_from, path_to, checkout_branch=None, bare=False): + """ Clone the git repo at the specified path to the specified location. + + This method is meant to replace pygit2.clone_repository which for us + leaks file descriptors on large project leading to "Too many open files + error" which then prevent some tasks from completing. + + :arg path_from: the path or url of the git repository to clone + :type path_from: str + :arg path_to: the path where the git repository should be cloned + :type path_to: str + : + """ + cmd = ["git", "clone", path_from, path_to] + if checkout_branch: + cmd.extend(["-b", checkout_branch]) + if bare: + cmd.append("--bare") + + run_command(cmd) + + @staticmethod def push(remote, refname): """ Push the given reference to the specified remote. """ pygit2_version = get_pygit2_version() diff --git a/pagure/lib/tasks.py b/pagure/lib/tasks.py index 14d0604..3592c9d 100644 --- a/pagure/lib/tasks.py +++ b/pagure/lib/tasks.py @@ -543,11 +543,10 @@ def pull_remote_repo(self, session, remote_git, branch_from): remote_git, branch_from, ignore_non_exist=True ) - repo = pygit2.clone_repository( + pagure.lib.repo.PagureRepo.clone( remote_git, clonepath, checkout_branch=branch_from ) - del repo return clonepath