#4358 Make fork more performant by using 'git push --mirror'
Merged 5 years ago by pingou. Opened 5 years ago by bkabrda.
bkabrda/pagure fork-use-git-mirror  into  master

file modified
+42
@@ -996,6 +996,36 @@ 

          """ Exit the context manager, removing the temorary clone. """

          shutil.rmtree(self.repopath)

  

+     def change_project_association(self, new_project):

+         """ Make this instance "belong" to another project.

+ 

+         This is useful when you want to create TemporaryClone of one project

+         and then push some of its content into a different project just

+         by running ``push`` or ``mirror`` methods.

+ 

+         Args:

+             new_project (pagure.lib.model.Project): project to associate

+                 this TemporaryClone instance with

+         """

+         self._project = new_project

+         if not new_project.is_on_repospanner:

+             self.repo.remotes.set_push_url(

+                 "origin", new_project.repopath("main")

+             )

+ 

+     def mirror(self, username, force=False, **extra):

+         """ Run ``git push --mirror`` of the repo to its origin.

+ 

+         Args:

+             username (string): The user on who's account this push is

+             force (bool): whether or not to use ``--force`` when pushing

+             extra (dict): Extra fields passed to the remote side. Either via

+                 environment variables, or as X-Extra-<key> HTTP headers.

+         """

+         # git push --mirror fails if there are no branches

+         if len(list(self.repo.branches)) > 0:

+             self._push(username, "--mirror", force, **extra)

+ 

      def push(self, username, sbranch, tbranch=None, force=False, **extra):

          """ Push the repo back to its origin.

  
@@ -1003,11 +1033,23 @@ 

              username (string): The user on who's account this push is

              sbranch (string): Source branch to push

              tbranch (string): Target branch if different from sbranch

+             force (bool): whether or not to use ``--force`` when pushing

              extra (dict): Extra fields passed to the remote side. Either via

                  environment variables, or as X-Extra-<key> HTTP headers.

          """

          pushref = "%s:%s" % (sbranch, tbranch if tbranch else sbranch)

+         self._push(username, pushref, force, **extra)

+ 

+     def _push(self, username, pushref, force, **extra):

+         """ Push the repo back to its origin.

  

+         Args:

+             username (string): The user on who's account this push is

+             pushref(string): either ``<sbranch>:<tbranch>`` or ``--mirror``

+             force (bool): whether or not to use ``--force`` when pushing

+             extra (dict): Extra fields passed to the remote side. Either via

+                 environment variables, or as X-Extra-<key> HTTP headers.

+         """

          if "pull_request" in extra:

              extra["pull_request_uid"] = extra["pull_request"].uid

              del extra["pull_request"]

file modified
+15 -19
@@ -471,26 +471,22 @@ 

          )

  

          with pagure.lib.git.TemporaryClone(

-             repo_to, "main", "fork"

+             repo_from, "main", "fork"

          ) as tempclone:

-             fork_repo = tempclone.repo

- 

-             fork_repo.remotes.create("forkedfrom", repo_from.repopath("main"))

-             fork_repo.remotes["forkedfrom"].fetch()

- 

-             for branchname in fork_repo.branches.remote:

-                 if not branchname.startswith("forkedfrom/"):

-                     continue

-                 localname = branchname.replace("forkedfrom/", "")

-                 if localname == "HEAD":

-                     # HEAD will be created automatically as a symref

-                     continue

-                 tempclone.push(

-                     "pagure",

-                     "remotes/%s" % branchname,

-                     "refs/heads/%s" % localname,

-                     internal_no_hooks="yes",

-                 )

+             for branchname in tempclone.repo.branches.remote:

+                 if (

+                     branchname.startswith("origin/")

+                     and branchname != "origin/HEAD"

+                 ):

+                     locbranch = branchname[len("origin/") :]

+                     if locbranch in tempclone.repo.branches.local:

+                         continue

+                     branch = tempclone.repo.branches.remote.get(branchname)

+                     tempclone.repo.branches.local.create(

+                         locbranch, branch.peel()

+                     )

+             tempclone.change_project_association(repo_to)

+             tempclone.mirror("pagure", internal_no_hooks="yes")

  

          if not repo_to.is_on_repospanner and not repo_to.private:

              # Create the git-daemon-export-ok file on the clone

This makes the fork task much more performant on repos with multiple branches, as it only does one push (=> one subprocess, one wait time for hooks being run on the receiving end) instead of pushing each branch individually.
If this seems reasonable, I'll add some tests, of course :)

The approach makes sense to me, though it's annoying that we couldn't do this with pygit2...

rebased onto b914e538ee661240a47657f20e46d1d086a118fc

5 years ago

@bkabrda Any chance you'll add tests for this anytime soon?

Sorry, I was on vacation with no internet access...

Actually, I've gone through the existing tests and I believe that the forking functionality is covered well enough for the test suite that this doesn't require additional tests. @pingou do you have specific tests in mind that should be created or are you ok with this PR as is?

rebased onto 2e72b372aa38b7ae053fb2d2c2bf20b8026a775b

5 years ago

I wonder if the work done in https://pagure.io/pagure/pull-request/4380 couldn't simplify this one.

What do you think?

@pingou I don't really see how. Could you be more specific?

For projects on repospanner, I'm not sure, but for project that are not on repospanner, we could simply say: git clone --mirror origin target without using the temporary clone at all in the process, no?

Ah, I didn't consider that. Well, we could do that, but I'd prefer the consistency of doing everything through the TemporaryClone object. I think the code is actually cleaner this way, YMMV :)

But it does slow things down :)

@puiterwijk would you accept a PR allowing to fork a repo in repoSpanner?

@pingou yes, I would happily take that. Note it'd likely be a small-medium sized change involving both storage and state layers.

rebased onto 29bdb93

5 years ago

I opened https://pagure.io/pagure/issue/4387 so once repoSpanner has the desired feature so it'll help removing code that we no longer need :)

Pull-Request has been merged by pingou

5 years ago