#283 Add hacky support for shallow clones
Closed 3 months ago by nphilipp. Opened a year ago by zbyszek.
fedora-infra/ zbyszek/rpmautospec tolerate-shallow-clones  into  main

file modified
+4
@@ -6,6 +6,7 @@ 

  import typing

  

  from .subcommands import changelog, convert, process_distgit, release

+ from .pkg_history import ShallowCloneError

  

  

  all_subcmds = (changelog, convert, process_distgit, release)
@@ -123,6 +124,9 @@ 

          yield

      except BrokenPipeError:

          pass

+     except ShallowCloneError as e:

+         # if we encountered a KeyError in a shallow clone, print a helpful message

+         sys.exit(f"error: in a shallow clone and commit {e.args[0]} not available")

      except OSError as e:

          # this covers various cases like file not found / has wrong type / access is denied.

          sys.exit(f"error: {e}")

file modified
+48 -1
@@ -1,6 +1,7 @@ 

  import datetime as dt

  import logging

  import re

+ import string

  import subprocess

  import sys

  from collections import defaultdict
@@ -19,6 +20,25 @@ 

  log = logging.getLogger(__name__)

  

  

+ class ShallowCloneError(KeyError):

+     "We tried to access a commit which is not available in the shallow clone"

+ 

+     @classmethod

+     def maybe_convert(

+         cls, key_error: KeyError, shallow: Optional[str]

+     ) -> Optional["ShallowCloneError"]:

+         "Return ShallowCloneError or None if it seems the reason was different"

+         if not shallow:

+             return None

+         key = key_error.args[0]

+         if isinstance(key, str) and len(key) >= 40 and not (set(key) - set(string.hexdigits)):

+             # OK, this looks looks like a sha1 or sha256 hash

+             return cls(key)

+ 

+         # Some other reason for KeyError?

+         return None

+ 

+ 

  class PkgHistoryProcessor:

  

      autorelease_flags_re = re.compile(
@@ -446,10 +466,21 @@ 

          # Unfortunately, pygit2 only tells us what the parents of a commit are, not what other

          # commits a commit is parent to (its children). Fortunately, Repository.walk() is quick.

          # This maps parent commits to their children.

+ 

+         # libgit2 doesn't support shallow clones. Handle .git/shallow manually.

+         # https://github.com/libgit2/libgit2/issues/3058

+         try:

+             shallow = open(self.repo.path + "shallow", "rt").read().strip()

+         except FileNotFoundError:

+             shallow = None

+ 

          commit_children = defaultdict(list)

          for commit in self.repo.walk(head.id):

              for parent in commit.parents:

                  commit_children[parent].append(commit)

+             if commit.parents and commit.parents[0].id.hex == shallow:

+                 log.debug("Commit {} is marked as shallow root, stopping iteration", commit.id)

+                 break

  

          ##########################################################################################

          # To process, first walk the tree from the head commit downward, following all branches.
@@ -542,7 +573,19 @@ 

  

                      # Consult all visitors for the commit on whether we should continue and store

                      # the results.

-                     commit_coroutines_info[commit] = [next(c) for c in coroutines]

+                     try:

+                         commit_coroutines_info[commit] = [next(c) for c in coroutines]

+                     except KeyError as e:

+                         # If we encountered a KeyError and we are in a shallow clone, then most

+                         # likely the visitor tried to access a commit which is not available. Let's

+                         # generate a nicer error message in that case. But let's be careful not

+                         # to convert an unrelated exception: we check if the key looks like a

+                         # hash. This is ugly, but pygit2/libgit2 have this terrible interface.

+                         e2 = ShallowCloneError.maybe_convert(e, shallow)

+                         if e2:

+                             raise e2 from e

+                         raise

+ 

                  else:

                      # Only traverse this commit. Traversal is important if parent commits are the

                      # root of branches that affect the results (computed release number and
@@ -552,6 +595,10 @@ 

                          {"child_must_continue": False} for v in visitors

                      ]

  

+                 if commit.id.hex == shallow:

+                     log.debug("\tshallow root, bailing out")

+                     break

+ 

                  if not commit.parents:

                      log.debug("\tno parents, bailing out")

                      break

The way that pygit2 (libgit2?) reacts to a shallow clone is very unfortunate:
it throws a KeyError in the iterator so it's hard to handle such cases nicely.
Let's hope that libgit2 and pygit2 get native support for shallow clones at some
point. For now, let's just check if .git/shallow is present and cut short the loop walking
over the commits.

This is enough for 'rpmautospec calculate-release' and 'rpmautospec generate-changelog'
to work, as long as all the commits that are necessary to get a result are present.
I.e. if e.g. conversion to rpmautospec happened n commits ago, and the repo is shallow
clone with at least n+1 commits, things should work. But if it's shallower than that,
it'll fail with KeyError, the same as before.

This is only a partial solution. But I don't think it makes sense to do a full
solution here. It can only be solved nicely in pygit2/libgit2, so let's do the bare
minimum to get some common cases working without complicating rpmautospec code too
much.

Fixes #227.

Build failed. More information on how to proceed and troubleshoot errors available at https://fedoraproject.org/wiki/Zuul-based-ci

I don't know pygit much, but isn't self.repo.path actually a path to the repository and not to the .git folder?

If that was true, this patch would never pass even the basic testing. It does pass.

The CI is all broken thou :/

fatal: could not read Username for 'https://gitlab.com': No such device or address
I don't think my patch did that ;)

But there was an IndexError if the iteration reached root. Should be fixed now.

rebased onto abd5029a0ad106f81198e7af200c27e09a104489

a year ago

Build failed. More information on how to proceed and troubleshoot errors available at https://fedoraproject.org/wiki/Zuul-based-ci

Hmm, as others have mentioned it would mess with the release numbering wherever the shallow cut-off is placed:

  • If shallow clone leaves out commits belonging to the same epoch-version, the release number of the current build will be off.
  • Unless an overriding changelog file is added/modified after the cutoff:
    • If the shallow clone leaves out older commits than the current epoch-version, the release numbers in the changelog will be off for the affected epoch-version.
    • The changelog will be cut short.

How release numbers are computed and changelog entries generated rely on all commit metadata being available for commits that need to be taken into account for release number computation and changelog generation (any commit considered for the generated changelog needs its release number computed, too).

I'm sympathetic to "doing the next best thing" with shallow clones for private builds etc., but I don’t see how we can use this for official builds.

This still fails in such a case, doesn't it?

Yeah, I didn't take the changelog entries into account properly. This will need to be reworked.

as others have mentioned it would mess with the release numbering wherever the shallow cut-off is placed

Yep. I think we can get around the issue to an extent by truncating the changelog at an appropriate point, i.e. at one of the points right before a version bump. For a commit with a version bump we know the release number, and the changelog entry text. We already truncate changelogs by default in our rpm builds (%_changelog_trimage), so I think this would be OK in particular for copr builds.

rebased onto 5115fe3b22248c896e53d03730d48318d98a61eb

a year ago

Update: it turns out that the patch was doing the right thing, to a certain extent. A release number of a changelog entry that would be different from a full clone is never generated. If some commits that would be be accessed are not found in the clone, a KeyError is thrown. I updated the commit message description to describe this.

I also added a second commit which suppresses the traceback when such a KeyError is encountered and prints a one-line error.

I also changed "Fixes …" to "Partially fixes …". I think the idea suggested above to trim the changelog would be useful, but it's also more complex to implement. I pushed the two patches I have because they make sense on their own, even if they don't solve the issue completely.

Build failed. More information on how to proceed and troubleshoot errors available at https://fedoraproject.org/wiki/Zuul-based-ci
https://fedora.softwarefactory-project.io/zuul/buildset/b1e4762f79854f44982ad9c599394892

rebased onto e9ee401

a year ago

Build failed. More information on how to proceed and troubleshoot errors available at https://fedoraproject.org/wiki/Zuul-based-ci
https://fedora.softwarefactory-project.io/zuul/buildset/1f762fdf475c4c22b926e335e9d0d084

As I moved the project repository to GitHub, so I’ll close this PR. Would you please resubmit it there if you’re still interested? Thanks!

Pull-Request has been closed by nphilipp

3 months ago