#9454 Add script to check if a branch can be deleted from dist-git
Merged 3 years ago by mohanboddu. Opened 3 years ago by zbyszek.
zbyszek/releng distgit-commit-unused  into  master

@@ -0,0 +1,213 @@ 

+ #!/usr/bin/python3

+ 

+ """This script checks if a branch may be deleted.

+ 

+ A branch may be removed safely when, for all commits in that branch

+ not reachable from other branches, there are no complete koji builds.

+ 

+ Examples:

+ 

+ -\------A---\

+   \----------\----- master

+ 

+ 'A' has been merged into 'master', so it can be trivially deletected

+ without checking any builds.

+ 

+   /---/-------\-B"-B'-B

+ -/---/---\-----\----master

+           \--C

+ 

+ 'B' has commits that are not found anywhere else (B, B', and B"), and

+ we need to check in koji if it knows about any builds from those

+ commits.

+ """

+ 

+ import argparse

+ import pathlib

+ import re

+ import pygit2

+ import requests

+ import koji as _koji

+ 

+ BODHI_RELEASES = 'https://bodhi.fedoraproject.org/releases/?rows_per_page=1000'

+ NORMAL_BRANCHES = r'^(f\d{1,2}|el\d|epel\d|epel1\d)$'

+ 

+ _KOJI_SESSION = None

+ def koji_session(opts):

+     global _KOJI_SESSION

+     if not _KOJI_SESSION:

+         koji = _koji.get_profile_module(opts.koji_profile)

+         session_opts = koji.grab_session_options(koji.config)

+         session = koji.ClientSession(koji.config.server, session_opts)

+         _KOJI_SESSION = (session, koji)

+     return _KOJI_SESSION

+ 

+ def koji_builds_exist(tag, package, opts):

+     session, _ = koji_session(opts)

+ 

+     print(f'Checking for {package} in tag {tag}...', end=' ')

+     tagged = session.listTagged(tag, latest=True, inherit=False, package=package)

+     print(tagged[0]['nvr'] if tagged else '(no)')

+     return bool(tagged)

+ 

+ def bodhi_builds_exist(branch, package, opts):

+     releases = requests.get(BODHI_RELEASES).json()['releases']

+ 

+     for entry in releases:

+         if entry['branch'] == branch:

+             tags = [v for k,v in entry.items() if k.endswith('_tag') and v]

+             print(f'Found branch {branch} in bodhi with tags:', ', '.join(tags))

+             for tag in tags:

+                 if koji_builds_exist(tag, package, opts):

+                     return True

+ 

+             print(f'No builds found in koji for branch {branch}')

+             return False

+ 

+     print(f'Branch {branch} not found in bodhi, checking if branch matches pattern...')

+     m = re.match(NORMAL_BRANCHES, branch)

+     if m:

+         print('...it does, do not delete')

+         return True

+     print('...no match, seems OK to remove')

+     return False

+ 

+ def find_hash(build):

+     return build['source'].rsplit('#', 1)[1]

+ 

+ def list_builds(package, opts):

+     session, koji = koji_session(opts)

+ 

+     pkg = session.getPackageID(package, strict=True)

+     builds = session.listBuilds(packageID=pkg, state=koji.BUILD_STATES['COMPLETE'])

+ 

+     with session.multicall(strict=True) as msession:

+         for build in builds:

+             if build['source'] is None:

+                 build['source'] = msession.getTaskInfo(build['task_id'], request=True)

+     for build in builds:

+         if isinstance(build['source'], koji.VirtualCall):

+             r = build['source'].result

+             if r is None:

+                 # This seems to happen for very old builds, e.g. buildbot-0.7.5-1.fc7.

+                 build['source'] = None

+                 nvr, time = build['nvr'], build['creation_time']

+                 print(f'Warning: build {nvr} from {time} has no source, ignoring.')

+             else:

+                 build['source'] = r['request'][0]

+ 

+     by_hash = {find_hash(b):b for b in builds if b['source']}

+     return by_hash

+ 

+ def containing_branches(repo, commit, *, local, ignore_branch=None):

+     if local:

+         containing = repo.branches.local.with_commit(commit)

+     else:

+         containing = repo.branches.remote.with_commit(commit)

+ 

+     for b in containing:

+         branch = repo.branches[b]

+         if branch != ignore_branch:

+             yield branch

+ 

+ def name_in_spec_file(commit, package):

+     try:

+         spec = (commit.tree / f'{package}.spec').data

+     except KeyError:

+         print(f"Commit {commit.hex} doesn't have '{package}.spec', assuming package is unbuildable.")

+         return None

+ 

+     # We don't try to decode the whole spec file here, to reduce the chances of trouble.

+     # Just any interesting lines.

+     for line in spec.splitlines():

+         if not line.startswith(b'Name:'):

+             continue

+         try:

+             name = line[5:].decode().strip()

+         except UnicodeDecodeError:

+             print(f"Something is wrong: commit {commit.hex} has busted encoding'.")

+             raise

+ 

+         # Note that this does not do macro resolution. No need to support crazy things.

+         return name

+ 

+ def do_opts():

+     parser = argparse.ArgumentParser(description=__doc__,

+                                      formatter_class=argparse.RawTextHelpFormatter)

+     parser.add_argument('--koji-profile', default='koji')

+     parser.add_argument('--package')

+     parser.add_argument('--repository', default='.', type=pathlib.Path)

+     parser.add_argument('branch')

+ 

+     opts = parser.parse_args()

+     if opts.package is None:

+         opts.package = opts.repository.absolute().name

+     return opts

+ 

+ def branch_is_reachable(opts):

+     repo = pygit2.Repository(opts.repository)

+     try:

+         branch = repo.branches.local[opts.branch]

+         branch_name = branch.branch_name

+         local = True

+     except KeyError:

+         branch = repo.branches.remote[opts.branch]

+         l = len(branch.remote_name)

+         branch_name = branch.branch_name[l+1:]

+         local = False

+ 

+     if branch_name == 'master':

+         print("Branch 'master' cannot be deleted.")

+         return 1

+ 

+     if bodhi_builds_exist(branch_name, opts.package, opts):

+         print('Branch was used to build packages, cannot delete.')

+         return 1

+ 

+     other = list(containing_branches(repo, branch.target, local=local, ignore_branch=branch))

+     if other:

+         names = ', '.join(o.name for o in other)

+         print(f'Branch merged into {names}. Safe to delete.')

+         return 0

+ 

+     print('Branch has commits not found anywhere else. Looking for builds.')

+     builds = list_builds(opts.package, opts)

+ 

+     for n, commit in enumerate(repo.walk(branch.target, pygit2.GIT_SORT_TOPOLOGICAL)):

+         subj = commit.message.splitlines()[0][:60]

+         print(f'{n}: {commit.hex[:7]} {subj}')

+         other = list(containing_branches(repo, commit, local=local, ignore_branch=branch))

+         if other:

+             names = ', '.join(o.name for o in other)

+             print(f'Commit {commit.hex} referenced from {names}. Stopping iteration.')

+             break

+ 

+         # Figure out the name used in the spec file in that commit.

+         # This is for the following case:

+         # * Repo 'foo' exists and is active

+         # * Repo 'foo2' (like a compat version of foo) exists and is active

+         # * People have 'Name: foo' in 'foo2.spec' and make a build

+         # * Koji will record this package as 'foo', even though it was built from 'foo2' repo

+         try:

+             real_name = name_in_spec_file(commit, opts.package)

+         except UnicodeDecodeError:

+             return 1

+         if real_name is not None and real_name != opts.package:

+             print(f"Sorry, {commit.hex} has Name:{real_name}, refusing to continue.")

+             return 1

+ 

+         built = builds.get(commit.hex, None)

+         if built:

+             print(f"Sorry, {commit.hex} built as {built['nvr']}.")

+             koji_link = f"https://koji.fedoraproject.org/koji/taskinfo?taskID={built['task_id']}"

+             print(f"See {koji_link}.")

+             return 1

+ 

+     print('No builds found, seems OK to delete.')

+     return 0

+ 

+ if __name__ == '__main__':

+     opts = do_opts()

+     print(f'Checking package {opts.package} in {opts.repository.absolute()}')

+ 

+     exit(branch_is_reachable(opts))

rebased onto 4604a4232332e78ac8228c47080224ee2e250c7e

3 years ago
Examples:
$ scripts/distgit-commit-unused.py --repository ~/fedora/systemd foo2
Working on package systemd in /home/zbyszek/fedora/systemd
Branch has commits not found anywhere else. Looking for builds.
0: afe51a0 foo2
1: a32a98a Merge branch 'master' into f32
Commit a32a98a0fa2fdee376e10eb53f67987fe1f3cc10 referenced from refs/remotes/origin/f32. Stopping iteration.
No builds found, seems OK to delete.

$ scripts/distgit-commit-unused.py --repository ~/fedora/systemd f32
Working on package systemd in /home/zbyszek/fedora/systemd
Branch has commits not found anywhere else. Looking for builds.
0: e3554e7 Move Provides:u2f-hidraw-policy to -udev subpackage
1: 8ae0be2 gitignore: add emacs backup files
2: 1a7473c Add abignore file to make abigail happy
3: 27e3b91 Fix some rpmlint issues and add filter for others
Sorry, 27e3b915a47af78655dc5cffdf277f16c76e4525 built as systemd-245.4-1.fc32.
See https://koji.fedoraproject.org/koji/taskinfo?taskID=42954546.

I think you could add this and next arguments into the mutually exclusive group so that you won't specify both.

Great job, @zbyszek!

I think I have only one concern here. It can happen that:

  • Repo foo exists and is active
  • Repo foo2 (like a compat version of foo) exists and is active
  • People have Name: foo in foo2.spec and make a build
  • Koji will record this package as foo, even though it was built from foo2 repo

In that case, we will allow deletion of commits from foo2 repo even though they were used. I don't think this is happening often and most likely is some corner case, but I think it is important to be aware of this. It actually happened to me few times with Rust packages. Probably solution to this problem can be enforcement of same dist-git repo as package name in koji, but it is not in place.

rebased onto 90439edc106ff212b4a99ff82fe760ab0a361a4b

3 years ago

Reworked. Two changes:
- @churchyard raised the point that the script would say that e.g. "f28" is contained by "origin/f28" when running in a clone. While technically true, this is not very useful. So now when checking a remote branch, only remote branches are taken into account, and when checking a local branch, only local branches are taken into account. (The first mode is for users, the second for releng.)
- To answer @ignatenkobrain's case above, the spec file is extracted for each commit and Name is extracted. If it doesn't match the package name, the script refuses to continue.

(That second check might reject some cases that could possibly be supported, e.g. when a macro is used in the name. If somebody has a case like that and they care enough, they can fix the script to parse the file using rpm. Hopefully such cases are rare, so I didn't bother doing that.)

rebased onto 09c32ed2dc41225e9aa3c3c0d5fd098cb2902535

3 years ago

5 new commits added

  • Add a work-around for very old builds not having a source
  • Do not allow removing release branches from which builds have been made
  • When checking builds, extract the spec file and verify Name: is set set correctly
  • Look at local and remote branches separately
  • Add script to check if a branch can be deleted from dist-git
3 years ago

Updated to handle @churchyard's request in https://pagure.io/fesco/issue/2387#comment-652070: do not allow removal of branches which were used to build anything.

@ignatenkobrain I also found another case where koji doesn't know the source for a build, see last commit.

@zbyszek Can you rebase the PR as it has some merge conflicts?

Otherwise, its looking good.

Thanks.

Script seems fine to me, as long as it expressed the policy that fesco approves/wants.

rebased onto a55d413

3 years ago

Can you rebase the PR as it has some merge conflicts?

It can't have conflicts, because it just adds a new file. I rebased in case you want to have linear history.

Script seems fine to me, as long as it expressed the policy that fesco approves/wants.

I think it satisfies the spirit of the policy approved by FESCo in #2340. The script is more restrictive in some regards, because it rejects branches from which packages have been built, even all the commits in those branches are reachable from other branches. OTOH, it permits removal of branches which have commits which are not reachable from a different branch, as long as none of those commits were used to successfully build real packages in koji. Those additional details match what was requested in the ticket and discussed before approving the one-sentence policy, even if the one-sentence policy does not explicitly express this. So yeah, I think this is good to go.

I think it satisfies the spirit of the policy approved by FESCo in #2340. The script is more restrictive in some regards, because it rejects branches from which packages have been built, even all the commits in those branches are reachable from other branches. OTOH, it permits removal of branches which have commits which are not reachable from a different branch, as long as none of those commits were used to successfully build real packages in koji. Those additional details match what was requested in the ticket and discussed before approving the one-sentence policy, even if the one-sentence policy does not explicitly express this. So yeah, I think this is good to go.

well, shouldn't fesco formally vote on it before we just decide it's ok and start using it? I know it may be a formality, but would be good for everything to be acked.

Metadata