#7407 Add new and improved spam-o-matic script, now flavored with DNF
Merged 5 years ago by mohanboddu. Opened 6 years ago by ngompa.
ngompa/releng spam-o-matic-dnf  into  master

file added
@@ -0,0 +1,305 @@ 

+ #!/usr/bin/python3


+ # Copyright (C) 2011-2015 Red Hat, Inc.

+ # * Original Author: Bill Nottingham <notting@redhat.com>

+ # Copyright (C) 2018 Neal Gompa <ngompa13@gmail.com>.

+ # SPDX-License-Identifier: GPL-2.0+

+ #

+ # This script trawls through the repositories at a given directory location

+ # and identifies broken dependencies, if any, and then spams the owners of packages

+ # with the broken dependencies with emails about it until they fix it.

+ #

+ # The original script (using Yum) was from Mash, written by Bill Nottingham.

+ # It was rewritten to use DNF and Python 3 by Neal Gompa.

+ #


+ import os

+ import shutil

+ from stat import *

+ import string

+ import sys

+ import tempfile

+ import re

+ import smtplib


+ import argparse


+ import dnf

+ import dnf.base

+ import dnf.conf

+ import dnf.const



+ from collections import defaultdict


# printf-style template turned into an owner address by addOwner();
# the single %s receives the (source) package name.
owner_template = "%s-owner@fedoraproject.org"

# Sender address generateSpam() puts in the From: header and SMTP envelope.
from_address = "buildsys@fedoraproject.org"

# Package name -> owner address, filled in through addOwner().
owners = dict()

# Source package name -> {package id -> {arch -> list of broken-dep records}},
# filled in by doit() and read back by generateSpam().
deps = dict()


def generateDnfBaseObject(distdir, treename, arch):
    """Create a dnf.Base whose only repository is one arch of the tree.

    The repository is expected at ``<distdir>/<arch>/os`` when that
    directory exists, otherwise directly at ``<distdir>/<arch>``.

    Args:
        distdir: Directory holding one sub-directory per architecture.
        treename: Tree name, used in the repository id and display name.
        arch: Name of the architecture sub-directory to load.

    Returns:
        A dnf.Base with the repository metadata loaded into its sack, or
        None when the arch is a source tree ('source' / 'SRPMS'), the
        directory does not exist, or it has no repodata/repomd.xml.
    """
    # Source trees are skipped entirely.
    if arch in ('source', 'SRPMS'):
        return None

    archdir = os.path.join(distdir, arch)
    if not os.path.exists(archdir):
        return None

    subdir = "os" if os.path.exists(os.path.join(archdir, "os")) else ""
    if not os.path.exists(os.path.join(archdir, subdir, "repodata", "repomd.xml")):
        return None

    dbo = dnf.Base()
    dbo.conf.debuglevel = 2
    dbo.conf.best = True
    dbo.conf.ignorearch = True
    dbo.conf.reposdir = "/dev/null"
    dbo.conf.keepcache = False
    dbo.conf.metadata_expire = 0
    dbo.conf.cachedir = tempfile.mkdtemp()
    # Keep the log directory inside the cache directory: the caller removes
    # cachedir when it is done with this object, so the log directory is
    # cleaned up with it instead of being left behind in the temp area.
    dbo.conf.logdir = tempfile.mkdtemp(suffix="dnflog", prefix="spamomatic",
                                       dir=dbo.conf.cachedir)

    dnfrepo = dnf.repo.Repo("{}-{}".format(treename, arch), dbo.conf)
    dnfrepo.name = "Fedora {} Tree - {}".format(treename, arch)
    dnfrepo.baseurl = "file://{}/{}/{}".format(distdir, arch, subdir)
    dnfrepo.gpgcheck = False
    dnfrepo.enable()

    dbo.repos.add(dnfrepo)
    # Only the tree repository is wanted; the system (installed) repo is not.
    dbo.fill_sack(load_system_repo=False, load_available_repos=True)

    return dbo



def libmunge(match):
    """Substitution callback that bumps a trailing number by one.

    Args:
        match: A regex match with at least two groups: a prefix and the
            text that follows it.

    Returns:
        The prefix followed by the second group incremented by one when
        that group is numeric, otherwise the two groups joined unchanged.
    """
    groups = match.groups()
    prefix = groups[0]
    version = groups[1]
    if not version.isdigit():
        return "%s%s" % (prefix, version)
    return "%s%d" % (prefix, int(version) + 1)



def addOwner(list, pkg):
    """Make sure ``pkg`` has an owner address recorded in a mapping.

    Args:
        list: Mapping of package name to owner address; updated in place.
            (The name shadows the builtin but is kept for existing callers.)
        pkg: Package name to look up or register.

    Returns:
        True when the mapping holds a non-empty address for ``pkg`` after
        the call, False otherwise.
    """
    if pkg in list:
        # Already registered: usable only if the stored address is non-empty.
        return bool(list.get(pkg))

    address = owner_template % pkg
    list[pkg] = address
    return bool(address)



def getSrcPkg(pkg):
    """Return the name of the source package that ``pkg`` belongs to.

    Args:
        pkg: Package object exposing ``arch``, ``name`` and ``source_name``.

    Returns:
        ``pkg.name`` for a source package (arch 'src'); otherwise
        ``pkg.source_name``, or None when that is empty or unset.
    """
    is_source = pkg.arch == 'src'
    if is_source:
        return pkg.name
    # Normalise every falsy source name to None.
    return pkg.source_name or None



def assignBlame(resolver, dep, guilty):
    """Given a broken dependency, find potentially responsible parties.

    Args:
        resolver: Object exposing ``sack.query()`` (a dnf.Base in this
            script) that is searched for providers.
        dep: The unresolved dependency.
        guilty: Mapping of package name to owner address; every source
            package found here is registered in it through addOwner().

    Returns:
        A list starting with ``dep`` itself, followed by the source
        package names of the packages found, without duplicates.
    """
    # The dep itself
    blamed = [dep]

    def _blame_providers(provides):
        # Register and record the source package of everything providing it.
        for package in resolver.sack.query().available().filter(provides=provides):
            source = getSrcPkg(package)
            if source is None:
                # No source name known: nobody to blame, and registering it
                # would only create a bogus "None-owner@..." address.
                continue
            if addOwner(guilty, source) and source not in blamed:
                blamed.append(source)

    # Something that provides the dep
    _blame_providers(dep)

    # Libraries: check for variant in soname
    depname = str(dep)
    if re.match(r"lib.*\.so\.[0-9]+", depname):
        # The same library with the next soname version ...
        bumped = re.sub(r"(lib.*\.so\.)([0-9]+)", libmunge, depname)
        _blame_providers(bumped)
        # ... and anything providing the bare library name.
        _blame_providers(depname.split('.')[0])

    return blamed



def generateSpam(pkgname, treename, sendmail=True):
    """Compose the broken-dependency report for one package and mail it.

    Reads the module-level ``deps`` and ``owners`` mappings. The owner of
    ``pkgname`` is the primary recipient; owners of every package blamed
    for one of its broken dependencies are copied.

    Args:
        pkgname: Key into ``deps`` / ``owners`` of the package to report.
        treename: Name of the tree, used in the message text.
        sendmail: When false the message is composed but not sent.

    Raises:
        KeyError: If ``pkgname`` is missing from ``deps`` or ``owners``.
    """
    package = deps[pkgname]
    guilty = owners[pkgname]
    conspirators = []

    report = ["\n\n%s has broken dependencies in the %s tree:\n" % (pkgname, treename)]
    for subpackage in package.values():
        for arch, brokendeps in subpackage.items():
            report.append("On %s:\n" % (arch,))
            # Each record is (package, missing dependency, blame list).
            for dep in brokendeps:
                report.append("\t%s requires %s\n" % (dep[0], dep[1]))
                for blame in dep[2]:
                    # We might not have an owner here for virtual deps
                    try:
                        party = owners[blame]
                    except (KeyError, TypeError):
                        continue
                    if party != guilty and party not in conspirators:
                        conspirators.append(party)
    report.append("Please resolve this as soon as possible.\n\n")
    data = "".join(report)

    fromaddr = from_address
    toaddrs = [guilty] + conspirators

    headers = ["From: %s" % fromaddr, "To: %s" % guilty]
    if conspirators:
        # Only emit Cc: when there is somebody to copy.
        headers.append("Cc: %s" % ','.join(conspirators))
    headers.append("Subject: Broken dependencies: %s" % pkgname)
    msg = "%s\n\n%s\n" % ("\n".join(headers), data)

    if not sendmail:
        return

    try:
        # The context manager closes the connection even if sending fails.
        with smtplib.SMTP('localhost') as server:
            server.set_debuglevel(1)
            server.sendmail(fromaddr, toaddrs, msg)
    except (smtplib.SMTPException, OSError) as err:
        # Best effort: one failed mail must not stop the remaining reports.
        print('sending mail failed: %s' % (err,))



def get_unresolved_deps(dbo, arch):
    """Find requirements that nothing in the loaded repositories provides.

    Modified from repoclosure plugin from dnf-plugins-core.

    Only packages of the requested ``arch`` are checked, but their
    requirements are resolved against every available package. Filtering
    the provider pool by arch as well would report any requirement that is
    satisfied by a package of another arch (e.g. noarch) as broken.

    Args:
        dbo: Object exposing ``sack.query()`` and ``conf.best`` (a dnf.Base
            in this script).
        arch: Architecture of the packages to check, or None for all.

    Returns:
        Dict mapping each package with broken dependencies to the set of
        its unresolved requirements. Packages without any are left out.
    """
    available = dbo.sack.query().available()
    if dbo.conf.best:
        available = available.latest()

    # Packages whose requirements get checked; providers stay unfiltered.
    to_check = available
    if arch is not None:
        to_check = available.filter(arch=arch)

    requirements = {}
    wanted = set()
    for pkg in to_check:
        reqs = set()
        for req in pkg.requires:
            # XXX: https://bugzilla.redhat.com/show_bug.cgi?id=1186721
            if str(req).startswith(("solvable:", "rpmlib(")):
                continue
            reqs.add(req)
        requirements[pkg] = reqs
        wanted |= reqs

    available.apply()
    missing = {req for req in wanted if not available.filter(provides=req)}

    broken = {}
    for pkg, reqs in requirements.items():
        unresolved = reqs & missing
        if unresolved:
            broken[pkg] = unresolved
    return broken



def doit(args):
    """Check every architecture of a tree and report broken dependencies.

    For each sub-directory of ``args.distdir`` that holds a repository, the
    unresolved dependencies are printed grouped by source package and
    recorded in the module-level ``deps`` / ``owners`` mappings. Afterwards
    one report per affected source package goes through generateSpam().

    Args:
        args: Parsed command-line namespace. ``distdir``, ``treename``,
            ``only_arches`` and ``nomail`` are required; ``owneraddr`` and
            ``fromaddr`` are applied when present.
    """
    # Apply the address options from the command line; they were parsed but
    # never used before. Namespaces without them keep the module defaults.
    global owner_template, from_address
    owner_template = getattr(args, "owneraddr", owner_template)
    from_address = getattr(args, "fromaddr", from_address)

    # Tree directory names whose package arch differs from the directory
    # name; every other arch is used unchanged.
    arch_aliases = {
        'i386': 'i686',
        'ppc': 'ppc64',
        'armhfp': 'armv7hnl',
        'arm': 'armv7l',
    }

    distdir = os.path.abspath(args.distdir)
    for arch in os.listdir(args.distdir):
        if args.only_arches and (arch not in args.only_arches):
            print("Skipping {} because we don't want to check it...".format(arch))
            continue

        dbo = generateDnfBaseObject(distdir, args.treename, arch)
        if not dbo:
            continue

        try:
            carch = arch_aliases.get(arch, arch)
            dbo.conf.arch = carch

            baddeps = get_unresolved_deps(dbo, carch)
            pkgs = sorted(baddeps, key=lambda p: (p.name, p))
            if pkgs:
                print("Broken deps for %s" % (arch,))
                print("----------------------------------------------------------")

            naughtydd = defaultdict(list)
            for pkg in pkgs:
                srcpkg = getSrcPkg(pkg)
                addOwner(owners, srcpkg)

                pkgid = "%s-%s-%s" % (pkg.name, pkg.version, pkg.release)

                broken = []
                for missingdep in baddeps[pkg]:
                    naughtydd[srcpkg].append("\t%s requires %s" % (pkg, missingdep))
                    blamelist = assignBlame(dbo, missingdep, owners)
                    broken.append((pkg, missingdep, blamelist))

                deps.setdefault(srcpkg, {}).setdefault(pkgid, {})[arch] = broken

            for source in sorted(naughtydd):
                print("[%s]" % (source))
                for line in naughtydd[source]:
                    print(line)

            print("\n\n")
        finally:
            # Remove the temporary cache even when this arch blew up.
            shutil.rmtree(dbo.conf.cachedir, ignore_errors=True)

    for pkg in deps:
        generateSpam(pkg, args.treename, not args.nomail)


if __name__ == '__main__':
    # Command-line entry point: describe the options, parse them and hand
    # the resulting namespace to doit().
    cli = argparse.ArgumentParser(
        description="Tool to identify unresolved dependencies and spam people until they fix it",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # (flag, keyword arguments) pairs, in the order they appear in --help.
    arguments = (
        ("--nomail", dict(action="store_true",
                          help="Don't mail the results")),
        ("--treename", dict(default="rawhide",
                            help="Name of the tree to use in messages")),
        ("--fromaddr", dict(default=from_address,
                            help="Address to send mail from")),
        ("--owneraddr", dict(default=owner_template,
                             help="Template for package owner addresses to send mail to")),
        ("--only-arches", dict(nargs="+", default=None,
                               help="Use only these architectures")),
        ("distdir", dict(help="Target distribution directory to work from")),
    )
    for flag, options in arguments:
        cli.add_argument(flag, **options)

    args = cli.parse_args()
    doit(args)

rebased onto a6ae450e0a0579dc2d2783e5c11c5c3c50b47d58

6 years ago

Missing aarch64, ppc64le, s390x

rebased onto d1351505847ff8250c93523388c18a8861bb8fb3

6 years ago

@mohanboddu the aarch64, ppc64le, and s390x ones auto-resolve correctly because there are no aliases or basearch name mismatches. That's why the final else condition works.

I can delete the sparc one, though.

1 new commit added

  • spam-o-matic: Drop sparc basearch<->arch mapping
6 years ago

What happens if the logdir doesn't exist?

It seems it's going to fail because it will be unable to open the log file.

@mohanboddu Fixed to use a proper temporary directory.

rebased onto 9037f5e8cbc7c6c529b8459ebdf2f092297daf97

6 years ago

@ngompa Filed a ticket against dnf to fix repoclosure for aarch64 and armhfp:

rebased onto 4effec1620a7d00d1884fd87cbea18714b8e5838

5 years ago

rebased onto 61e218686cecdfd716308f5b20a3c21fe0460d20

5 years ago

rebased onto 11b90b7bc885b2bea31270382e682b4eaf1724be

5 years ago

rebased onto 35590904c8aa9cf927f7c67b350d5541ec92b8a2

5 years ago

@mohanboddu I've rebased this against current master.

rebased onto 8bad5d71e1c4609611a493981162fa37c732c877

5 years ago

rebased onto b143727

5 years ago

@mohanboddu the new option is implemented.

Looking good, provided we can run it on Everything and the non-ARM arches.

Commit 240fc9a fixes this pull-request

Pull-Request has been merged by mohanboddu

5 years ago

Pull-Request has been merged by mohanboddu

5 years ago