#5 WIP: Port potential_conflict.py from yum to dnf
Merged 4 years ago by adamwill. Opened 4 years ago by frantisekz.

file modified
+377 -202
@@ -1,4 +1,4 @@ 

- #!/usr/bin/python -tt

+ #! /usr/bin/python3

  # This program is free software; you can redistribute it and/or modify

  # it under the terms of the GNU General Public License as published by

  # the Free Software Foundation; either version 2 of the License, or
@@ -19,13 +19,13 @@ 

  # This script used for the following test case:

  # https://fedoraproject.org/wiki/QA:Testcase_Mediakit_FileConflicts

  

- """Go through a yum packagesack and return the list of newest pkgs which

+ """Go through a dnf packagesack and return the list of newest pkgs which

     possibly have conflicting files"""

  

+ from collections import defaultdict

+ from optparse import OptionParser

+ from urllib.request import urlopen

  

- import yum

- import yum.Errors

- import yum.misc

  import datetime

  import time

  import sys
@@ -33,11 +33,11 @@ 

  import os.path

  import tempfile

  import shutil

- import urllib

- from collections import defaultdict

- from rpmUtils.miscutils import rangeCompare as rpmRangeCompare

+ import re

  

- from optparse import OptionParser

+ import dnf

+ import rpm

+ import hawkey

  

  # iterate all pkgs, build a filedict of filename = [list of pkg objects]

  # then go through that list, if any file has more than one pkg then put those
@@ -47,30 +47,8 @@ 

  #        pkgs which are multilib sets

  #        pkgs which have fully matching overlapping files (md5sums + timestamps)

  

- def parseArgs():

-     usage = """

-     Check given repos for possible file/package conflicts

- 

-     %s [-c <config file>] [-r <repoid>] [-r <repoid2>]

-     """ % sys.argv[0]

-     parser = OptionParser(usage=usage)

-     parser.add_option("-c", "--config", default='/etc/yum.conf',

-         help='config file to use (defaults to /etc/yum.conf)')

-     parser.add_option("-r", "--repoid", default=[], action='append',

-         help=("specify repo ids to query, can be specified multiple times "

-              "(default is all enabled)"))

-     parser.add_option("-t", "--tempcache", default=False, action="store_true",

-         help="Use a temp dir for storing/accessing yum-cache")

-     parser.add_option("--repofrompath", action="append",

-                       help=("specify repoid & paths of additional repositories "

-         "- unique repoid and complete path required, can be specified multiple "

-         "times. Example. --repofrompath=myrepo,/path/to/repo"))

-     (opts, args) = parser.parse_args()

-     return (opts, args)

- 

- 

- # not a great check but it does toss out all the glibc.i686 vs glibc.i386 ones

  def pkg_names_match(pkglist):

+     "not a great check but it does toss out all the glibc.i686 vs glibc.i386 ones"

      p = pkglist[0]

      for pkg in pkglist[1:]:

          if p.name != pkg.name:
@@ -78,9 +56,8 @@ 

  

      return True

  

- 

- # if they have the same sourcerpm it is highly doubtful it is a legit conflict

  def sourcerpms_match(pkglist):

+     "if they have the same sourcerpm it is highly doubtful it is a legit conflict"

      p = pkglist[0]

      for pkg in pkglist[1:]:

          if p.sourcerpm != pkg.sourcerpm:
@@ -88,203 +65,401 @@ 

  

      return True

  

+ # And this is just ugly piece of yum we are going to copy n paste

+ def compareEVR(left_tuple, right_tuple):

+     """

+     return 1: a is newer than b

+     0: a and b are the same version

+     -1: b is newer than a

+     """

+     (e1, v1, r1) = left_tuple

+     (e2, v2, r2) = right_tuple

+     if e1 is None:

+         e1 = '0'

+     else:

+         e1 = str(e1)

+     v1 = str(v1)

+     r1 = str(r1)

+     if e2 is None:

+         e2 = '0'

+     else:

+         e2 = str(e2)

+     v2 = str(v2)

+     r2 = str(r2)

+     #print '%s, %s, %s vs %s, %s, %s' % (e1, v1, r1, e2, v2, r2)

+     rc = rpm.labelCompare((e1, v1, r1), (e2, v2, r2))

+     #print '%s, %s, %s vs %s, %s, %s = %s' % (e1, v1, r1, e2, v2, r2, rc)

+     return rc

+ 

+ def rangeCompare(reqtuple, provtuple):

+     """returns true if provtuple satisfies reqtuple"""

+     (reqn, reqf, (reqe, reqv, reqr)) = reqtuple

+     (n, f, (e, v, r)) = provtuple

+     if reqn != n:

+         return 0

+ 

+     # unversioned satisfies everything

+     if not f or not reqf:

+         return 1

+ 

+     # and you thought we were done having fun

+     # if the requested release is left out then we have

+     # to remove release from the package prco to make sure the match

+     # is a success - ie: if the request is EQ foo 1:3.0.0 and we have

+     # foo 1:3.0.0-15 then we have to drop the 15 so we can match

+     if reqr is None:

+         r = None

+     if reqe is None:

+         e = None

+     if reqv is None: # just for the record if ver is None then we're going to segfault

+         v = None

+ 

+     # if we just require foo-version, then foo-version-* will match

+     if r is None:

+         reqr = None

+ 

+     rc = compareEVR((e, v, r), (reqe, reqv, reqr))

+ 

+     # does not match unless

+     if rc >= 1:

+         if reqf in ['GT', 'GE', 4, 12, '>', '>=']:

+             return 1

+         if reqf in ['EQ', 8, '=']:

+             if f in ['LE', 10, 'LT', 2, '<=', '<']:

+                 return 1

+         if reqf in ['LE', 'LT', 'EQ', 10, 2, 8, '<=', '<', '=']:

+             if f in ['LE', 'LT', 10, 2, '<=', '<']:

+                 return 1

+ 

+     if rc == 0:

+         if reqf in ['GT', 4, '>']:

+             if f in ['GT', 'GE', 4, 12, '>', '>=']:

+                 return 1

+         if reqf in ['GE', 12, '>=']:

+             if f in ['GT', 'GE', 'EQ', 'LE', 4, 12, 8, 10, '>', '>=', '=', '<=']:

+                 return 1

+         if reqf in ['EQ', 8, '=']:

+             if f in ['EQ', 'GE', 'LE', 8, 12, 10, '=', '>=', '<=']:

+                 return 1

+         if reqf in ['LE', 10, '<=']:

+             if f in ['EQ', 'LE', 'LT', 'GE', 8, 10, 2, 12, '=', '<=', '<', '>=']:

+                 return 1

+         if reqf in ['LT', 2, '<']:

+             if f in ['LE', 'LT', 10, 2, '<=', '<']:

+                 return 1

+     if rc <= -1:

+         if reqf in ['GT', 'GE', 'EQ', 4, 12, 8, '>', '>=', '=']:

+             if f in ['GT', 'GE', 4, 12, '>', '>=']:

+                 return 1

+         if reqf in ['LE', 'LT', 10, 2, '<=', '<']:

+             return 1

+ #                if rc >= 1:

+ #                    if reqf in ['GT', 'GE', 4, 12, '>', '>=']:

+ #                        return 1

+ #                if rc == 0:

+ #                    if reqf in ['GE', 'LE', 'EQ', 8, 10, 12, '>=', '<=', '=']:

+ #                        return 1

+ #                if rc <= -1:

+ #                    if reqf in ['LT', 'LE', 2, 10, '<', '<=']:

+ #                        return 1

+ 

+     return 0

+ 

+ # yum code ends here

+ 

+ def prco_to_tuple(prco_string):

+     """

+     Creates dependency tuple containing name and evr from single string provided by dnf

+     Example input: "glib2(x86-64) >= 2.54.0"

+     Example output: (glib2, "GE", (0, 2.54.0, 1))

+ 

+     Architecture requirements are lost during the conversion. Release is set to 1 if missing.

+     """

+     sign = ""

+     if "<=" in prco_string:

+         sign = "LE"

+         data = prco_string.split("<=")

+     elif "<" in prco_string:

+         sign = "LT"

+         data = prco_string.split("<")

+     elif ">=" in prco_string:

+         sign = "GE"

+         data = prco_string.split(">=")

+     elif ">" in prco_string:

+         sign = "GT"

+         data = prco_string.split(">")

+     elif "=" in prco_string:

+         sign = "EQ"

+         data = prco_string.split("=")

+     else:

+         sign = None

+         data = [prco_string, None]

+ 

+     # We might have got prco without evr

+     if not data[1]:

+         return (data[0], sign, (None, None, None))

+ 

+     # Trim whitespace from start and end

+     data[0] = data[0].strip()

+     data[1] = data[1].strip()

+ 

+     base_string_for_nevra = str(data[0] + "-" + data[1])

+     # Hawkey nevra parser doesn't like brackets :/

+     # Removes brackets and content inside them

+     # eg. "glib2(x86-64) >= 2.54.0" > "glib2 >= 2.54.0"

+     base_string_for_nevra = re.sub(r' *\(.*?\)', '', base_string_for_nevra)

+ 

+     try:

+         string_for_nevra = str(base_string_for_nevra + ".noarch")

+         nevra = hawkey.split_nevra(string_for_nevra)

+     # We might be missing release

+     except hawkey.ValueException:

+         string_for_nevra = str(base_string_for_nevra + "-1.noarch")

+         nevra = hawkey.split_nevra(string_for_nevra)

+ 

+     return (data[0], sign, (nevra.epoch, nevra.version, nevra.release))

  

- # XXX we should just be able to use the pkg.inPrcoRange method but I think it's

- # broken - it seems to have the rangeCompare arguments backwards.

  def inPrcoRange(prcotype, p1, p2):

-     p2_tuple = (p2.name, 'EQ', (p2.epoch, p2.ver, p2.rel))

-     for obs in p1.returnPrco(prcotype):

-         if rpmRangeCompare(obs, p2_tuple):

+     "Checks if p1 satisfies p2 or vice versa"

+     if prcotype:

+         p1_data = getattr(p1, prcotype)

+     else:

+         raise Exception("Unexpected prcotype: %s" % prcotype)

+ 

+     #p2 stays the same, no need to assign it again and again below

+     p2_tuple = (p2.name, 'EQ', (p2.epoch, p2.version, p2.release))

+ 

+     for item in p1_data:

+         p1_tuple = prco_to_tuple(str(item))

+         if rangeCompare(p1_tuple, p2_tuple):

              return True

-     return False

  

+     return False

  

  def check_list_for_prco(prcotype, pkglist):

-     '''Compare each pair of packages in the pkglist to see if any of them

-     obsoletes/conflicts/requires/provides the other'''

+     """

+     Compare each pair of packages in the pkglist to see if any of them

+     obsoletes/conflicts/requires/provides the other

+     """

      for p1 in pkglist:

          for p2 in pkglist:

              if p1 == p2:

                  continue

              if inPrcoRange(prcotype, p1, p2) or inPrcoRange(prcotype, p2, p1):

-                 #print '%s %s %s' % (p1, prcotype, p2)

+                 #print("%s and %s in prco range." % (p1.name, p2.name))

                  return True

      return False

  

  

  def obsolete_each_other(pkglist):

+     "Checks if packages in pkglist obsolete each other"

      return check_list_for_prco('obsoletes', pkglist)

  

  

  def package_conflict(pkglist):

+     "Checks if packages in pkglist conflict"

      return check_list_for_prco('conflicts', pkglist)

  

  

- # XXX with sufficiently fast network, the bottleneck is CPU in the checksum

- # comparison here. Maybe we should multithread it someday?

- def file_conflict(fn, pkglist):

-     checksum = None

- 

-     for pkg in pkglist:

-         #print "Fetching Headers for %s" % pkg

-         #download header

-         # make this into a proper error func check for urlgrabber routines

-         # so we get the proper retries

-         if not os.path.exists(pkg.localHdr()):

-             try:

-                 hdr = pkg.returnLocalHeader()

-             except yum.Errors.RepoError, e:

-                 pkg.repo.getHeader(pkg)

-         else:

-             pkg.repo.getHeader(pkg)

-         #read in header

-         try:

-             hdr = pkg.returnLocalHeader()

-         except yum.Errors.RepoError, e:

-             #print "Cannot retrieve header for %s" % pkg

-             return True

+ def file_conflict_is_permitted(left_loc, right_loc, filename):

+     """

+     Returns True if rpm would allow both the given packages to share

+     ownership of the given filename.

+     """

+     ts = rpm.TransactionSet()

+     ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES)

  

-         fi = hdr.fiFromHeader()

-         for fitup in fi:

-             if fitup[0] == fn:

-                 if checksum is None:

-                     checksum = fitup[-1]

-                 elif checksum != fitup[-1]:

-                     return True

+     fd_left = os.open(left_loc, os.O_RDONLY)

+     fd_right = os.open(right_loc, os.O_RDONLY)

  

-     return False

+     left_files = rpm.files(ts.hdrFromFdno(fd_left))

+     right_files = rpm.files(ts.hdrFromFdno(fd_right))

  

+     # Close FDs

+     os.close(fd_left)

+     os.close(fd_right)

  

- (opts, cruft) = parseArgs()

+     if left_files[filename].matches(right_files[filename]):

+         #print('Conflict on %s between %s and %s permitted because files match' % (filename, left_loc, right_loc))

+         return True

+     # TODO: Check if the following code is working properly and uncomment it eventually

+     #if left_files[filename].color != right_files[filename].color:

+     #    #print('Conflict on %s between %s and %s permitted because colors differ' % (filename, left_loc, right_loc))

+     #    return True

+     return False

  

- my = yum.YumBase()

- my.doConfigSetup(fn = opts.config, init_plugins=False)

+ def get_package(pkg):

+     "Returns location of the package and downloads it if it's not available in local storage"

+     if not "file://" in pkg.remote_location():

+         pkg.base.download_packages([pkg])

+     return pkg.localPkg()

  

- temp_cachedir = None

- if os.geteuid() != 0 or opts.tempcache:

-     temp_cachedir = tempfile.mkdtemp(prefix='conflicts')

-     my.conf.cachedir = temp_cachedir

- else:

-     my.conf.cachedir = yum.misc.getCacheDir()

+ def file_conflict(fn, pkglist):

+     "Checks if is fn going to cause conflicts between packages from pkglist"

+     for pkg_left in pkglist:

+         location_left = get_package(pkg_left)

+         for pkg_right in pkglist:

+             if pkg_left == pkg_right:

+                 continue

+             location_right = get_package(pkg_right)

+             if not file_conflict_is_permitted(location_right, location_left, fn):

+                 return True

+             return False

  

- my.repos.setCacheDir(my.conf.cachedir)

+ if __name__ == "__main__":

  

- if opts.repofrompath:

- # setup the fake repos

-     for repo in opts.repofrompath:

-         repoid, repopath = tuple(repo.split(','))

-         if repopath[0] == '/':

-             baseurl = 'file://' + repopath

-         else:

-             # This URL may be a redirect, like download.fedoraproject.org. We

-             # need to resolve this redirect now, otherwise the redirect URL will

-             # be used for every package header download and that a) will extremely

-             # slow down download speed b) may cause consistency problems if not

-             # always the same repo mirror is returned from the redirect

-             net_obj = urllib.urlopen(repopath)

-             baseurl = repopath = net_obj.geturl()

- 

-         repopath = os.path.normpath(repopath)

-         newrepo = yum.yumRepo.YumRepository(repoid)

-         newrepo.name = repoid

-         newrepo.baseurl = baseurl

-         newrepo.basecachedir = my.conf.cachedir

-         newrepo.metadata_expire = 0

-         newrepo.timestamp_check = False

-         my.repos.add(newrepo)

-         my.repos.enableRepo(newrepo.id)

-         my.logger.info("Added %s repo from %s" % (repoid, repopath))

- 

- if opts.repoid:

-     for repo in my.repos.repos.values():

-         if repo.id not in opts.repoid:

-             repo.disable()

+     usage = """

+     Check given repos for possible file/package conflicts

+ 

+     %s [-c <config file>] [-r <repoid>] [-r <repoid2>]

+     """ % sys.argv[0]

+     parser = OptionParser(usage=usage)

+     parser.add_option("-c", "--config", default='/etc/yum.conf',

+                       help='config file to use (defaults to /etc/yum.conf)')

+     parser.add_option("-r", "--repoid", default=[], action='append',

+                       help=("specify repo ids to query, can be specified multiple times "

+                             "(default is all enabled)"))

+     parser.add_option("-t", "--tempcache", default=False, action="store_true",

+                       help="Use a temp dir for storing/accessing yum-cache")

+     parser.add_option("--repofrompath", action="append",

+                       help=("specify repoid & paths of additional repositories "

+                             "- unique repoid and complete path required, can be specified multiple "

+                             "times. Example. --repofrompath=myrepo,/path/to/repo"))

+     (opts, _) = parser.parse_args()

+ 

+     my = dnf.Base()

+     my.read_all_repos()

+ 

+     temp_cachedir = None

+     if os.geteuid() != 0 or opts.tempcache:

+         temp_cachedir = tempfile.mkdtemp(prefix='conflicts')

+         my.conf.cachedir = temp_cachedir

+     else:

+         my.conf.cachedir = dnf.yum.misc.getCacheDir()

+ 

+     if opts.repofrompath:

+     # setup the fake repos

+         for repo in opts.repofrompath:

+             repoid, repopath = tuple(repo.split(','))

+             if repopath[0] == '/':

+                 baseurl = 'file://' + repopath

+             else:

+                 # This URL may be a redirect, like download.fedoraproject.org. We

+                 # need to resolve this redirect now, otherwise the redirect URL will

+                 # be used for every package header download and that a) will extremely

+                 # slow down download speed b) may cause consistency problems if not

+                 # always the same repo mirror is returned from the redirect

+                 with urlopen(repopath) as net_obj:

+                     baseurl = repopath = net_obj.geturl()

+ 

+             repopath = os.path.normpath(repopath)

+             newrepo = dnf.repo.Repo(repoid)

+             newrepo.name = repoid

+             newrepo.baseurl = baseurl

+             newrepo.basecachedir = my.conf.cachedir

+             newrepo.metadata_expire = 0

+             newrepo.timestamp_check = False

+             my.repos.add(newrepo)

+             print("Added %s repo from %s" % (repoid, repopath))

+ 

+     if opts.repoid:

+         for repo in my.repos.values():

+             if repo.id not in opts.repoid:

+                 repo.disable()

+             else:

+                 repo.enable()

+ 

+     fulldict = defaultdict(list)

+ 

+     print('Getting complete filelist for:')

+     for repo in my.repos.iter_enabled():

+         if repo.metalink:

+             print(repo.metalink)

          else:

-             repo.enable()

- 

- fulldict = defaultdict(list)

- #dirdict = defaultdict(list)

- 

- print 'Getting complete filelist for:'

- for repo in my.repos.listEnabled():

-     print repo.urls[0]

- 

- for pkg in my.pkgSack.returnNewestByNameArch():

-     for fn in pkg.filelist:

-         fulldict[fn].append(pkg)

- 

- #    for fn in pkg.dirlist:

- #        dirdict[fn].append(pkg)

- print '%u files found.\n' % len(fulldict)

- 

- print 'Looking for duplicated filenames:'

- filedict = {}

- for fn in fulldict.keys():

-     if len(fulldict[fn]) >= 2:

-         filedict[fn] = fulldict[fn]

- del fulldict # frees up a little memory

- print '%u duplicates found.\n' % len(filedict)

- 

- print 'Doing more advanced checks to see if these are real conflicts:'

- count = 0

- found = 0

- total = len(filedict)

- milestone = int(0.05 * total)

- start = time.time()

- last = start

- package_conflicts = set()

- confdict = {}

- for (fn, pkglist) in filedict.iteritems():

-     #print '%d checking on %s ...' % (count, fn),

-     if (pkg_names_match(pkglist) or

-         sourcerpms_match(pkglist) or

-         obsolete_each_other(pkglist)):

-         pass

-     elif package_conflict(pkglist):

-         strlist = sorted([str(p) for p in pkglist])

-         package_conflicts.add('\n'.join(strlist))

-     elif file_conflict(fn, pkglist):

-         found += 1

-         confdict[fn] = pkglist

- 

-     # Timing / counting output

-     count += 1

-     if count % milestone == 0:

-         now = time.time()

-         percent = round(float(count*100) / total)

-         files_per_sec = float(milestone) / (now - last)

-         total_files_per_sec = float(count) / (now - start)

-         eta_sec = float(total - count) / total_files_per_sec

-         eta = str(datetime.timedelta(seconds=int(eta_sec)))

-         print "%3u%% complete (%6u/%6u, %5u/sec), %4u found - eta %s" % \

-             (percent, count, total, files_per_sec, found, eta)

-         last = now

- del filedict

- print "%u file conflicts found." % len(confdict)

- print "%u package conflicts found." % len(package_conflicts)

- 

- # Reduce the results to a dict like {"pkga,pkgb": [conflicting files], ...}

- rbpp = defaultdict(list)# Results By Package-Pair

- for (fn, pkglist) in confdict.iteritems():

-     pkgpair = '\n'.join(sorted([str(p) for p in pkglist]))

-     rbpp[pkgpair].append(fn)

- del confdict

- 

- print '\n== Package conflicts =='

- for pkglist in package_conflicts:

-     print pkglist + "\n"

- 

- print '\n== File conflicts, listed by conflicting packages =='

- for (pkgpair, files) in rbpp.iteritems():

-     print pkgpair

-     for f in sorted(files):

-         print '  ' + f

-     print

- 

- # delete cache

- if temp_cachedir:

-     shutil.rmtree(temp_cachedir)

- 

- if rbpp:

-     # file conflicts should be considered a failure, thus exit 1

-     sys.exit(1)

+             print(repo.baseurl[0])

+ 

+     my.fill_sack()

+     q = my.sack.query()

+ 

+     for package in q:

+         if str(package.reponame) == "@System":

+             continue

+         #print('{} in repo {}'.format(package, package.reponame))

+         for file_name in package.files:

+             fulldict[file_name].append(package)

+ 

+     print('%u files found.\n' % len(fulldict))

+ 

+     print('Looking for duplicated filenames:')

+     filedict = {}

+     for file_name in fulldict.keys():

+         if len(fulldict[file_name]) >= 2:

+             filedict[file_name] = fulldict[file_name]

+     del fulldict # frees up a little memory

+     print('%u duplicates found.\n' % len(filedict))

+ 

+     print('Doing more advanced checks to see if these are real conflicts:')

+     count = 0

+     found = 0

+     total = len(filedict)

+     milestone = int(0.05 * total)

+     start = time.time()

+     last = start

+     package_conflicts = set()

+     confdict = {}

+     for (file_name, package_list) in filedict.items():

+         # /usr/lib/.build-id seems to be causing whole lot of false positives, so just skip it

+         if file_name == "/usr/lib/.build-id":

+             continue

+         if (pkg_names_match(package_list) or

+                 sourcerpms_match(package_list) or

+                 obsolete_each_other(package_list)):

+             pass

+         elif package_conflict(package_list):

+             strlist = sorted([str(p) for p in package_list])

+             package_conflicts.add('\n'.join(strlist))

+         elif file_conflict(file_name, package_list):

+             found += 1

+             confdict[file_name] = package_list

+ 

+         # Timing / counting output

+         count += 1

+         if count % milestone == 0:

+             now = time.time()

+             percent = round(float(count*100) / total)

+             files_per_sec = float(milestone) / (now - last)

+             total_files_per_sec = float(count) / (now - start)

+             eta_sec = float(total - count) / total_files_per_sec

+             eta = str(datetime.timedelta(seconds=int(eta_sec)))

+             print("%3u%% complete (%6u/%6u, %5u/sec), %4u found - eta %s" % \

+                 (percent, count, total, files_per_sec, found, eta))

+             last = now

+     del filedict

+     print("%u file conflicts found." % len(confdict))

+     print("%u package conflicts found." % len(package_conflicts))

+ 

+     # Reduce the results to a dict like {"pkga,pkgb": [conflicting files], ...}

+     rbpp = defaultdict(list)# Results By Package-Pair

+     for (file_name, package_list) in confdict.items():

+         pkgpair = '\n'.join(sorted([str(p) for p in package_list]))

+         rbpp[pkgpair].append(file_name)

+     del confdict

+ 

+     print('\n== Package conflicts ==')

+     for package_list in package_conflicts:

+         print(package_list + "\n")

+ 

+     print('\n== File conflicts, listed by conflicting packages ==')

+     for (pkgpair, files) in rbpp.items():

+         print(pkgpair)

+         for file_name in sorted(files):

+             print('  ' + file_name)

+         print("")

+ 

+     # delete cache

+     if temp_cachedir:

+         shutil.rmtree(temp_cachedir)

+ 

+     if rbpp:

+         # file conflicts should be considered a failure, thus exit 1

+         sys.exit(1)

So, this is heavily WIP kind of stuff. Package conflicts sort of works, file conflicts are skipped completely right now.

What am I struggling with:

  • there seems to be no returnPrco method on the dnf package object. I am able to get PRCO data from the respective lists (package.requires, and so on) on the package object, but it seems there is no additional data apart from names, eg: ('name', 'EQ', ('epoch', 'version', 'release')). For now, I've tried it without this additional data, but the results are a little bit different, so they probably contain false positives
  • file conflicts are not implemented at all; it seems there are no methods for working on package headers exposed in the dnf Python API
# yum PRCO
ipdb> p1.returnPrco("provides")
[('xorg-x11-drv-nvidia-340xx-cuda(x86-32)', 'EQ', ('1', '340.107', '4.fc30')), ('xorg-x11-drv-nvidia-340xx-cuda', 'EQ', ('1', '340.107', '4.fc30')), ('nvidia-persistenced', 'EQ', ('0', '340.107', '4.fc30')), ('nvidia-modprobe', 'EQ', ('0', '340.107', '4.fc30')), ('libnvidia-opencl.so.1', None, (None, None, None)), ('libnvidia-ml.so.1', None, (None, None, None)), ('libnvidia-encode.so.1', None, (None, None, None)), ('libnvidia-compiler.so.340.107', None, (None, None, None)), ('libnvcuvid.so.1', None, (None, None, None)), ('libcuda.so.1', None, (None, None, None)), ('cuda-drivers(x86-32)', 'EQ', ('0', '340.107', None)), ('cuda-drivers', 'EQ', ('0', '340.107', None)), ('config(xorg-x11-drv-nvidia-340xx-cuda)', 'EQ', ('1', '340.107', '4.fc30'))]

#dnf PRCO
ipdb> str(p1.requires[n])
'/usr/bin/bash'
...

Btw, I left the code in Python 2 for now; you can run this version of the script on F >=30 after dnf install python2-dnf --releasever=29. If you prefer to have the code in Python 3 asap, I can do that too.

rebased onto 0bc6d1a

4 years ago

rebased onto 87fefa7

4 years ago

rebased onto 6c91b7e

4 years ago

please make it python 3, yes. there's no reason to have anything python 2 specific at this point. of course you can write it to work with both if you like. :)

@dmach is there someone from DNF side who can help with this project - recommending the best ways to do things, and adding features to dnf/libdnf if required? It's quite important that this script be ported to dnf if yum is going away soon.

@adamwill Frantisek is sitting next to our cubicle; he can always stop by and ask for help.
BTW, isn't this duplicate work with rpmdeplint?

@dmach Here's a bug that prevents us from using rpmdeplint:
https://bugzilla.redhat.com/show_bug.cgi?id=1562029

Also, I recently sent a list of documented use cases to @dmach for a potential replacement tool for rpmdeplint, and it contains a use case for this particular check as well. If there is such a tool, we can ditch our custom poorly-functioning scripts, and that would be nice :-)

Let's say this is potentially a duplicate of rpmdeplint. The requirements for this tool are basically: tell us whether there are any packages which appear to have file conflicts, but do not have explicit package conflicts, on a DVD ISO. Exit 1 if so, 0 if not. So, it's a check for this test case. If we can make rpmdeplint do that, great.

rebased onto 8e749d4

4 years ago

I've pushed updated version of this.

  • It's python 3

  • File conflicts are implemented (code taken/modified from rpmdeplint)

  • Results are similar to original yum version

Compared to original yum script it has following regressions:

  • Checking prco range between packages is based only on unversioned provides/conflicts/obsoletes/requires (this doesn't seem to be a big issue though)

  • Checking file conflicts is done on entire rpm archives, not just on headers — not a big issue for our use case; run times are worse only for remote repositories, and it's nothing horrible

@adamwill , WDYT?

rebased onto 5c46963

4 years ago

"Checking prco range between packages is based only on unversioned provides/conflicts/obsoletes/requires (this doesn't seem to be a big issue though)"

Not quite grokking this. Do you mean versioned provides/requires/conflicts/obsoletes are ignored? Or just the version component is ignored and they're treated as if they were unversioned?

I can see potential problems either way. If versioned ones are ignored, we could get false failures: if 'foo' conflicts with 'bar > 1.0', and the DVD contains foo and bar-2.0, and there's a file conflict between those two packages, the script may report a file conflict without a package conflict when really there is a package conflict and it's fine.

OTOH, if the version component is ignored, what if 'foo' conflicts with 'bar < 2.0', but the DVD contains foo and bar-2.0 and there is a file conflict between them? The script will suggest that things are fine because there's a package conflict between foo and bar, but really things are not fine because the conflict will not take effect due to the version restriction...

I'll let Frantisek reply, but just note that the existing yum-based script is broken as well. It detects file conflicts in packages that explicitly conflict with each other. So even if the first case you described were true, it still wouldn't make the situation worse - for everything reported we'd need to manually check whether it really applies (the same way we had to in the past). We need to avoid the second case - silencing an actual problem. (And I'm not sure the existing script doesn't fail in this case as well - we can try with rpmfluff).

Edit: Actually I might be wrong about the existing script and misunderstood the "Package conflicts" vs "File conflicts" section (for all that time).

I don't believe the existing script does do that, actually. Or at least, it doesn't show them. Here's how it is there:

for (fn, pkglist) in filedict.iteritems():
    #print '%d checking on %s ...' % (count, fn),
    if (pkg_names_match(pkglist) or
        sourcerpms_match(pkglist) or
        obsolete_each_other(pkglist)):
        pass
    elif package_conflict(pkglist):
        strlist = sorted([str(p) for p in pkglist])
        package_conflicts.add('\n'.join(strlist))
    elif file_conflict(fn, pkglist):
        found += 1
        confdict[fn] = pkglist
...
print "%u file conflicts found." % len(confdict)
print "%u package conflicts found." % len(package_conflicts)

Note the elifs. It looks for a package conflict first; if it finds one, it will never proceed to looking for a file conflict; it only looks for and stores file conflicts when no package conflict is found.

Of course, I may be missing something...

Note the elifs. It looks for a package conflict first; if it finds one, it will never proceed to looking for a file conflict; it only looks for and stores file conflicts when no package conflict is found.

You are right, if it finds a package conflict, it skips detecting the file conflicts. Just now, I've found there actually is information about versioned prco, it's just that everything is in one string, sigh. I should be able to parse that pretty easily (by splitting the string on <, >, <=, >=, or =, using the first part as the name of the prco, the sign, and the second part as the version info).

More difficult and error-prone is going to be implementing something like rpmRangeCompare (which I've found out today is actually a part of yum :/ ).

Regarding the old yum script, I've mixed package conflicts with file conflicts, @kparal sorry for misleading you :)

1 new commit added

  • Try to parse additional data from PRCO
4 years ago

I've added a parser for the prco string (and two functions from yum: rangeCompare, compareEVR) to try digging out the evr from them.

Comparing to the old script, on Server DVD repo, it's still missing two explicit package conflicts, I'll try to look into it tomorrow:

== Package conflicts ==
2:docker-common-1.13.1-66.git1185cfd.fc30.x86_64
moby-engine-18.06.3-2.ce.gitd7080c1.fc30.x86_64

2:docker-1.13.1-66.git1185cfd.fc30.x86_64
moby-engine-18.06.3-2.ce.gitd7080c1.fc30.x86_64

2 new commits added

  • Try to parse additional data from PRCO
  • WIP: Port from yum to dnf
4 years ago

Okay,

I've added some fixes and now, on my test set (F30 Server DVD repo and rpmfusion-nonfree), the script from this PR produces the same results as the old one, yay!

There is one ugly thing remaining though, check line 458. I am confident using the script as-is with that hacky workaround, but I'd understand if you weren't :)

https://pagure.io/fedora-qa/qa-misc/blob/6658e4a2ae1ed26bfe222c966a3d80a7e67b6f56/f/potential_conflict.py#_458

rebased onto dde8eee

4 years ago

Nevermind, fixed that :)

    # /usr/lib/.build-id seems to be causing whole lot of false positives, so just skip it
    if fn == "/usr/lib/.build-id":
        continue

So, it should be ready for review now.

Just as a note, DNF guys are continually threatening to take this (hawkey.split_nevra) away from us :/ See e.g. here. But hey, the more things we have using it, the louder we can tell them no!

you could probably do the above slightly more neatly using a dict, but meh, it's not long enough to really hurt.

you can do the above a bit more neatly, for style points: p1_data = getattr(p1, prcotype)

Just as a note, DNF guys are continually threatening to take this (hawkey.split_nevra) away from us :/ See e.g. here. But hey, the more things we have using it, the louder we can tell them no!

Yeah, we've talked about that. If I remember correctly, there should be some replacement available before they remove it.

Apart from that, I forgot to mention (possible) regression compared to the old yum script: this one strips architecture information from prco. So, something like 'pcre2(x86-64)-10.21-4' is treated as if it was 'pcre2-10.21-4.noarch'. I am not sure the old script handled that properly at all though.

if you want to be rigorous about it you could set up logging properly and make this a debug log statement or something...

is this comment still accurate for the new code? if not, remove it...

why is this included but commented out?

again, why is the 'a' stuff included but commented out?

is this the workaround you added for .build-id? if so, is it still needed now you skip that file?

can you run pylint-3 on the file and fix some of the issues? we probably can ignore the variable naming complaints for a single-file script, but most of the other things can probably be fixed. as some of them may be inherited, maybe do it in a second commit.

aside from that and the comments above this looks good to my eyeballs...perhaps @jskladan or @tflink could provide a second opinion. I'll try and run it over a few other ISOs I have lying around later today.

1 new commit added

  • Changes from PR Review
4 years ago

Okay, thanks for the review. I'll look at logging later today; should I use it just for the debug level or for all logging (e.g. replace printing completely)?

'a' stuff shouldn't have been there at all, it originated from example from dnf documentation, I'd commented it out and forgot about it.

Everything should be addressed apart from logging and pylint.

Logging is kinda an optional extra, I wouldn't block the review on it...it's not super important for a single file script. Commented-out prints just make me itch a bit. :P So I wouldn't worry about it really.

1 new commit added

  • pylint fixes
4 years ago

3 new commits added

  • pylint fixes
  • Changes from PR Review
  • Port from yum to dnf
4 years ago

3 new commits added

  • pylint fixes
  • Changes from PR Review
  • Port from yum to dnf
4 years ago

So, I've added some fixes as suggested by pylint. The last item that seems important is replacing optparse with argparse.

It looks like it's not a drop-in replacement. I think I can do that later together with logging stuff, but if you prefer to have it done before merging, I'll look into that tomorrow.

Nah, I think doing that as a follow-up would be fine, this looks mergeable now; I'll test it out a bit and merge it if I find no problems. Thanks a lot!

Just as a note, DNF guys are continually threatening to take this (hawkey.split_nevra) away from us :/ See e.g. here. But hey, the more things we have using it, the louder we can tell them no!

Yeah, we've talked about that. If I remember correctly, there should be some replacement available before they remove it.
Apart from that, I forgot to mention (possible) regression compared to the old yum script: this one strips architecture information from prco. So, something like 'pcre2(x86-64)-10.21-4' is treated as if it was 'pcre2-10.21-4.noarch'. I am not sure the old script handled that properly at all though.

Oh, I forgot to reply to this - thanks for pointing it out, I agree we can live with it though. I would guess it's very rarely gonna be an issue, and I also don't know if the old version handled it correctly either. The result of this script is always gonna be something a human should check the details of anyway, so...I think it's OK.

Oh, there's one pylint issue you missed:

potential_conflict.py:314:8: W0612: Unused variable 'e' (unused-variable)

indeed, you assign the exception as e but then don't actually use it.

oh, couple other minor things. Another pylint one:

potential_conflict.py:68:5: W0621: Redefining name 'opts' from outer scope (line 332) (redefined-outer-name)

this is basically because you're doing all the main work of the script just at the top level - defining opts at that level means the opts that the parseArgs() function deals with is actually that same object. In practice with this particular code I don't think that'll cause any problems, but it is slightly weird. One way to avoid this is to put all that work in a function called e.g. main() and then do this at the top level:

if __name__ == '__main__':
    main()

then the definition of opts would be inside the main() function, so it wouldn't be a global any more. (This would also shut up a lot of the warnings about incorrect global variable names, as they wouldn't be global any more). Again this probably isn't practically causing any problems, but it's probably a good idea to follow typical style.

There are also a couple of missing function doc strings it wouldn't hurt to add, for parseArgs() and compareEVR().

Also, in (opts, cruft) = parseArgs() - normal style there would be (opts, _) = parseArgs(), the convention for Python is that you give a variable you are forced to accept but aren't going to use the name _. Or, better, you could just not have parseArgs return args at all, if you don't intend to use it.

when you use --repofrompath for a local path, this seems to just print 'None'.

Results for F30 Beta Server DVD ISO (Fedora-Server-dvd-x86_64-30_Beta-1.8.iso) are not quite the same: the old script found two conflicting files (/etc/sysconfig/docker and /usr/bin/docker) but the new finds only /etc/sysconfig/docker. Not sure why that is right now.

Found a bug testing on an F29 Beta ISO, Fedora-Server-dvd-x86_64-29_Beta-1.4.iso : it crashes on the second attempt at hawkey.split_nevra. The first string it tries is "apr-1.3.0.noarch"; the second string it tries is "apr(x86-64)-1.3.0-1.noarch". So it seems when we get a case like this with the arch, it doesn't just get ignored, it actually breaks stuff. That needs fixing somehow...

1 new commit added

  • Fix crashing on nevra parsing
4 years ago

Results for F30 Beta Server DVD ISO (Fedora-Server-dvd-x86_64-30_Beta-1.8.iso) are not quite the same: the old script found two conflicting files (/etc/sysconfig/docker and /usr/bin/docker) but the new finds only /etc/sysconfig/docker. Not sure why that is right now.

That seems to be because colors differ (so, packages should be installable side by side, were they conflicting only on this file). This evaluates as True for /usr/bin/docker between moby-engine-18.06.3-1.ce.gitd7080c1.fc30.x86_64.rpm and docker-common-1.13.1-65.git1185cfd.fc30.x86_64.rpm:

left_files[filename].color != right_files[filename].color

Found a bug testing on an F29 Beta ISO, Fedora-Server-dvd-x86_64-29_Beta-1.4.iso

Fixed, I had forgotten to purge the arch info on the except code path.

1 new commit added

  • Print baseurl on repos from path instead of repo.metalink (None)
4 years ago

2 new commits added

  • Reformat comments in compareEVR as docstring
  • Place code not contained in functions into __main__
4 years ago

1 new commit added

  • Remove unused var
4 years ago

So, problems should be fixed, feedback addressed.

While removing that unused variable, I was wondering, how to handle dnf.exceptions.DownloadError. Right now, it's probably just going to crash somewhere else. There are a few options that I can think of:

  • Don't handle them at all and die horribly once download fails
  • Try multiple times with some sleeping between tries, die if it had failed more times
  • Second option + ignoring the package instead of dying after trying a few times

However, I suppose we are not going to use this script against a remote repo too often, so just dying on DownloadError might be the way forward?

Edit: I've updated the PR not to handle dnf.exceptions.DownloadError at all.

1 new commit added

  • Don't catch dnf.exceptions.DownloadError
4 years ago

9 new commits added

  • Don't catch dnf.exceptions.DownloadError
  • Remove unused var
  • Reformat comments in compareEVR as docstring
  • Place code not contained in functions into __main__
  • Print baseurl on repos from path instead of repo.metalink (None)
  • Fix crashing on nevra parsing
  • pylint fixes
  • Changes from PR Review
  • Port from yum to dnf
4 years ago

yeah, I think just dying on it is probably fine. Honestly, if anyone's gonna implement retry-download-on-fail it should probably be done within DNF (I think there's a bug for this).

So, for the color difference - the new script is actually correct in that case, and the old script was wrong as it didn't consider color? Is that your assessment?

yeah, I think just dying on it is probably fine. Honestly, if anyone's gonna implement retry-download-on-fail it should probably be done within DNF (I think there's a bug for this).

It's going to land soon. There are 2 PRs on librepo, we need to review them and eventually merge together.

So now this is duplicated, which I don't like. How about we rejig this whole part a bit, like this?

base_string_for_nevra = str(data[0] + "-" + data[1])
# better comment as to exactly what this is stripping, ideally with
# an example, to avoid 'regexes are write-only' syndrome
base_string_for_nevra = re.sub(r' *\([^\]]*\)', '', string_for_nevra)
string_for_nevra = base_string_for_nevra + ".noarch"
try:
    nevra = hawkey.split_nevra(string_for_nevra)
# We might be missing release
except hawkey.ValueException:
    string_for_nevra = base_string_for_nevra + "-1.noarch"
    nevra = hawkey.split_nevra(string_for_nevra)

Er, there's a deliberate mistake in the above. Finding it is left as an exercise for the reader. ;) (aka pagure won't let me edit the comment, stupid pagure)

"Place code not contained in functions into main" still doesn't move it into a function, it's just inside a conditional. So all the things defined in there are still constants. Still this isn't causing any real problems in a single file script, just a note. That's why pylint is still complaining about invalid constant names.

1 new commit added

  • Process regex only once even when hitting except in prco_to_tuple
4 years ago

So now this is duplicated, which I don't like. How about we rejig this whole part a bit, like this?

Done

So, for the color difference - the new script is actually correct in that case, and the old script was > wrong as it didn't consider color? Is that your assessment?

Yes, as I understand colors, the new script should be correct. However, when I was talking with @dmach , he seemed to be not sure if the code checking colors is really correct (he didn't say it's wrong though :) ).

I can disable taking colors into account (it's just a question of commenting out lines 285-287) before confirming it's correct, or we can go ahead and tweak the code later if needed.

10 new commits added

  • Process regex only once even when hitting except in prco_to_tuple
  • Don't catch dnf.exceptions.DownloadError
  • Remove unused var
  • Reformat comments in compareEVR as docstring
  • Place code not contained in functions into __main__
  • Print baseurl on repos from path instead of repo.metalink (None)
  • Fix crashing on nevra parsing
  • pylint fixes
  • Changes from PR Review
  • Port from yum to dnf
4 years ago

as discussed on IRC, this regex is unnecessarily complicated, all the stuff with square brackets isn't doing anything much useful. This should be sufficient:

re.sub(r' *\(.*?\)', '', base_string_for_nevra)

that strips all occurrences of "some text inside some round brackets", plus leading (but not trailing) space characters.

1 new commit added

  • Simplify regex
4 years ago

11 new commits added

  • Simplify regex
  • Process regex only once even when hitting except in prco_to_tuple
  • Don't catch dnf.exceptions.DownloadError
  • Remove unused var
  • Reformat comments in compareEVR as docstring
  • Place code not contained in functions into __main__
  • Print baseurl on repos from path instead of repo.metalink (None)
  • Fix crashing on nevra parsing
  • pylint fixes
  • Changes from PR Review
  • Port from yum to dnf
4 years ago

as for the color thing - if you're not confident it's correct yet, let's disable it for now and fix it in a follow-up commit @dmach can help review.

1 new commit added

  • Temporarily disable checking for file colors
4 years ago

rebased onto d1b31dc

4 years ago

Pull-Request has been merged by adamwill

4 years ago
Metadata