#56 Issue #34: Improve handling of duplicate components
Merged 6 years ago by ncoghlan. Opened 6 years ago by ncoghlan.
modularity/ ncoghlan/fedmod issue-34-handle-duplicate-components  into  master

file modified
+18 -25
@@ -158,14 +158,13 @@ 

          _download_metadata_files(repo_definition)

      _download_bootstrap_modulemd()

  

- _SRPM_REVERSE_LOOKUP = {}  # SRPM name : module name

- _RPM_REVERSE_LOOKUP = {}   # RPM name : module name

- _BETTER_SRPM_REVERSE_LOOKUP = {}  # SRPM name : [module names]

- _BETTER_RPM_REVERSE_LOOKUP = {}   # RPM name : [module names]

- _BOOTSTRAP_REVERSE_LOOKUP = {}

+ _SRPM_REVERSE_LOOKUP = {}  # SRPM name : [module names]

+ _RPM_REVERSE_LOOKUP = {}   # RPM name : [module names]

+ _BOOTSTRAP_COMPONENTS = set()

  _MODULE_FORWARD_LOOKUP = {}

  def _populate_module_reverse_lookup():

-     # TODO: Cache the reverse mapping as a JSON file, as with _BOOTSTRAP_REVERSE_LOOKUP_CACHE

+     # TODO: Construct and cache the reverse mapping as a JSON file as part of

+     #       download_repo_metadata(), as with _BOOTSTRAP_REVERSE_LOOKUP_CACHE

      if _RPM_REVERSE_LOOKUP:

          return

      metadata_dir = os.path.join(_x86_64_MODULE_INFO.local_cache_path)
@@ -190,29 +189,19 @@ 

      for module in modules:

          _MODULE_FORWARD_LOOKUP[module.name] = module

          for srpmname in module.components.rpms:

-             # This isn't entirely valid, as it doesn't account for multiple

-             # modules that include the same source RPM with different output

-             # filters (e.g. python3-ecosystem vs python2-ecosystem)

-             _SRPM_REVERSE_LOOKUP[srpmname] = module.name

-             if srpmname not in _BETTER_SRPM_REVERSE_LOOKUP:

-                 _BETTER_SRPM_REVERSE_LOOKUP[srpmname] = []

-             _BETTER_SRPM_REVERSE_LOOKUP[srpmname].append(module.name)

+             srpm_entry = _SRPM_REVERSE_LOOKUP.setdefault(srpmname, [])

+             srpm_entry.append(module.name)

          for rpmname in module.artifacts.rpms:

-             # This is only valid for module sets that are guaranteed to be

-             # fully coinstallable, and hence only allow any given RPM to be

-             # published by at most one module

              rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0]

-             _RPM_REVERSE_LOOKUP[rpmprefix] = module.name

-             if rpmprefix not in _BETTER_RPM_REVERSE_LOOKUP:

-                 _BETTER_RPM_REVERSE_LOOKUP[rpmprefix] = []

-             _BETTER_RPM_REVERSE_LOOKUP[rpmprefix].append(module.name)

+             rpm_entry = _RPM_REVERSE_LOOKUP.setdefault(rpmprefix, [])

+             rpm_entry.append(module.name)

      # Read the extra RPM bootstrap metadata

      if not os.path.exists(_BOOTSTRAP_REVERSE_LOOKUP_CACHE):

          msg = (f"{_BOOTSTRAP_REVERSE_LOOKUP_CACHE!r} does not exist. "

                  "Try running `fedmod fetch-metadata` again.")

          raise MissingMetadata(msg)

      with open(_BOOTSTRAP_REVERSE_LOOKUP_CACHE, "r") as cachefile:

-         _BOOTSTRAP_REVERSE_LOOKUP.update(json.load(cachefile))

+         _BOOTSTRAP_COMPONENTS.update(json.load(cachefile))

  

  def list_modules():

      return _MODULE_FORWARD_LOOKUP.keys()
@@ -223,17 +212,21 @@ 

      return set()

  

  def get_modules_for_rpm(rpm_name):

-     result = _BETTER_RPM_REVERSE_LOOKUP.get(rpm_name)

+     result = _RPM_REVERSE_LOOKUP.get(rpm_name)

      return result

  

  def get_module_for_rpm(rpm_name, *, allow_bootstrap=False):

      result = _RPM_REVERSE_LOOKUP.get(rpm_name)

-     if allow_bootstrap and result is None:

-         _BOOTSTRAP_REVERSE_LOOKUP.get(rpm_name)

+     if result is not None:

+         if len(result) > 1:

+             log.warn(f"Multiple modules found for {rpm_name!r}: {','.join(result)}")

+         result = result[0]

+     elif allow_bootstrap and rpm_name in _BOOTSTRAP_COMPONENTS:

+         result = "bootstrap"

      return result

  

  def get_rpm_reverse_lookup():

-     return _BETTER_RPM_REVERSE_LOOKUP

+     return _RPM_REVERSE_LOOKUP

  

  class Repo(object):

      def __init__(self, name, metadata_path):

file modified
+2
@@ -35,6 +35,8 @@ 

      """Utilities for working with modular Fedora RPM repositories"""

      if verbose:

          logging.basicConfig(level=logging.INFO)

+     else:

+         logging.basicConfig(level=logging.WARN)

  

  

  # Fetching metadata

@@ -107,16 +107,10 @@ 

          expected_components = set(input_rpms)

          expected_components |= {

              'systemtap',

-             'multilib-rpm-config',

              'libselinux',

-             'libuv',

-             'jsoncpp',

-             'python-nose',

              'numpy',

-             'openblas',

              'Judy',

              'setools',

-             'rhash',

              'pyparsing',

              'boost',

              'libsemanage',
@@ -127,14 +121,12 @@ 

          # Expected module dependencies

          expected_build_deps = {

              'platform',

+             'bootstrap',

              'mariadb',

-             'python2',

              'networking-base',

              'host',

-             'python3',

              'perl',

              'installer',

-             'mariadb',

          }

          assert set(modmd.buildrequires) == expected_build_deps

          assert set(modmd.requires) == {'platform', 'perl', 'mariadb'}
@@ -192,7 +184,7 @@ 

          assert set(modmd.components.rpms) == expected_components

  

          # Expected module dependencies

-         # graphite-web is currently generating runtime module dependencies

+         # graphite-web is currently generating more runtime module dependencies

          # than expected, so it makes for an interesting test case to look for

          # cases where fedmod is picking up module implementation details

  
@@ -200,7 +192,8 @@ 

              'platform',

              'fonts',

              'python2',

-             'python2-ecosystem'

+             'python2-ecosystem',

+             'bootstrap'

          }

          _unexpected_build_requires = {

              'pki',
@@ -210,7 +203,6 @@ 

              'installer',

              'X11-base',

              'freeipa',

-             'cloud-init',

              'perl',

              'fonts',

              'freeipa',
@@ -223,12 +215,10 @@ 

              'platform',

              'httpd',

              'fonts',

-             'python2',

          }

          _unexpected_requires = {

              'pki',

              'samba',

-             'python3-ecosystem',

              'installer',

              'X11-base',

              'freeipa',

  • single reverse lookup tables from SRPMs and RPMs to modules
  • simplify bootstrap lookup to just a set
  • actually report hits from the bootstrap module properly
  • print a warning to stderr for ambiguous module lookups
  • adjust tests for the cleaner resulting draft modules

There's no test for the "ambiguous module lookup" warning, as I'd expect that to be incredibly fragile in the absence of a synthetic data set purely for testing, but I checked it interactively, and it gives clear ambiguity warnings when generating draft modulemd files for mariadb and graphite-web.

Pull-Request has been merged by ncoghlan

6 years ago