#91 Improve summarize-module tool
Closed 8 months ago by nphilipp. Opened 8 months ago by rdossant.
modularity/ rdossant/fedmod summarize-module  into  master

file modified
+22 -7

@@ -172,6 +172,7 @@ 

  

  _MODULE_FORWARD_LOOKUP_CACHE = "module-contents"

  _PROFILE_FORWARD_LOOKUP_CACHE = "module-profiles"

+ _MODULE_DEPENDENCIES_FORWARD_LOOKUP_CACHE = "module-dependencies"

  _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE = "stream-defaults"

  _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE = "profile-defaults"

  _SRPM_REVERSE_LOOKUP_CACHE = "srpm-to-module"

@@ -179,6 +180,7 @@ 

  

  _ALL_CACHES = [_MODULE_FORWARD_LOOKUP_CACHE,

                 _PROFILE_FORWARD_LOOKUP_CACHE,

+                _MODULE_DEPENDENCIES_FORWARD_LOOKUP_CACHE,

                 _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE,

                 _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE,

                 _SRPM_REVERSE_LOOKUP_CACHE,

@@ -327,20 +329,32 @@ 

      module_forward_lookup = {}

      srpm_reverse_lookup = defaultdict(list)

      rpm_reverse_lookup = defaultdict(list)

-     # {module-name: {stream: [profiles]}}

-     profile_forward_lookup = defaultdict(dict)

+     # {'module-name:stream:version:context': [profiles]}}

+     profile_forward_lookup = defaultdict(list)

      # {module-name: stream}

      stream_defaults_forward_lookup = {}

      # {module-name: {stream : [profiles]}}

      profile_defaults_forward_lookup = defaultdict(dict)

+     # {'module-name:stream:version:context': ['module-name:stream']}

+     module_dependencies_forward_lookup = defaultdict(list)

  

      for index_set in index_sets:

          for index in index_set:

              module_name = index.get_name()

+             for nsvc, module in index.get_streams().items():

+                 profiles = module.get_profiles().keys()

+                 profile_forward_lookup[nsvc] = sorted(profiles)

+ 

+                 for dep in module.get_dependencies():

+                     dset = set()

+                     for m, s in dep.peek_requires().items():

+                         dset.add(f"{m}:{','.join(s.get())}"

+                                  if len(s.get()) else m)

+                     module_dependencies_forward_lookup[nsvc] = sorted(dset)

+ 

              # What we think of as module is a ModuleStream for libmodulemd

              # We are ignoring context and using the stream with highest version

              for module in merge_modules(index.get_streams().values()):

-                 sname = module.get_stream()

                  artifacts = module.props.rpm_artifacts.get()

                  module_forward_lookup[module_name] = list(set(artifacts))

  

@@ -353,10 +367,6 @@ 

                      rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0]

                      rpm_reverse_lookup[rpmprefix].append(module_name)

  

-                 # {'name': ModuleProfile}

-                 profiles = list(module.get_profiles().keys())  # only names

-                 profile_forward_lookup[module_name][sname] = profiles

- 

              defaults = index.get_defaults()

              if not defaults:

                  continue

@@ -371,6 +381,8 @@ 

      print("Caching lookup tables")

      _write_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE, module_forward_lookup)

      _write_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE, profile_forward_lookup)

+     _write_cache(paths, _MODULE_DEPENDENCIES_FORWARD_LOOKUP_CACHE,

+                  module_dependencies_forward_lookup)

      _write_cache(paths, _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE,

                   stream_defaults_forward_lookup)

      _write_cache(paths, _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE,

@@ -397,6 +409,7 @@ 

      rpm_to_modules = attrib(type=dict)

      module_to_packages = attrib(type=dict)

      module_to_profiles = attrib(type=dict)

+     module_to_deps = attrib(type=dict)

      stream_defaults = attrib(type=dict)

      profile_defaults = attrib(type=dict)

      repo_cache_paths = attrib(type=dict)

@@ -433,6 +446,8 @@ 

          rpm_to_modules=_read_cache(paths, _RPM_REVERSE_LOOKUP_CACHE),

          module_to_packages=_read_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE),

          module_to_profiles=_read_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE),

+         module_to_deps=_read_cache(paths,

+                                    _MODULE_DEPENDENCIES_FORWARD_LOOKUP_CACHE),

          stream_defaults=_read_cache(paths,

                                      _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE),

          profile_defaults=_read_cache(paths,

file modified
+25 -9

@@ -21,7 +21,7 @@ 

      _ACTIVE_DATASET = load_cached_repodata(dataset_name)

  

  

- def get_dataset():

+ def _get_dataset():

      if _ACTIVE_DATASET is None:

          _load_dataset()

      return _ACTIVE_DATASET

@@ -44,20 +44,20 @@ 

  

  

  def list_modules():

-     return get_dataset().module_to_packages.keys()

+     return _get_dataset().module_to_packages.keys()

  

  

  def get_rpms_in_module(module_name):

-     return get_dataset().module_to_packages.get(module_name, [])

+     return _get_dataset().module_to_packages.get(module_name, [])

  

  

  def get_modules_for_rpm(rpm_name):

-     result = get_dataset().rpm_to_modules.get(rpm_name)

+     result = _get_dataset().rpm_to_modules.get(rpm_name)

      return result

  

  

  def get_module_for_rpm(rpm_name):

-     result = get_dataset().rpm_to_modules.get(rpm_name)

+     result = _get_dataset().rpm_to_modules.get(rpm_name)

      if result is not None:

          if len(result) > 1:

              log.warn(

@@ -67,7 +67,23 @@ 

  

  

  def get_rpm_reverse_lookup():

-     return get_dataset().rpm_to_modules

+     return _get_dataset().rpm_to_modules

+ 

+ 

+ def get_modules_profiles_lookup():

+     return _get_dataset().module_to_profiles

+ 

+ 

+ def get_modules_dependencies_lookup():

+     return _get_dataset().module_to_deps

+ 

+ 

+ def get_modules_default_streams_lookup():

+     return _get_dataset().stream_defaults

+ 

+ 

+ def get_modules_default_profiles_lookup():

+     return _get_dataset().profile_defaults

  

  

  class Repo(object):

@@ -100,7 +116,7 @@ 

              path = "{}-{}.solvx".format(path, ext)

          else:

              path = "{}.solv".format(path)

-         return os.path.join(get_dataset().cache_dir, path.replace("/", "_"))

+         return os.path.join(_get_dataset().cache_dir, path.replace("/", "_"))

  

      def usecachedrepo(self, ext, mark=False):

          try:

@@ -145,7 +161,7 @@ 

          tmpname = None

          try:

              fd, tmpname = tempfile.mkstemp(prefix=".newsolv-",

-                                            dir=get_dataset().cache_dir)

+                                            dir=_get_dataset().cache_dir)

              os.fchmod(fd, 0o444)

              f = os.fdopen(fd, "wb+")

              f = solv.xfopen_fd(None, f.fileno())

@@ -311,7 +327,7 @@ 

  

  

  def setup_repos():

-     dataset = get_dataset()

+     dataset = _get_dataset()

  

      repos = []

      for reponame, (arch_cache_path, src_cache_path) in (

file modified
+4 -2

@@ -198,6 +198,8 @@ 

                type=click.Path(exists=True),

                help="Additional modulemd files to check."

                     " Can be given multiple times.")

- def summarize(modules, add_file):

+ @click.option("--tree", "-t", is_flag=True, default=False,

+               help="Print output as a tree")

+ def summarize(modules, add_file, tree):

      """Prints a summary of available modules"""

-     summarize_modules(modules, add_file)

+     summarize_modules(modules, add_file, tree)

file modified
+60 -27

@@ -26,53 +26,84 @@ 

  

  import smartcols

  

- from ._fetchrepodata import merge_modules

- from ._repodata import get_dataset

+ from . import _repodata

  

  

- def print_summary(profiles, sdefaults=None, pdefaults=None, restrict_to=None):

-     sdefaults = sdefaults or {}

-     pdefaults = pdefaults or {}

-     restrict_to = restrict_to or []

- 

+ def _print_summary(profiles, sdefaults, pdefaults, deps, restrict_to, as_tree):

      tb = smartcols.Table()

      cl = tb.new_column('Name')

+     cl.tree = as_tree

      cl_strm = tb.new_column('Stream')

+     cl_ver = tb.new_column('Version')

+     cl_ctxt = tb.new_column('Context')

      cl_prof = tb.new_column('Profiles')

-     for modname, sdict in sorted(profiles.items()):

+     cl_deps = tb.new_column('Dependencies')

+     parent_ln = {}

+     for nsvc, plist in sorted(profiles.items()):

+         modname, sname, version, context = nsvc.split(':')

+ 

          if restrict_to and modname not in restrict_to:

              continue

  

          def is_def_strm(s):

              return s == sdefaults.get(modname, '')

  

-         for sname, plist in sorted(sdict.items()):

-             ln = tb.new_line()

-             ln[cl] = modname

-             ln[cl_strm] = sname + ' [d]' * is_def_strm(sname)

+         if as_tree:

+             pl = parent_ln.get(modname, None)

+             if pl is None:

+                 pl = parent_ln.setdefault(modname, tb.new_line())

+             pl[cl] = modname

+         else:

+             pl = None

  

-             def is_def_prof(p):

-                 return p in pdefaults.get(modname, {}).get(sname, [])

+         ln = tb.new_line(pl)

+         ln[cl] = modname

+         ln[cl_strm] = sname + ' [d]' * is_def_strm(sname)

+         ln[cl_ver] = version

+         ln[cl_ctxt] = context

  

-             ln[cl_prof] = ', '.join(p + ' [d]' if is_def_prof(p) else p

-                                     for p in plist)

+         def is_def_prof(p):

+             return p in pdefaults.get(modname, {}).get(sname, [])

+ 

+         ln[cl_prof] = ', '.join(p + ' [d]' if is_def_prof(p) else p

+                                 for p in plist)

+ 

+         dlist = sorted(deps.get(nsvc, []))

+         if len(dlist) == 0:

+             continue

+         if len(dlist) == 1:

+             dlist = str(dlist[0])

+         else:

+             dlist = ','.join(dlist)

+         ln[cl_deps] = dlist

  

      print(tb)

      print('\nHint: [d]efault')

  

  

- def _add_module_metadata(yaml_files, profiles, dstreams, dprofiles):

+ def _add_module_metadata(yaml_files, profiles, dstreams, dprofiles, deps):

      for yaml in yaml_files:

          assert yaml.endswith('.yaml'), "Not a yaml file: {}".format(yaml)

  

          mmd_index, failures = Modulemd.index_from_file(yaml)

          assert len(failures) == 0, failures

+ 

          for module_name, index in mmd_index.items():

-             for module in merge_modules(index.get_streams().values()):

-                 stream_name = module.get_stream()

+             for module in index.get_streams().values():

+                 # local modulemd files might miss some info like context or

+                 # version. So let's make sure nsvc is consistent

+                 ctxt = module.get_context() or ''

+                 nsvc = f'{module_name}:{module.get_stream()}:' \

+                        f'{module.get_version()}:{ctxt}'

                  plist = list(module.get_profiles().keys())

-                 profiles.setdefault(module_name, {}).setdefault(

-                     stream_name, []).extend(plist)

+                 profiles[nsvc] = sorted(set(profiles.get(nsvc, []) + plist))

+ 

+                 for dep in module.get_dependencies():

+                     deplist = set(deps.get(nsvc, []))

+                     for m, s in dep.peek_requires().items():

+                         deplist.add(f"{m}:{','.join(s.get())}"

+                                     if len(s.get()) else m)

+                     deps[nsvc] = sorted(deplist)

  

              defaults = index.get_defaults()

              if not defaults:

@@ -84,7 +115,7 @@ 

                  dprofiles[module_name][s] = pset.get()

  

  

- def summarize_modules(restrict_list=None, yaml_files=None):

+ def summarize_modules(restrict_list=None, yaml_files=None, as_tree=False):

      """

      Load Modulemd objects from each repository in repo_list and print a summary

      of the modules found with a summary of their streams and profiles.

@@ -94,11 +125,13 @@ 

      *yaml_files*: additional yaml files to parse and include in the summary

      """

  

-     profiles = get_dataset().module_to_profiles

-     dstreams = get_dataset().stream_defaults

-     dprofiles = get_dataset().profile_defaults

+     profiles = _repodata.get_modules_profiles_lookup().copy()

+     dstreams = _repodata.get_modules_default_streams_lookup().copy()

+     dprofiles = _repodata.get_modules_default_profiles_lookup().copy()

+     deps = _repodata.get_modules_dependencies_lookup().copy()

  

      if yaml_files:

-         _add_module_metadata(yaml_files, profiles, dstreams, dprofiles)

+         _add_module_metadata(yaml_files, profiles, dstreams, dprofiles, deps)

  

-     print_summary(profiles, dstreams, dprofiles, restrict_list)

+     restrict_list = restrict_list or []

+     _print_summary(profiles, dstreams, dprofiles, deps, restrict_list, as_tree)

file modified
+30 -22

@@ -15,8 +15,9 @@ 

  @pytest.mark.needs_metadata

  class TestModuleSummary(object):

  

-     def matches(self, mod, strm, prof, out):

-         return re.search(fr'^{mod}\s+{strm}\s+{prof}$', out, re.M) is not None

+     def matches(self, mod, strm, ver, ctxt, prof, deps, out):

+         mstr = fr'^{mod}\s+{strm}\s+{ver}\s+{ctxt}\s+{prof}\s+{deps}$'

+         return re.search(mstr, out, re.M) is not None

  

      # FIXME: we should mock the fetched metadata so that these tests do not

      # fail when the metadata changes

@@ -24,33 +25,40 @@ 

          summarize_modules()

          out, err = capfd.readouterr()

  

-         assert self.matches('reviewboard', '2.5',

-                             r'server, default \[d\]',

-                             out)

-         assert self.matches('reviewboard', '3.0',

-                             r'server, default \[d\]',

-                             out)

-         assert self.matches('testmodule', 'master', 'default', out)

+         assert self.matches('reviewboard', '2.5', '20180828143308', '083bce86',

+                             r'default \[d\], server',

+                             'django:1.6,platform:f29', out)

+         assert self.matches('reviewboard', '3.0', '20180828143238', '083bce86',

+                             r'default \[d\], server',

+                             'django:1.6,platform:f29', out)

+         assert self.matches('testmodule', 'master', '20180405123256',

+                             'c2c572ec', 'default', 'platform:f29', out)

  

      def test_summarize_modules_restricted(self, capfd):

          summarize_modules(['reviewboard', 'django'])

          out, err = capfd.readouterr()

  

-         assert self.matches('reviewboard', '2.5',

-                             r'server, default \[d\]',

-                             out)

-         assert self.matches('reviewboard', '3.0',

-                             r'server, default \[d\]',

-                             out)

-         assert self.matches('django', '1.6',

-                             r'python2_development, default \[d\]',

-                             out)

-         assert not self.matches('testmodule', 'master', 'default', out)

+         assert self.matches('reviewboard', '2.5', '20180828143308', '083bce86',

+                             r'default \[d\], server',

+                             'django:1.6,platform:f29', out)

+         assert self.matches('reviewboard', '3.0', '20180828143238', '083bce86',

+                             r'default \[d\], server',

+                             'django:1.6,platform:f29', out)

+         assert self.matches('django', '1.6', '20180828135711', '6c81f848',

+                             r'default \[d\], python2_development',

+                             'platform:f29', out)

+         assert not self.matches('testmodule', 'master', '20180405123256',

+                                 'c2c572ec', 'default', 'platform:f29', out)

  

      def test_summarize_modules_local_files(self, capfd):

          summarize_modules(yaml_files=[spec_v2_yaml_path])

          out, err = capfd.readouterr()

  

-         assert self.matches('testmodule', 'master', 'default', out)

-         assert self.matches('foo', 'stream-name', 'minimal, buildroot, ' +

-                             'container, srpm-buildroot, default', out)

+         assert self.matches('testmodule', 'master', '20180405123256',

+                             'c2c572ec', 'default', 'platform:f29', out)

+         assert self.matches('foo', 'stream-name', '20160927144203', 'c0ffee43',

+                             'buildroot, container, default, minimal, ' +

+                             'srpm-buildroot', 'compatible:v3,v4,extras,' +

+                             'moreextras:bar,foo,platform:-epel7,-f27,-f28,' +

+                             'platform:epel7,platform:f27,platform:f28,' +

+                             'runtime:a,b', out)

  • Add context and version information
  • Add modular dependencies information
  • Add option to show table as a tree

Metadata Update from @nphilipp:
- Request assigned

8 months ago

Unfortunately, the test suite doesn't pass here. I've checked and commit da59d30 "mark print_summary as internal" is the first one that fails.

Since I fixed the last remaining PEP8 errors in the master branch, I want that both the flake8 tool and the test suite always run clean (i.e. on every commit), rather than have one commit in a PR introduce a bug which is fixed by a subsequent commit—this makes bisecting issues easier later on. If you want, I can fix the commits myself, I'd rebase onto the current master anyway. :wink:

rebased onto 3fe7bcd

8 months ago

6 new commits added

  • summarize-module: add modular dependency info
  • summarize-module: add version and context to output
  • Flatten the module profiles lookup dict
  • summarize-module: make shallow copy of lookup dicts
  • summarize-module: mark print_summary as internal
  • Revert "repodata: make get_dataset public"
8 months ago

Thanks for fixing the tests! I've applied your changes in commits c727c16..6add261, with some small changes regarding imports which flake8 flagged.

Pull-Request has been closed by nphilipp

8 months ago