#90 Add summarize-module tool
Merged 5 months ago by nphilipp. Opened 5 months ago by rdossant.
modularity/ rdossant/fedmod summarize-module  into  master

file modified
+68 -24

@@ -140,10 +140,16 @@ 

      return DistroPaths(release_name, arch)

  

  _MODULE_FORWARD_LOOKUP_CACHE = "module-contents"

+ _PROFILE_FORWARD_LOOKUP_CACHE = "module-profiles"

+ _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE = "stream-defaults"

+ _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE = "profile-defaults"

  _SRPM_REVERSE_LOOKUP_CACHE = "srpm-to-module"

  _RPM_REVERSE_LOOKUP_CACHE = "rpm-to-module"

  

  _ALL_CACHES = [_MODULE_FORWARD_LOOKUP_CACHE,

+                _PROFILE_FORWARD_LOOKUP_CACHE,

+                _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE,

+                _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE,

                 _SRPM_REVERSE_LOOKUP_CACHE,

                 _RPM_REVERSE_LOOKUP_CACHE]

  

@@ -240,46 +246,78 @@ 

      with gzip.open(repo_modulemd_fname, "rt") as modules_yaml_gz:

          modules_yaml = modules_yaml_gz.read()

  

-     objects = Modulemd.objects_from_string(modules_yaml)

-     return (o for o in objects if isinstance(o,  Modulemd.Module))

+     objects, _ = Modulemd.index_from_string(modules_yaml)

+     return objects.values() # a list of ImprovedModule objects

  

- def _merge_modules(module_sets):

+ def merge_modules(module_set):

+     """

+     Given a list of ModuleStream objects, "merge" them by picking only the

+     ModuleStream with highest version

+     """

      modules = dict()

  

-     for module_set in module_sets:

-         for m in module_set:

-             old = modules.get((m.props.name, m.props.stream))

-             if not old or m.props.version > old.props.version:

-                 modules[(m.props.name, m.props.stream)] = m

+     for m in module_set:

+         old = modules.get((m.props.name, m.props.stream))

+         if not old or m.props.version > old.props.version:

+             modules[(m.props.name, m.props.stream)] = m

  

      return modules.values()

  

  def _write_lookup_caches(paths):

-     module_sets = [modules_read

-                    for modules_read in (

-                        _read_modules(repopaths.arch)

-                        for repopaths in paths.repo_paths_by_name.values())

-                    if modules_read]

+     index_sets = [index_read

+                   for index_read in (

+                       _read_modules(repopaths.arch)

+                       for repopaths in paths.repo_paths_by_name.values())

+                   if index_read]

  

      module_forward_lookup = {}

      srpm_reverse_lookup = defaultdict(list)

      rpm_reverse_lookup = defaultdict(list)

+     # {module-name: {stream: [profiles]}}

+     profile_forward_lookup = defaultdict(dict)

+     # {module-name: stream}

+     stream_defaults_forward_lookup = {}

+     # {module-name: {stream : [profiles]}}

+     profile_defaults_forward_lookup = defaultdict(dict)

+ 

+     for index_set in index_sets:

+         for index in index_set:

+             module_name = index.get_name()

+             # What we think of as module is a ModuleStream for libmodulemd

+             # We are ignoring context and using the stream with highest version

+             for module in merge_modules(index.get_streams().values()):

+                 sname = module.get_stream()

+                 artifacts = module.props.rpm_artifacts.get()

+                 module_forward_lookup[module_name] = list(set(artifacts))

+                 components = module.get_rpm_components() #module.props.components_rpm

+                 for srpmname in components:

+                     srpm_reverse_lookup[srpmname].append(module_name)

+                 for rpmname in artifacts:

+                     rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0]

+                     rpm_reverse_lookup[rpmprefix].append(module_name)

+ 

+                 # {'name': ModuleProfile}

+                 profiles = list(module.get_profiles().keys()) # only names

+                 profile_forward_lookup[module_name][sname] = profiles

+ 

+             defaults = index.get_defaults()

+             if not defaults:

+                 continue

+ 

+             stream_defaults_forward_lookup[module_name] = defaults.peek_default_stream()

+             # Default profiles for each stream in the module

+             for s, pset in defaults.peek_profile_defaults().items():

+                 profile_defaults_forward_lookup[module_name][s] = pset.get()

  

-     for module in _merge_modules(module_sets):

-         module_name = module.props.name

-         artifacts = module.props.rpm_artifacts.get()

- 

-         module_forward_lookup[module_name] = list(set(artifacts))

-         components = module.props.components_rpm

-         for srpmname in components:

-             srpm_reverse_lookup[srpmname].append(module_name)

-         for rpmname in artifacts:

-             rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0]

-             rpm_reverse_lookup[rpmprefix].append(module_name)

  

      # Cache the lookup tables as local JSON files

      print("Caching lookup tables")

      _write_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE, module_forward_lookup)

+     _write_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE, profile_forward_lookup)

+     _write_cache(paths, _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE,

+                 stream_defaults_forward_lookup)

+     _write_cache(paths, _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE,

+                 profile_defaults_forward_lookup)

      _write_cache(paths, _SRPM_REVERSE_LOOKUP_CACHE, srpm_reverse_lookup)

      _write_cache(paths, _RPM_REVERSE_LOOKUP_CACHE, rpm_reverse_lookup)

  

@@ -300,6 +338,9 @@ 

      srpm_to_modules = attrib(type=dict)

      rpm_to_modules = attrib(type=dict)

      module_to_packages = attrib(type=dict)

+     module_to_profiles = attrib(type=dict)

+     stream_defaults = attrib(type=dict)

+     profile_defaults = attrib(type=dict)

      repo_cache_paths = attrib(type=dict)

  

  

@@ -333,6 +374,9 @@ 

          srpm_to_modules=_read_cache(paths, _SRPM_REVERSE_LOOKUP_CACHE),

          rpm_to_modules=_read_cache(paths, _RPM_REVERSE_LOOKUP_CACHE),

          module_to_packages=_read_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE),

+         module_to_profiles=_read_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE),

+         stream_defaults=_read_cache(paths, _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE),

+         profile_defaults=_read_cache(paths, _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE),

          repo_cache_paths={

              n: (c.arch.local_cache_path, c.src.local_cache_path)

              for n, c in paths.repo_paths_by_name.items()

file modified
+9 -9

@@ -21,7 +21,7 @@ 

      global _ACTIVE_DATASET

      _ACTIVE_DATASET = load_cached_repodata(dataset_name)

  

- def _get_dataset():

+ def get_dataset():

      if _ACTIVE_DATASET is None:

          _load_dataset()

      return _ACTIVE_DATASET

@@ -41,17 +41,17 @@ 

      dataset_name = name

  

  def list_modules():

-     return _get_dataset().module_to_packages.keys()

+     return get_dataset().module_to_packages.keys()

  

  def get_rpms_in_module(module_name):

-     return _get_dataset().module_to_packages.get(module_name, [])

+     return get_dataset().module_to_packages.get(module_name, [])

  

  def get_modules_for_rpm(rpm_name):

-     result = _get_dataset().rpm_to_modules.get(rpm_name)

+     result = get_dataset().rpm_to_modules.get(rpm_name)

      return result

  

  def get_module_for_rpm(rpm_name):

-     result = _get_dataset().rpm_to_modules.get(rpm_name)

+     result = get_dataset().rpm_to_modules.get(rpm_name)

      if result is not None:

          if len(result) > 1:

              log.warn(f"Multiple modules found for {rpm_name!r}: {','.join(result)}")

@@ -59,7 +59,7 @@ 

      return result

  

  def get_rpm_reverse_lookup():

-     return _get_dataset().rpm_to_modules

+     return get_dataset().rpm_to_modules

  

  class Repo(object):

      def __init__(self, name, metadata_path):

@@ -91,7 +91,7 @@ 

              path = "{}-{}.solvx".format(path, ext)

          else:

              path = "{}.solv".format(path)

-         return os.path.join(_get_dataset().cache_dir, path.replace("/", "_"))

+         return os.path.join(get_dataset().cache_dir, path.replace("/", "_"))

  

      def usecachedrepo(self, ext, mark=False):

          try:

@@ -135,7 +135,7 @@ 

          tmpname = None

          try:

              fd, tmpname = tempfile.mkstemp(prefix=".newsolv-",

-                                            dir=_get_dataset().cache_dir)

+                                            dir=get_dataset().cache_dir)

              os.fchmod(fd, 0o444)

              f = os.fdopen(fd, "wb+")

              f = solv.xfopen_fd(None, f.fileno())

@@ -287,7 +287,7 @@ 

      return False

  

  def setup_repos():

-     dataset = _get_dataset()

+     dataset = get_dataset()

  

      repos = []

      for reponame, (arch_cache_path, src_cache_path) in (

file modified
+12

@@ -7,6 +7,7 @@ 

  from .flatpak_generator import FlatpakGenerator, do_flatpak_report

  from .module_generator import ModuleGenerator

  from .module_repoquery import ModuleRepoquery

+ from .modulemd_summarizer import summarize_modules

  

  from . import modulemd_linter as mmdl

  from . import _depchase, _repodata, _fetchrepodata

@@ -173,3 +174,14 @@ 

      """Validates a given modulemd YAML file"""

      linter = mmdl.ModuleMDLinter(modulemd_path=modulemd)

      return linter.lint(min_level=min_level)

+ 

+ 

+ # Summarize a list of modulemd files

+ @_cli_commands.command('summarize-module')

+ @click.argument("modules", metavar='MODULES', nargs=-1, required=False)

+ @click.option("--add-file", "-f", metavar="FILE", multiple=True,

+               type=click.Path(exists=True),

+               help="Additional modulemd files to check. Can be given multiple times")

+ def summarize(modules, add_file):

+     """Prints a summary of available modules"""

+     summarize_modules(modules, add_file)

@@ -0,0 +1,104 @@ 

+ # -*- coding: utf-8 -*-

+ #

+ # modulemd_summarizer - Prints a summary of ModuleMD files

+ #

+ # Copyright © 2018 Red Hat, Inc.

+ #

+ # This program is free software: you can redistribute it and/or modify

+ # it under the terms of the GNU General Public License as published by

+ # the Free Software Foundation, either version 3 of the License, or

+ # (at your option) any later version.

+ #

+ # This program is distributed in the hope that it will be useful,

+ # but WITHOUT ANY WARRANTY; without even the implied warranty of

+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

+ # GNU General Public License for more details.

+ #

+ # You should have received a copy of the GNU General Public License

+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.

+ #

+ # Author:

+ # Rafael dos Santos <rdossant@redhat.com>

+ 

+ import smartcols

+ 

+ import gi

+ gi.require_version('Modulemd', '1.0')

+ from gi.repository import Modulemd

+ 

+ from ._repodata import get_dataset

+ from ._fetchrepodata import merge_modules

+ 

+ 

+ def print_summary(profiles, sdefaults=None, pdefaults=None, restrict_to=None):

+     sdefaults = sdefaults or {}

+     pdefaults = pdefaults or {}

+     restrict_to = restrict_to or []

+ 

+     tb = smartcols.Table()

+     cl = tb.new_column('Name')

+     cl_strm = tb.new_column('Stream')

+     cl_prof = tb.new_column('Profiles')

+     for modname, sdict in sorted(profiles.items()):

+         if restrict_to and modname not in restrict_to:

+             continue

+ 

+         def is_def_strm(s):

+             return s == sdefaults.get(modname, '')

+ 

+         for sname, plist in sorted(sdict.items()):

+             ln = tb.new_line()

+             ln[cl] = modname

+             ln[cl_strm] = sname + ' [d]' * is_def_strm(sname)

+ 

+             def is_def_prof(p):

+                 return p in pdefaults.get(modname, {}).get(sname, [])

+ 

+             ln[cl_prof] = ', '.join(p + ' [d]' if is_def_prof(p) else p

+                                     for p in plist)

+ 

+     print(tb)

+     print('\nHint: [d]efault')

+ 

+ 

+ def _add_module_metadata(yaml_files, profiles, dstreams, dprofiles):

+     for yaml in yaml_files:

+         assert yaml.endswith('.yaml'), "Not a yaml file: {}".format(yaml)

+ 

+         mmd_index, failures = Modulemd.index_from_file(yaml)

+         assert len(failures) == 0, failures

+         for module_name, index in mmd_index.items():

+             for module in merge_modules(index.get_streams().values()):

+                 stream_name = module.get_stream()

+                 plist = list(module.get_profiles().keys())

+                 profiles.setdefault(module_name, {}).setdefault(

+                         stream_name, []).extend(plist)

+ 

+             defaults = index.get_defaults()

+             if not defaults:

+                 continue

+ 

+             # Local module metadata can overwrite metadata from repo

+             dstreams[module_name] = defaults.peek_default_stream()

+             for s, pset in defaults.peek_profile_defaults().items():

+                 dprofiles[module_name][s] = pset.get()

+ 

+ 

+ def summarize_modules(restrict_list=None, yaml_files=None):

+     """

+     Load Modulemd objects from each repository in repo_list and print a summary

+     of the modules found with a summary of their streams and profiles.

+ 

+     *restrict_list*: if present, restricts output to modules supplied

+ 

+     *yaml_files*: additional yaml files to parse and include in the summary

+     """

+ 

+     profiles = get_dataset().module_to_profiles

+     dstreams = get_dataset().stream_defaults

+     dprofiles = get_dataset().profile_defaults

+ 

+     if yaml_files:

+         _add_module_metadata(yaml_files, profiles, dstreams, dprofiles)

+ 

+     print_summary(profiles, dstreams, dprofiles, restrict_list)

@@ -0,0 +1,54 @@ 

+ """In-process tests for the module summary functionality"""

+ 

+ import re

+ import pytest

+ import os.path

+ from _fedmod.modulemd_summarizer import summarize_modules

+ 

+ 

+ testfiles_dir = os.path.join(os.path.dirname(__file__), 'files')

+ spec_v1_yaml_path = os.path.join(testfiles_dir, 'spec.v1.yaml')

+ 

+ 

+ @pytest.mark.needs_metadata

+ class TestModuleSummary(object):

+ 

+     def matches(self, mod, strm, prof, out):

+         return re.search(fr'^{mod}\s+{strm}\s+{prof}$', out, re.M) is not None

+ 

+     # FIXME: we should mock the fetched metadata so that these tests do not

+     # fail when the metadata changes

+     def test_summarize_modules(self, capfd):

+         summarize_modules()

+         out, err = capfd.readouterr()

+ 

+         assert self.matches('reviewboard', '2.5',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('reviewboard', '3.0',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('testmodule', 'master', 'default', out)

+ 

+     def test_summarize_modules_restricted(self, capfd):

+         summarize_modules(['reviewboard', 'django'])

+         out, err = capfd.readouterr()

+ 

+         assert self.matches('reviewboard', '2.5',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('reviewboard', '3.0',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('django', '1.6',

+                             r'python2_development, default \[d\]',

+                             out)

+         assert not self.matches('testmodule', 'master', 'default', out)

+ 

+     def test_summarize_modules_local_files(self, capfd):

+         summarize_modules(yaml_files=[spec_v1_yaml_path])

+         out, err = capfd.readouterr()

+ 

+         assert self.matches('testmodule', 'master', 'default', out)

+         assert self.matches('foo', 'stream-name', 'minimal, buildroot, ' +

+                             'container, srpm-buildroot, default', out)

Metadata Update from @nphilipp:
- Request assigned

5 months ago
  1. dict is a type... (*)
  2. function/method default values shouldn't be mutable--yeah, I know the current code doesn't touch them, but as a matter of principle

E.g. rather do it like this:

def print_summary(profiles, sdefaults=None, pdefaults=None, restrict_to=None):
    if sdefaults is None:
        sdefaults = {}
    if pdefaults is None:
        pdefaults = {}
    if restrict_to is None:
        restrict_to = []
    ...

(*): with sdefaults=dict above, this will cause an exception if no value is provided

You might want to fix the preceding lines. :wink:

I like using comments to describe the structure of the dicts!

There should be a space after the comma.

PEP8 advises two blank lines between top-level "things", i.e. import blocks, classes, functions.

I won't comment every new PEP8 violation because flake8 flags so many of them (both in the existing code and this PR). If you prefer, I can fix the PEP8 issues before committing (I'll want to rebase your PR on top of the current master branch anyway).

This looks like it's lifted from _repodata.py. Let's use these functions instead and import them here.

Ugh, I really don't like the nested function + map here. How about using a conditional expression inside the join instead:

    ln[cl_prof] = ", ".join(p + " [d]"
                            if p in pdefaults.get(modname, {}).get(sname, [])
                            else p
                            for p in plist)

Same issue with mutable default values as above.

If we don't reuse the compiled regex, we can skip that step. Additionally, because we only target Python 3.6+, we can use f-strings here, too, and make the code a bit less dense:

    def matches(self, mod, strm, prof, out):
        return (
            re.search(fr'^{mod}\s+{strm}\s+{prof}$', out, re.MULTILINE)
            is not None
        )

Ugh, I really don't like the nested function + map here. How about using a conditional expression inside the join instead:
ln[cl_prof] = ", ".join(p + " [d]"
if p in pdefaults.get(modname, {}).get(sname, [])
else p
for p in plist)

Don't know why you don't like the map, I find it more readable than the list comprehension. Anyway I'll change it.

rebased onto 0b54e37

5 months ago

7 new commits added

  • module-summarizer: add tests
  • Add tool for displaying summary of modules
  • fetchrepodata: make merge_modules public
  • repodata: make get_dataset public
  • Add lookup for default stream and profiles
  • Add lookup for module profiles in cached data
  • Use new ImprovedModule available in libmodulemd
5 months ago

7 new commits added

  • module-summarizer: add tests
  • Add tool for displaying summary of modules
  • fetchrepodata: make merge_modules public
  • repodata: make get_dataset public
  • Add lookup for default stream and profiles
  • Add lookup for module profiles in cached data
  • Use new ImprovedModule available in libmodulemd
5 months ago

Don't know why you don't like the map, I find it more readable than the list comprehension. Anyway I'll change it.

Thanks! I guess using map() vs. comprehensions can be a matter of taste. The main reason I prefer the latter is because it's easier for novices to understand the code.

I've noticed one other thing, i.e. that you use spec.v1.yaml in the test suite rather than spec.v2.yaml, but I'll just fix that afterwards.

Pull-Request has been merged by nphilipp

5 months ago