#90 Add summarize-module tool
Merged 5 months ago by nphilipp. Opened 5 months ago by rdossant.
modularity/ rdossant/fedmod summarize-module  into  master

file modified
+68 -24

@@ -140,10 +140,16 @@ 

      return DistroPaths(release_name, arch)

  

  _MODULE_FORWARD_LOOKUP_CACHE = "module-contents"

+ _PROFILE_FORWARD_LOOKUP_CACHE = "module-profiles"

+ _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE = "stream-defaults"

+ _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE = "profile-defaults"

  _SRPM_REVERSE_LOOKUP_CACHE = "srpm-to-module"

  _RPM_REVERSE_LOOKUP_CACHE = "rpm-to-module"

  

  _ALL_CACHES = [_MODULE_FORWARD_LOOKUP_CACHE,

+                _PROFILE_FORWARD_LOOKUP_CACHE,

+                _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE,

+                _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE,

                 _SRPM_REVERSE_LOOKUP_CACHE,

                 _RPM_REVERSE_LOOKUP_CACHE]

  

@@ -240,46 +246,78 @@ 

      with gzip.open(repo_modulemd_fname, "rt") as modules_yaml_gz:

          modules_yaml = modules_yaml_gz.read()

  

-     objects = Modulemd.objects_from_string(modules_yaml)

-     return (o for o in objects if isinstance(o,  Modulemd.Module))

+     objects, _ = Modulemd.index_from_string(modules_yaml)

+     return objects.values() # a list of ImprovedModule objects

  

- def _merge_modules(module_sets):

+ def merge_modules(module_set):

+     """

+     Given a list of ModuleStream objects, "merge" them by picking only the

+     ModuleStream with highest version

+     """

      modules = dict()

  

-     for module_set in module_sets:

-         for m in module_set:

-             old = modules.get((m.props.name, m.props.stream))

-             if not old or m.props.version > old.props.version:

-                 modules[(m.props.name, m.props.stream)] = m

+     for m in module_set:

+         old = modules.get((m.props.name, m.props.stream))

+         if not old or m.props.version > old.props.version:

+             modules[(m.props.name, m.props.stream)] = m

  

      return modules.values()

  

  def _write_lookup_caches(paths):

-     module_sets = [modules_read

-                    for modules_read in (

-                        _read_modules(repopaths.arch)

-                        for repopaths in paths.repo_paths_by_name.values())

-                    if modules_read]

+     index_sets = [index_read

+                   for index_read in (

+                       _read_modules(repopaths.arch)

+                       for repopaths in paths.repo_paths_by_name.values())

+                   if index_read]

  

      module_forward_lookup = {}

      srpm_reverse_lookup = defaultdict(list)

      rpm_reverse_lookup = defaultdict(list)

+     # {module-name: {stream: [profiles]}}

+     profile_forward_lookup = defaultdict(dict)

+     # {module-name: stream}

+     stream_defaults_forward_lookup = {}

+     # {module-name: {stream : [profiles]}}

+     profile_defaults_forward_lookup = defaultdict(dict)

+ 

+     for index_set in index_sets:

+         for index in index_set:

+             module_name = index.get_name()

+             # What we think of as module is a ModuleStream for libmodulemd

+             # We are ignoring context and using the stream with highest version

+             for module in merge_modules(index.get_streams().values()):

+                 sname = module.get_stream()

+                 artifacts = module.props.rpm_artifacts.get()

+                 module_forward_lookup[module_name] = list(set(artifacts))

+                 components = module.get_rpm_components() #module.props.components_rpm

+                 for srpmname in components:

+                     srpm_reverse_lookup[srpmname].append(module_name)

+                 for rpmname in artifacts:

+                     rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0]

+                     rpm_reverse_lookup[rpmprefix].append(module_name)

+ 

+                 # {'name': ModuleProfile}

+                 profiles = list(module.get_profiles().keys()) # only names

+                 profile_forward_lookup[module_name][sname] = profiles

+ 

+             defaults = index.get_defaults()

+             if not defaults:

+                 continue

+ 

+             stream_defaults_forward_lookup[module_name] = defaults.peek_default_stream()

+             # Default profiles for each stream in the module

+             for s, pset in defaults.peek_profile_defaults().items():

+                 profile_defaults_forward_lookup[module_name][s] = pset.get()

  

-     for module in _merge_modules(module_sets):

-         module_name = module.props.name

-         artifacts = module.props.rpm_artifacts.get()

- 

-         module_forward_lookup[module_name] = list(set(artifacts))

-         components = module.props.components_rpm

-         for srpmname in components:

-             srpm_reverse_lookup[srpmname].append(module_name)

-         for rpmname in artifacts:

-             rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0]

-             rpm_reverse_lookup[rpmprefix].append(module_name)

  

      # Cache the lookup tables as local JSON files

      print("Caching lookup tables")

      _write_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE, module_forward_lookup)

+     _write_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE, profile_forward_lookup)

+     _write_cache(paths, _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE,

+                 stream_defaults_forward_lookup)

+     _write_cache(paths, _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE,

+                 profile_defaults_forward_lookup)

      _write_cache(paths, _SRPM_REVERSE_LOOKUP_CACHE, srpm_reverse_lookup)

      _write_cache(paths, _RPM_REVERSE_LOOKUP_CACHE, rpm_reverse_lookup)

  

@@ -300,6 +338,9 @@ 

      srpm_to_modules = attrib(type=dict)

      rpm_to_modules = attrib(type=dict)

      module_to_packages = attrib(type=dict)

+     module_to_profiles = attrib(type=dict)

+     stream_defaults = attrib(type=dict)

+     profile_defaults = attrib(type=dict)

      repo_cache_paths = attrib(type=dict)

  

  

@@ -333,6 +374,9 @@ 

          srpm_to_modules=_read_cache(paths, _SRPM_REVERSE_LOOKUP_CACHE),

          rpm_to_modules=_read_cache(paths, _RPM_REVERSE_LOOKUP_CACHE),

          module_to_packages=_read_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE),

+         module_to_profiles=_read_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE),

+         stream_defaults=_read_cache(paths, _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE),

+         profile_defaults=_read_cache(paths, _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE),

          repo_cache_paths={

              n: (c.arch.local_cache_path, c.src.local_cache_path)

              for n, c in paths.repo_paths_by_name.items()

file modified
+9 -9

@@ -21,7 +21,7 @@ 

      global _ACTIVE_DATASET

      _ACTIVE_DATASET = load_cached_repodata(dataset_name)

  

- def _get_dataset():

+ def get_dataset():

      if _ACTIVE_DATASET is None:

          _load_dataset()

      return _ACTIVE_DATASET

@@ -41,17 +41,17 @@ 

      dataset_name = name

  

  def list_modules():

-     return _get_dataset().module_to_packages.keys()

+     return get_dataset().module_to_packages.keys()

  

  def get_rpms_in_module(module_name):

-     return _get_dataset().module_to_packages.get(module_name, [])

+     return get_dataset().module_to_packages.get(module_name, [])

  

  def get_modules_for_rpm(rpm_name):

-     result = _get_dataset().rpm_to_modules.get(rpm_name)

+     result = get_dataset().rpm_to_modules.get(rpm_name)

      return result

  

  def get_module_for_rpm(rpm_name):

-     result = _get_dataset().rpm_to_modules.get(rpm_name)

+     result = get_dataset().rpm_to_modules.get(rpm_name)

      if result is not None:

          if len(result) > 1:

              log.warn(f"Multiple modules found for {rpm_name!r}: {','.join(result)}")

@@ -59,7 +59,7 @@ 

      return result

  

  def get_rpm_reverse_lookup():

-     return _get_dataset().rpm_to_modules

+     return get_dataset().rpm_to_modules

  

  class Repo(object):

      def __init__(self, name, metadata_path):

@@ -91,7 +91,7 @@ 

              path = "{}-{}.solvx".format(path, ext)

          else:

              path = "{}.solv".format(path)

-         return os.path.join(_get_dataset().cache_dir, path.replace("/", "_"))

+         return os.path.join(get_dataset().cache_dir, path.replace("/", "_"))

  

      def usecachedrepo(self, ext, mark=False):

          try:

@@ -135,7 +135,7 @@ 

          tmpname = None

          try:

              fd, tmpname = tempfile.mkstemp(prefix=".newsolv-",

-                                            dir=_get_dataset().cache_dir)

+                                            dir=get_dataset().cache_dir)

              os.fchmod(fd, 0o444)

              f = os.fdopen(fd, "wb+")

              f = solv.xfopen_fd(None, f.fileno())

@@ -287,7 +287,7 @@ 

      return False

  

  def setup_repos():

-     dataset = _get_dataset()

+     dataset = get_dataset()

  

      repos = []

      for reponame, (arch_cache_path, src_cache_path) in (

file modified
+12

@@ -7,6 +7,7 @@ 

  from .flatpak_generator import FlatpakGenerator, do_flatpak_report

  from .module_generator import ModuleGenerator

  from .module_repoquery import ModuleRepoquery

+ from .modulemd_summarizer import summarize_modules

  

  from . import modulemd_linter as mmdl

  from . import _depchase, _repodata, _fetchrepodata

@@ -173,3 +174,14 @@ 

      """Validates a given modulemd YAML file"""

      linter = mmdl.ModuleMDLinter(modulemd_path=modulemd)

      return linter.lint(min_level=min_level)

+ 

+ 

+ # Summarize a list of modulemd files

+ @_cli_commands.command('summarize-module')

+ @click.argument("modules", metavar='MODULES', nargs=-1, required=False)

+ @click.option("--add-file", "-f", metavar="FILE", multiple=True,

+               type=click.Path(exists=True),

+               help="Additional modulemd files to check. Can be given multiple times")

+ def summarize(modules, add_file):

+     """Prints a summary of available modules"""

+     summarize_modules(modules, add_file)

@@ -0,0 +1,104 @@ 

+ # -*- coding: utf-8 -*-

+ #

+ # modulemd_summarizer - Prints a summary of ModuleMD files

+ #

+ # Copyright © 2018 Red Hat, Inc.

+ #

+ # This program is free software: you can redistribute it and/or modify

+ # it under the terms of the GNU General Public License as published by

+ # the Free Software Foundation, either version 3 of the License, or

+ # (at your option) any later version.

+ #

+ # This program is distributed in the hope that it will be useful,

+ # but WITHOUT ANY WARRANTY; without even the implied warranty of

+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

+ # GNU General Public License for more details.

+ #

+ # You should have received a copy of the GNU General Public License

+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.

+ #

+ # Author:

+ # Rafael dos Santos <rdossant@redhat.com>

+ 

+ import smartcols

+ 

+ import gi

+ gi.require_version('Modulemd', '1.0')

+ from gi.repository import Modulemd

+ 

+ from ._repodata import get_dataset

+ from ._fetchrepodata import merge_modules

+ 

+ 

+ def print_summary(profiles, sdefaults=None, pdefaults=None, restrict_to=None):

+     sdefaults = sdefaults or {}

+     pdefaults = pdefaults or {}

+     restrict_to = restrict_to or []

+ 

+     tb = smartcols.Table()

+     cl = tb.new_column('Name')

+     cl_strm = tb.new_column('Stream')

+     cl_prof = tb.new_column('Profiles')

+     for modname, sdict in sorted(profiles.items()):

+         if restrict_to and modname not in restrict_to:

+             continue

+ 

+         def is_def_strm(s):

+             return s == sdefaults.get(modname, '')

+ 

+         for sname, plist in sorted(sdict.items()):

+             ln = tb.new_line()

+             ln[cl] = modname

+             ln[cl_strm] = sname + ' [d]' * is_def_strm(sname)

+ 

+             def is_def_prof(p):

+                 return p in pdefaults.get(modname, {}).get(sname, [])

+ 

+             ln[cl_prof] = ', '.join(p + ' [d]' if is_def_prof(p) else p

+                                     for p in plist)

+ 

+     print(tb)

+     print('\nHint: [d]efault')

+ 

+ 

+ def _add_module_metadata(yaml_files, profiles, dstreams, dprofiles):

+     for yaml in yaml_files:

+         assert yaml.endswith('.yaml'), "Not a yaml file: {}".format(yaml)

+ 

+         mmd_index, failures = Modulemd.index_from_file(yaml)

+         assert len(failures) == 0, failures

+         for module_name, index in mmd_index.items():

+             for module in merge_modules(index.get_streams().values()):

+                 stream_name = module.get_stream()

+                 plist = list(module.get_profiles().keys())

+                 profiles.setdefault(module_name, {}).setdefault(

+                         stream_name, []).extend(plist)

+ 

+             defaults = index.get_defaults()

+             if not defaults:

+                 continue

+ 

+             # Local module metadata can overwrite metadata from repo

+             dstreams[module_name] = defaults.peek_default_stream()

+             for s, pset in defaults.peek_profile_defaults().items():

+                 dprofiles[module_name][s] = pset.get()

+ 

+ 

+ def summarize_modules(restrict_list=None, yaml_files=None):

+     """

+     Load Modulemd objects from each repository in repo_list and print a summary

+     of the modules found with a summary of their streams and profiles.

+ 

+     *restrict_list*: if present, restricts output to modules supplied

+ 

+     *yaml_files*: additional yaml files to parse and include in the summary

+     """

+ 

+     profiles = get_dataset().module_to_profiles

+     dstreams = get_dataset().stream_defaults

+     dprofiles = get_dataset().profile_defaults

+ 

+     if yaml_files:

+         _add_module_metadata(yaml_files, profiles, dstreams, dprofiles)

+ 

+     print_summary(profiles, dstreams, dprofiles, restrict_list)

@@ -0,0 +1,54 @@ 

+ """In-process tests for the module summary functionality"""

+ 

+ import re

+ import pytest

+ import os.path

+ from _fedmod.modulemd_summarizer import summarize_modules

+ 

+ 

+ testfiles_dir = os.path.join(os.path.dirname(__file__), 'files')

+ spec_v1_yaml_path = os.path.join(testfiles_dir, 'spec.v1.yaml')

+ 

+ 

+ @pytest.mark.needs_metadata

+ class TestModuleSummary(object):

+ 

+     def matches(self, mod, strm, prof, out):

+         return re.search(fr'^{mod}\s+{strm}\s+{prof}$', out, re.M) is not None

+ 

+     # FIXME: we should mock the fetched metadata so that these tests do not

+     # fail when the metadata changes

+     def test_summarize_modules(self, capfd):

+         summarize_modules()

+         out, err = capfd.readouterr()

+ 

+         assert self.matches('reviewboard', '2.5',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('reviewboard', '3.0',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('testmodule', 'master', 'default', out)

+ 

+     def test_summarize_modules_restricted(self, capfd):

+         summarize_modules(['reviewboard', 'django'])

+         out, err = capfd.readouterr()

+ 

+         assert self.matches('reviewboard', '2.5',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('reviewboard', '3.0',

+                             r'server, default \[d\]',

+                             out)

+         assert self.matches('django', '1.6',

+                             r'python2_development, default \[d\]',

+                             out)

+         assert not self.matches('testmodule', 'master', 'default', out)

+ 

+     def test_summarize_modules_local_files(self, capfd):

+         summarize_modules(yaml_files=[spec_v1_yaml_path])

+         out, err = capfd.readouterr()

+ 

+         assert self.matches('testmodule', 'master', 'default', out)

+         assert self.matches('foo', 'stream-name', 'minimal, buildroot, ' +

+                             'container, srpm-buildroot, default', out)

Metadata Update from @nphilipp:
- Request assigned

5 months ago
  1. dict is a type... (*)
  2. function/method default values shouldn't be mutable--yeah, I know the current code doesn't touch them, but as a matter of principle

E.g. rather do it like this:

def print_summary(profiles, sdefaults=None, pdefaults=None, restrict_to=None):
    if sdefaults is None:
        sdefaults = {}
    if pdefaults is None:
        pdefaults = {}
    if restrict_to is None:
        restrict_to = []
    ...

(*): with sdefaults=dict above, this will cause an exception if no value is provided

You might want to fix the preceding lines. :wink:

I like using comments to describe the structure of the dicts!

There should be a space after the comma.

PEP8 advises two blank lines between top-level "things", i.e. import blocks, classes, functions.

I won't comment every new PEP8 violation because flake8 flags so many of them (both in the existing code and this PR). If you prefer, I can fix the PEP8 issues before committing (I'll want to rebase your PR on top of the current master branch anyway).

This looks like it's lifted from _repodata.py. Let's use these functions instead and import them here.

Ugh, I really don't like the nested function + map here. How about using a conditional expression inside the join instead:

    ln[cl_prof] = ", ".join(p + " [d]"
                            if p in pdefaults.get(modname, {}).get(sname, [])
                            else p
                            for p in plist)

Same issue with mutable default values as above.

If we don't reuse the compiled regex, we can skip that step. Additionally, because we only target Python 3.6+, we can use f-strings here, too, and make the code a bit less dense:

    def matches(self, mod, strm, prof, out):
        return (
            re.search(fr'^{mod}\s+{strm}\s+{prof}$', out, re.MULTILINE)
            is not None
        )

Ugh, I really don't like the nested function + map here. How about using a conditional expression inside the join instead:
ln[cl_prof] = ", ".join(p + " [d]"
if p in pdefaults.get(modname, {}).get(sname, [])
else p
for p in plist)

Don't know why you don't like the map, I find it more readable than the list comprehension. Anyway I'll change it.

rebased onto 0b54e37

5 months ago

7 new commits added

  • module-summarizer: add tests
  • Add tool for displaying summary of modules
  • fetchrepodata: make merge_modules public
  • repodata: make get_dataset public
  • Add lookup for default stream and profiles
  • Add lookup for module profiles in cached data
  • Use new ImprovedModule available in libmodulemd
5 months ago

7 new commits added

  • module-summarizer: add tests
  • Add tool for displaying summary of modules
  • fetchrepodata: make merge_modules public
  • repodata: make get_dataset public
  • Add lookup for default stream and profiles
  • Add lookup for module profiles in cached data
  • Use new ImprovedModule available in libmodulemd
5 months ago

Don't know why you don't like the map, I find it more readable than the list comprehension. Anyway I'll change it.

Thanks! I guess using map() vs. comprehensions can be a matter of taste. The main reason I prefer the latter is because it's easier for novices to understand the code.

I've noticed one other thing, i.e. that you use spec.v1.yaml in the test suite rather than spec.v2.yaml, but I'll just fix that afterwards.

Pull-Request has been merged by nphilipp

5 months ago