From 0b54e37a0fb3430b97939d3f37c82d9aea5c7402 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Date: Nov 18 2018 23:45:31 +0000 Subject: [PATCH 1/7] Use new ImprovedModule available in libmodulemd This new object will make it possible to extract required information later on for creating a summary of the modules in the system Signed-off-by: Rafael dos Santos --- diff --git a/_fedmod/_fetchrepodata.py b/_fedmod/_fetchrepodata.py index 33b6ad2..7e45ee0 100644 --- a/_fedmod/_fetchrepodata.py +++ b/_fedmod/_fetchrepodata.py @@ -240,42 +240,45 @@ def _read_modules(repo_paths): with gzip.open(repo_modulemd_fname, "rt") as modules_yaml_gz: modules_yaml = modules_yaml_gz.read() - objects = Modulemd.objects_from_string(modules_yaml) - return (o for o in objects if isinstance(o, Modulemd.Module)) + objects, _ = Modulemd.index_from_string(modules_yaml) + return objects.values() # a list of ImprovedModule objects -def _merge_modules(module_sets): +def _merge_modules(module_set): modules = dict() - for module_set in module_sets: - for m in module_set: - old = modules.get((m.props.name, m.props.stream)) - if not old or m.props.version > old.props.version: - modules[(m.props.name, m.props.stream)] = m + for m in module_set: + old = modules.get((m.props.name, m.props.stream)) + if not old or m.props.version > old.props.version: + modules[(m.props.name, m.props.stream)] = m return modules.values() def _write_lookup_caches(paths): - module_sets = [modules_read - for modules_read in ( - _read_modules(repopaths.arch) - for repopaths in paths.repo_paths_by_name.values()) - if modules_read] + index_sets = [index_read + for index_read in ( + _read_modules(repopaths.arch) + for repopaths in paths.repo_paths_by_name.values()) + if index_read] module_forward_lookup = {} srpm_reverse_lookup = defaultdict(list) rpm_reverse_lookup = defaultdict(list) - for module in _merge_modules(module_sets): - module_name = module.props.name - artifacts = module.props.rpm_artifacts.get() - - 
module_forward_lookup[module_name] = list(set(artifacts)) - components = module.props.components_rpm - for srpmname in components: - srpm_reverse_lookup[srpmname].append(module_name) - for rpmname in artifacts: - rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0] - rpm_reverse_lookup[rpmprefix].append(module_name) + for index_set in index_sets: + for index in index_set: + module_name = index.get_name() + # What we think of as module is a ModuleStream for libmodulemd + # We are ignoring context and using the stream with highest version + for module in _merge_modules(index.get_streams().values()): + artifacts = module.props.rpm_artifacts.get() + module_forward_lookup[module_name] = list(set(artifacts)) + components = module.get_rpm_components() #module.props.components_rpm + for srpmname in components: + srpm_reverse_lookup[srpmname].append(module_name) + for rpmname in artifacts: + rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0] + rpm_reverse_lookup[rpmprefix].append(module_name) + # Cache the lookup tables as local JSON files print("Caching lookup tables") From 432e5069452979639a834a5a9c47302282de59b8 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Date: Nov 20 2018 15:45:46 +0000 Subject: [PATCH 2/7] Add lookup for module profiles in cached data The profiles can be obtained through the mapping module_to_profiles = {module-name: {stream-name: [profiles]}} Signed-off-by: Rafael dos Santos --- diff --git a/_fedmod/_fetchrepodata.py b/_fedmod/_fetchrepodata.py index 7e45ee0..4e00efd 100644 --- a/_fedmod/_fetchrepodata.py +++ b/_fedmod/_fetchrepodata.py @@ -140,10 +140,12 @@ def _get_distro_paths(dataset_name): return DistroPaths(release_name, arch) _MODULE_FORWARD_LOOKUP_CACHE = "module-contents" +_PROFILE_FORWARD_LOOKUP_CACHE = "module-profiles" _SRPM_REVERSE_LOOKUP_CACHE = "srpm-to-module" _RPM_REVERSE_LOOKUP_CACHE = "rpm-to-module" _ALL_CACHES = [_MODULE_FORWARD_LOOKUP_CACHE, + _PROFILE_FORWARD_LOOKUP_CACHE, _SRPM_REVERSE_LOOKUP_CACHE, 
_RPM_REVERSE_LOOKUP_CACHE] @@ -263,6 +265,8 @@ def _write_lookup_caches(paths): module_forward_lookup = {} srpm_reverse_lookup = defaultdict(list) rpm_reverse_lookup = defaultdict(list) + # {module-name: {stream: [profiles]}} + profile_forward_lookup = defaultdict(dict) for index_set in index_sets: for index in index_set: @@ -270,6 +274,7 @@ def _write_lookup_caches(paths): # What we think of as module is a ModuleStream for libmodulemd # We are ignoring context and using the stream with highest version for module in _merge_modules(index.get_streams().values()): + sname = module.get_stream() artifacts = module.props.rpm_artifacts.get() module_forward_lookup[module_name] = list(set(artifacts)) components = module.get_rpm_components() #module.props.components_rpm @@ -279,10 +284,14 @@ def _write_lookup_caches(paths): rpmprefix = rpmname.split(":", 1)[0].rsplit("-", 1)[0] rpm_reverse_lookup[rpmprefix].append(module_name) + # {'name': ModuleProfile} + profiles = list(module.get_profiles().keys()) # only names + profile_forward_lookup[module_name][sname] = profiles # Cache the lookup tables as local JSON files print("Caching lookup tables") _write_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE, module_forward_lookup) + _write_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE, profile_forward_lookup) _write_cache(paths, _SRPM_REVERSE_LOOKUP_CACHE, srpm_reverse_lookup) _write_cache(paths, _RPM_REVERSE_LOOKUP_CACHE, rpm_reverse_lookup) @@ -303,6 +312,7 @@ class LocalMetadataCache: srpm_to_modules = attrib(type=dict) rpm_to_modules = attrib(type=dict) module_to_packages = attrib(type=dict) + module_to_profiles = attrib(type=dict) repo_cache_paths = attrib(type=dict) @@ -336,6 +346,7 @@ def load_cached_repodata(dataset_name): srpm_to_modules=_read_cache(paths, _SRPM_REVERSE_LOOKUP_CACHE), rpm_to_modules=_read_cache(paths, _RPM_REVERSE_LOOKUP_CACHE), module_to_packages=_read_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE), + module_to_profiles=_read_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE), 
repo_cache_paths={ n: (c.arch.local_cache_path, c.src.local_cache_path) for n, c in paths.repo_paths_by_name.items() From 65af39760a0cb37e33caec50cf195323d7b79b4b Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Date: Nov 20 2018 15:48:20 +0000 Subject: [PATCH 3/7] Add lookup for default stream and profiles stream_defaults = {module-name: stream-name} profile_defaults = {module-name: {stream-name: [profiles]}} Signed-off-by: Rafael dos Santos --- diff --git a/_fedmod/_fetchrepodata.py b/_fedmod/_fetchrepodata.py index 4e00efd..527539b 100644 --- a/_fedmod/_fetchrepodata.py +++ b/_fedmod/_fetchrepodata.py @@ -141,11 +141,15 @@ def _get_distro_paths(dataset_name): _MODULE_FORWARD_LOOKUP_CACHE = "module-contents" _PROFILE_FORWARD_LOOKUP_CACHE = "module-profiles" +_STREAM_DEFAULT_FORWARD_LOOKUP_CACHE = "stream-defaults" +_PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE = "profile-defaults" _SRPM_REVERSE_LOOKUP_CACHE = "srpm-to-module" _RPM_REVERSE_LOOKUP_CACHE = "rpm-to-module" _ALL_CACHES = [_MODULE_FORWARD_LOOKUP_CACHE, _PROFILE_FORWARD_LOOKUP_CACHE, + _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE, + _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE, _SRPM_REVERSE_LOOKUP_CACHE, _RPM_REVERSE_LOOKUP_CACHE] @@ -267,6 +271,10 @@ def _write_lookup_caches(paths): rpm_reverse_lookup = defaultdict(list) # {module-name: {stream: [profiles]}} profile_forward_lookup = defaultdict(dict) + # {module-name: stream} + stream_defaults_forward_lookup = {} + # {module-name: {stream : [profiles]}} + profile_defaults_forward_lookup = defaultdict(dict) for index_set in index_sets: for index in index_set: @@ -288,10 +296,24 @@ def _write_lookup_caches(paths): profiles = list(module.get_profiles().keys()) # only names profile_forward_lookup[module_name][sname] = profiles + defaults = index.get_defaults() + if not defaults: + continue + + stream_defaults_forward_lookup[module_name] = defaults.peek_default_stream() + # Default profiles for each stream in the module + for s, pset in defaults.peek_profile_defaults().items(): 
+ profile_defaults_forward_lookup[module_name][s] = pset.get() + + # Cache the lookup tables as local JSON files print("Caching lookup tables") _write_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE, module_forward_lookup) _write_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE, profile_forward_lookup) + _write_cache(paths, _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE, + stream_defaults_forward_lookup) + _write_cache(paths, _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE, + profile_defaults_forward_lookup) _write_cache(paths, _SRPM_REVERSE_LOOKUP_CACHE, srpm_reverse_lookup) _write_cache(paths, _RPM_REVERSE_LOOKUP_CACHE, rpm_reverse_lookup) @@ -313,6 +335,8 @@ class LocalMetadataCache: rpm_to_modules = attrib(type=dict) module_to_packages = attrib(type=dict) module_to_profiles = attrib(type=dict) + stream_defaults = attrib(type=dict) + profile_defaults = attrib(type=dict) repo_cache_paths = attrib(type=dict) @@ -347,6 +371,8 @@ def load_cached_repodata(dataset_name): rpm_to_modules=_read_cache(paths, _RPM_REVERSE_LOOKUP_CACHE), module_to_packages=_read_cache(paths, _MODULE_FORWARD_LOOKUP_CACHE), module_to_profiles=_read_cache(paths, _PROFILE_FORWARD_LOOKUP_CACHE), + stream_defaults=_read_cache(paths, _STREAM_DEFAULT_FORWARD_LOOKUP_CACHE), + profile_defaults=_read_cache(paths, _PROFILE_DEFAULT_FORWARD_LOOKUP_CACHE), repo_cache_paths={ n: (c.arch.local_cache_path, c.src.local_cache_path) for n, c in paths.repo_paths_by_name.items() From a236b5b95d896b0f40ce1fa3c8fc3eed5195de36 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Date: Nov 20 2018 15:48:25 +0000 Subject: [PATCH 4/7] repodata: make get_dataset public These methods can be reused by other parts of the code that require access to the local cached data.
Signed-off-by: Rafael dos Santos --- diff --git a/_fedmod/_repodata.py b/_fedmod/_repodata.py index 55143cd..bb45364 100644 --- a/_fedmod/_repodata.py +++ b/_fedmod/_repodata.py @@ -21,7 +21,7 @@ def _load_dataset(): global _ACTIVE_DATASET _ACTIVE_DATASET = load_cached_repodata(dataset_name) -def _get_dataset(): +def get_dataset(): if _ACTIVE_DATASET is None: _load_dataset() return _ACTIVE_DATASET @@ -41,17 +41,17 @@ def set_dataset_name(name): dataset_name = name def list_modules(): - return _get_dataset().module_to_packages.keys() + return get_dataset().module_to_packages.keys() def get_rpms_in_module(module_name): - return _get_dataset().module_to_packages.get(module_name, []) + return get_dataset().module_to_packages.get(module_name, []) def get_modules_for_rpm(rpm_name): - result = _get_dataset().rpm_to_modules.get(rpm_name) + result = get_dataset().rpm_to_modules.get(rpm_name) return result def get_module_for_rpm(rpm_name): - result = _get_dataset().rpm_to_modules.get(rpm_name) + result = get_dataset().rpm_to_modules.get(rpm_name) if result is not None: if len(result) > 1: log.warn(f"Multiple modules found for {rpm_name!r}: {','.join(result)}") @@ -59,7 +59,7 @@ def get_module_for_rpm(rpm_name): return result def get_rpm_reverse_lookup(): - return _get_dataset().rpm_to_modules + return get_dataset().rpm_to_modules class Repo(object): def __init__(self, name, metadata_path): @@ -91,7 +91,7 @@ class Repo(object): path = "{}-{}.solvx".format(path, ext) else: path = "{}.solv".format(path) - return os.path.join(_get_dataset().cache_dir, path.replace("/", "_")) + return os.path.join(get_dataset().cache_dir, path.replace("/", "_")) def usecachedrepo(self, ext, mark=False): try: @@ -135,7 +135,7 @@ class Repo(object): tmpname = None try: fd, tmpname = tempfile.mkstemp(prefix=".newsolv-", - dir=_get_dataset().cache_dir) + dir=get_dataset().cache_dir) os.fchmod(fd, 0o444) f = os.fdopen(fd, "wb+") f = solv.xfopen_fd(None, f.fileno()) @@ -287,7 +287,7 @@ def 
load_stub(repodata): return False def setup_repos(): - dataset = _get_dataset() + dataset = get_dataset() repos = [] for reponame, (arch_cache_path, src_cache_path) in ( From 880076460b76c70780532545df5c97770a38a436 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Date: Nov 20 2018 15:48:25 +0000 Subject: [PATCH 5/7] fetchrepodata: make merge_modules public Signed-off-by: Rafael dos Santos --- diff --git a/_fedmod/_fetchrepodata.py b/_fedmod/_fetchrepodata.py index 527539b..c69a602 100644 --- a/_fedmod/_fetchrepodata.py +++ b/_fedmod/_fetchrepodata.py @@ -249,7 +249,11 @@ def _read_modules(repo_paths): objects, _ = Modulemd.index_from_string(modules_yaml) return objects.values() # a list of ImprovedModule objects -def _merge_modules(module_set): +def merge_modules(module_set): + """ + Given a list of ModuleStream objects, "merge" them by picking only the + ModuleStream with highest version + """ modules = dict() for m in module_set: @@ -281,7 +285,7 @@ def _write_lookup_caches(paths): module_name = index.get_name() # What we think of as module is a ModuleStream for libmodulemd # We are ignoring context and using the stream with highest version - for module in _merge_modules(index.get_streams().values()): + for module in merge_modules(index.get_streams().values()): sname = module.get_stream() artifacts = module.props.rpm_artifacts.get() module_forward_lookup[module_name] = list(set(artifacts)) From f466b1e4a91cefa783e640b7c368725c2ec44335 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Date: Nov 20 2018 15:56:03 +0000 Subject: [PATCH 6/7] Add tool for displaying summary of modules It prints name, streams and profiles of modules in cached repository as well as in any modulemd files provided as input Signed-off-by: Rafael dos Santos --- diff --git a/_fedmod/cli.py b/_fedmod/cli.py index e922be7..2ae79c8 100644 --- a/_fedmod/cli.py +++ b/_fedmod/cli.py @@ -7,6 +7,7 @@ from .config import config from .flatpak_generator import FlatpakGenerator, do_flatpak_report from 
.module_generator import ModuleGenerator from .module_repoquery import ModuleRepoquery +from .modulemd_summarizer import summarize_modules from . import modulemd_linter as mmdl from . import _depchase, _repodata, _fetchrepodata @@ -173,3 +174,14 @@ def lint(modulemd, min_level): """Validates a given modulemd YAML file""" linter = mmdl.ModuleMDLinter(modulemd_path=modulemd) return linter.lint(min_level=min_level) + + +# Summarize a list of modulemd files +@_cli_commands.command('summarize-module') +@click.argument("modules", metavar='MODULES', nargs=-1, required=False) +@click.option("--add-file", "-f", metavar="FILE", multiple=True, + type=click.Path(exists=True), + help="Additional modulemd files to check. Can be given multiple times") +def summarize(modules, add_file): + """Prints a summary of available modules""" + summarize_modules(modules, add_file) diff --git a/_fedmod/modulemd_summarizer.py b/_fedmod/modulemd_summarizer.py new file mode 100644 index 0000000..f074c69 --- /dev/null +++ b/_fedmod/modulemd_summarizer.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +# +# modulemd_summarizer - Prints a summary of ModuleMD files +# +# Copyright © 2018 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# +# Author: +# Rafael dos Santos + +import smartcols + +import gi +gi.require_version('Modulemd', '1.0') +from gi.repository import Modulemd + +from ._repodata import get_dataset +from ._fetchrepodata import merge_modules + + +def print_summary(profiles, sdefaults=None, pdefaults=None, restrict_to=None): + sdefaults = sdefaults or {} + pdefaults = pdefaults or {} + restrict_to = restrict_to or [] + + tb = smartcols.Table() + cl = tb.new_column('Name') + cl_strm = tb.new_column('Stream') + cl_prof = tb.new_column('Profiles') + for modname, sdict in sorted(profiles.items()): + if restrict_to and modname not in restrict_to: + continue + + def is_def_strm(s): + return s == sdefaults.get(modname, '') + + for sname, plist in sorted(sdict.items()): + ln = tb.new_line() + ln[cl] = modname + ln[cl_strm] = sname + ' [d]' * is_def_strm(sname) + + def is_def_prof(p): + return p in pdefaults.get(modname, {}).get(sname, []) + + ln[cl_prof] = ', '.join(p + ' [d]' if is_def_prof(p) else p + for p in plist) + + print(tb) + print('\nHint: [d]efault') + + +def _add_module_metadata(yaml_files, profiles, dstreams, dprofiles): + for yaml in yaml_files: + assert yaml.endswith('.yaml'), "Not a yaml file: {}".format(yaml) + + mmd_index, failures = Modulemd.index_from_file(yaml) + assert len(failures) == 0, failures + for module_name, index in mmd_index.items(): + for module in merge_modules(index.get_streams().values()): + stream_name = module.get_stream() + plist = list(module.get_profiles().keys()) + profiles.setdefault(module_name, {}).setdefault( + stream_name, []).extend(plist) + + defaults = index.get_defaults() + if not defaults: + continue + + # Local module metadata can overwrite metadata from repo + dstreams[module_name] = defaults.peek_default_stream() + for s, pset in defaults.peek_profile_defaults().items(): + dprofiles[module_name][s] = pset.get() + + +def summarize_modules(restrict_list=None, yaml_files=None): + """ + Load Modulemd objects from each repository in repo_list 
and print a summary + of the modules found with a summary of their streams and profiles. + + *restrict_list*: if present, restricts output to modules supplied + + *yaml_files*: additional yaml files to parse and include in the summary + """ + + profiles = get_dataset().module_to_profiles + dstreams = get_dataset().stream_defaults + dprofiles = get_dataset().profile_defaults + + if yaml_files: + _add_module_metadata(yaml_files, profiles, dstreams, dprofiles) + + print_summary(profiles, dstreams, dprofiles, restrict_list) From 7cf33a16807931fb6bc5ee8e1c3d902bd9f76b1f Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Date: Nov 20 2018 15:59:04 +0000 Subject: [PATCH 7/7] module-summarizer: add tests Signed-off-by: Rafael dos Santos --- diff --git a/tests/test_module_summary.py b/tests/test_module_summary.py new file mode 100644 index 0000000..2ccd201 --- /dev/null +++ b/tests/test_module_summary.py @@ -0,0 +1,54 @@ +"""In-process tests for the module summary functionality""" + +import re +import pytest +import os.path +from _fedmod.modulemd_summarizer import summarize_modules + + +testfiles_dir = os.path.join(os.path.dirname(__file__), 'files') +spec_v1_yaml_path = os.path.join(testfiles_dir, 'spec.v1.yaml') + + +@pytest.mark.needs_metadata +class TestModuleSummary(object): + + def matches(self, mod, strm, prof, out): + return re.search(fr'^{mod}\s+{strm}\s+{prof}$', out, re.M) is not None + + # FIXME: we should mock the fetched metadata so that these tests do not + # fail when the metadata changes + def test_summarize_modules(self, capfd): + summarize_modules() + out, err = capfd.readouterr() + + assert self.matches('reviewboard', '2.5', + r'server, default \[d\]', + out) + assert self.matches('reviewboard', '3.0', + r'server, default \[d\]', + out) + assert self.matches('testmodule', 'master', 'default', out) + + def test_summarize_modules_restricted(self, capfd): + summarize_modules(['reviewboard', 'django']) + out, err = capfd.readouterr() + + assert 
self.matches('reviewboard', '2.5', + r'server, default \[d\]', + out) + assert self.matches('reviewboard', '3.0', + r'server, default \[d\]', + out) + assert self.matches('django', '1.6', + r'python2_development, default \[d\]', + out) + assert not self.matches('testmodule', 'master', 'default', out) + + def test_summarize_modules_local_files(self, capfd): + summarize_modules(yaml_files=[spec_v1_yaml_path]) + out, err = capfd.readouterr() + + assert self.matches('testmodule', 'master', 'default', out) + assert self.matches('foo', 'stream-name', 'minimal, buildroot, ' + + 'container, srpm-buildroot, default', out)