#92 More improvements to summarize-module
Closed 5 years ago by nphilipp. Opened 5 years ago by rdossant.
modularity/ rdossant/fedmod summarize-improve  into  master

file modified
+9 -4
@@ -196,10 +196,15 @@ 

  @click.argument("modules", metavar='MODULES', nargs=-1, required=False)

  @click.option("--add-file", "-f", metavar="FILE", multiple=True,

                type=click.Path(exists=True),

-               help="Additional modulemd files to check."

-                    " Can be given multiple times.")

+               help="Modulemd files to check. "

+                    "Causes cached metadata to be ignored. "

+                    "Can be given multiple times.")

+ @click.option("--add-url", "-u", metavar="URL", multiple=True,

+               help="Repositories to read module metadata from."

+                    "Causes cached metadata to be ignored. "

+                    "Can be given multiple times.")

  @click.option("--tree", "-t", is_flag=True, default=False,

                help="Print output as a tree")

- def summarize(modules, add_file, tree):

+ def summarize(modules, add_file, add_url, tree):

      """Prints a summary of available modules"""

-     summarize_modules(modules, add_file, tree)

+     summarize_modules(modules, add_file, add_url, tree)

file modified
+71 -50
@@ -24,12 +24,16 @@ 

  gi.require_version('Modulemd', '1.0')  # noqa: E402

  from gi.repository import Modulemd

  

+ import tempfile

  import smartcols

+ from fnmatch import fnmatch

+ from click import ClickException

+ from collections import defaultdict

  

- from . import _repodata

+ from . import _repodata, _fetchrepodata

  

  

- def _print_summary(profiles, sdefaults, pdefaults, deps, restrict_to, as_tree):

+ def _print_summary(profiles, sdefaults, pdefaults, deps, mfilter, as_tree):

      tb = smartcols.Table()

      cl = tb.new_column('Name')

      cl.tree = as_tree
@@ -40,11 +44,11 @@ 

      cl_deps = tb.new_column('Dependencies')

      parent_ln = {}

      for nsvc, plist in sorted(profiles.items()):

-         modname, sname, version, context = nsvc.split(':')

- 

-         if restrict_to and modname not in restrict_to:

+         if mfilter and not any((fnmatch(nsvc, p) for p in mfilter)):

              continue

  

+         modname, sname, version, context = nsvc.split(':')

+ 

          def is_def_strm(s):

              return s == sdefaults.get(modname, '')

  
@@ -81,57 +85,74 @@ 

      print('\nHint: [d]efault')

  

  

- def _add_module_metadata(yaml_files, profiles, dstreams, dprofiles, deps):

-     for yaml in yaml_files:

-         assert yaml.endswith('.yaml'), "Not a yaml file: {}".format(yaml)

- 

-         mmd_index, failures = Modulemd.index_from_file(yaml)

-         assert len(failures) == 0, failures

- 

-         for module_name, index in mmd_index.items():

-             for module in index.get_streams().values():

-                 # local modulemd files might miss some info like context or

-                 # version. So let's make sure nsvc is consistent

-                 ctxt = module.get_context() or ''

-                 nsvc = f'{module_name}:{module.get_stream()}:' \

-                        f'{module.get_version()}:{ctxt}'

-                 plist = list(module.get_profiles().keys())

-                 profiles[nsvc] = sorted(set(profiles.get(nsvc, []) + plist))

- 

-                 for dep in module.get_dependencies():

-                     deplist = set(deps.get(nsvc, []))

-                     for m, s in dep.peek_requires().items():

-                         deplist.add(f"{m}:{','.join(s.get())}"

-                                     if len(s.get()) else m)

-                     deps[nsvc] = sorted(deplist)

- 

-             defaults = index.get_defaults()

-             if not defaults:

-                 continue

+ def _parse_mmd(mmd_index, profiles, dstreams, dprofiles, deps):

+     for index in mmd_index:

+         module_name = index.get_name()

+         for module in index.get_streams().values():

+             # local modulemd files might miss some info like context or

+             # version. So let's make sure nsvc is consistent

+             ctxt = module.get_context() or ''

+             nsvc = f'{module_name}:{module.get_stream()}:' \

+                    f'{module.get_version()}:{ctxt}'

+             plist = list(module.get_profiles().keys())

+             profiles[nsvc] = sorted(set(profiles.get(nsvc, []) + plist))

+ 

+             for dep in module.get_dependencies():

+                 deplist = set(deps.get(nsvc, []))

+                 for m, s in dep.peek_requires().items():

+                     deplist.add(f"{m}:{','.join(s.get())}"

+                                 if len(s.get()) else m)

+                 deps[nsvc] = sorted(deplist)

+ 

+         defaults = index.get_defaults()

+         if not defaults:

+             continue

  

-             # Local module metadata can overwrite metadata from repo

-             dstreams[module_name] = defaults.peek_default_stream()

-             for s, pset in defaults.peek_profile_defaults().items():

-                 dprofiles[module_name][s] = pset.get()

+         # Local module metadata can overwrite metadata from repo

+         dstreams[module_name] = defaults.peek_default_stream()

+         for s, pset in defaults.peek_profile_defaults().items():

+             dprofiles[module_name][s] = pset.get()

  

  

- def summarize_modules(restrict_list=None, yaml_files=None, as_tree=False):

+ def summarize_modules(mfilter=None, yamls=None, urls=None, as_tree=False):

      """

-     Load Modulemd objects from each repository in repo_list and print a summary

-     of the modules found with a summary of their streams and profiles.

+     Load Modulemd objects from each yaml file in the `yamls` list or each url

+     in `urls` list and print a summary of the modules found with streams,

+     context, version, profiles and dependencies information.

+     If no files and no ulrs are passed, local cached metadata is used instead.

  

-     *restrict_list*: if present, restricts output to modules supplied

+     *mfilter*: if present, restricts output to the nsvc supplied. Glob pattern

+                can be used, e.g for filtering by stream: '*:master:*'

  

-     *yaml_files*: additional yaml files to parse and include in the summary

-     """

+     *yamls*: list of yaml files to parse. If any, locally cached metadata will

+              be ignored

  

-     profiles = _repodata.get_modules_profiles_lookup().copy()

-     dstreams = _repodata.get_modules_default_streams_lookup().copy()

-     dprofiles = _repodata.get_modules_default_profiles_lookup().copy()

-     deps = _repodata.get_modules_dependencies_lookup().copy()

+     *urls*: list of repositories to read from. If any, locally cached metadata

+             will be ignored.

  

-     if yaml_files:

-         _add_module_metadata(yaml_files, profiles, dstreams, dprofiles, deps)

+     *as_tree*: print the summary in tree format, grouping information.

+     """

  

-     restrict_list = restrict_list or []

-     _print_summary(profiles, dstreams, dprofiles, deps, restrict_list, as_tree)

+     profiles, dstreams, dprofiles, deps = {}, {}, defaultdict(dict), {}

+     if yamls:

+         for yaml in yamls:

+             index, failures = Modulemd.index_from_file(yaml)

+             if len(failures) != 0:

+                 msgs = "\n".join((str(f.get_gerror()) for f in failures))

+                 raise ClickException(f"Could not read {yaml}: {msgs}")

+             _parse_mmd(index.values(), profiles, dstreams, dprofiles, deps)

+     elif urls:

+         for url in urls:

+             with tempfile.TemporaryDirectory() as local_path:

+                 rp = _fetchrepodata.RepoPaths(url, local_path)

+                 _fetchrepodata._download_metadata_files(rp)

+                 indexes = _fetchrepodata._read_modules(rp)

+                 _parse_mmd(indexes, profiles, dstreams, dprofiles, deps)

+     else:

+         profiles = _repodata.get_modules_profiles_lookup()

+         dstreams = _repodata.get_modules_default_streams_lookup()

+         dprofiles = _repodata.get_modules_default_profiles_lookup()

+         deps = _repodata.get_modules_dependencies_lookup()

+ 

+     mfilter = mfilter or []

+     _print_summary(profiles, dstreams, dprofiles, deps, mfilter, as_tree)

file modified
+2 -4
@@ -35,7 +35,7 @@ 

                              'c2c572ec', 'default', 'platform:f29', out)

  

      def test_summarize_modules_restricted(self, capfd):

-         summarize_modules(['reviewboard', 'django'])

+         summarize_modules(['reviewboard*', 'django*'])

          out, err = capfd.readouterr()

  

          assert self.matches('reviewboard', '2.5', '20180828143308', '083bce86',
@@ -51,11 +51,9 @@ 

                                  'c2c572ec', 'default', 'platform:f29', out)

  

      def test_summarize_modules_local_files(self, capfd):

-         summarize_modules(yaml_files=[spec_v2_yaml_path])

+         summarize_modules(yamls=[spec_v2_yaml_path])

          out, err = capfd.readouterr()

  

-         assert self.matches('testmodule', 'master', '20180405123256',

-                             'c2c572ec', 'default', 'platform:f29', out)

          assert self.matches('foo', 'stream-name', '20160927144203', 'c0ffee43',

                              'buildroot, container, default, minimal, ' +

                              'srpm-buildroot', 'compatible:v3,v4,extras,' +

  • Ignore cached metadata when yaml is passed in the cmdline
  • Glob pattern match filtering
  • Update method documentation
  • Add option to read metadata from a repo URL

Metadata Update from @nphilipp:
- Request assigned

5 years ago

Same old story... :stuck_out_tongue_winking_eye:

For each commit:
- Running flake8 shouldn't flag errors or warnings.
- Unit tests should succeed. E.g. One of the existing module summarizer unit tests verifies the testmodule module which is only available in the repository when it needs to operate on the foo module which is described in spec.v2.yaml.

There's an easy way to run flake8 or the unit tests on all the commits in your branch (assuming you branched off master): running git rebase --exec '...' master which will execute the same command for each commit since master (or your branch off point referenced by hash). Once the command (e.g. flake8 or pytest) fails, it will interrupt the rebase process so things can be fixed & amended, then git rebase --continue.

One other thing I noticed is that you use assert to flag error conditions. There's a problem with that: assert statements won't be run if the interpreter is run with certain flags. Instead of assert, just check the condition and raise click.ClickException(...) with a suitable error description.

Aside from using assert, it would be better just attempting to read the file with libmodulemd because a file ending in .yaml doesn't guarantee that the file actually contains YAML documents.

rebased onto 3ea1489

5 years ago

1 new commit added

  • summarizer: replace assert by exception
5 years ago

I didn't find any flake8 messages. Tests were fixed and I replaced the assert by the Exception as asked.

Here's what I mean:

(fedmod) nils@gibraltar:~/src/fedmod/rdossant (rdossant-summarize)> git rebase master --exec 'git log --oneline -1; flake8'
Executing: git log --oneline -1; flake8
3ea1489 (HEAD) summarizer: ignore cached metadata when given a yaml
./_fedmod/modulemd_summarizer.py:28:1: I100 Import statements are in the wrong order. 'from collections import defaultdict' should be before 'import smartcols' and in a different group.
./_fedmod/modulemd_summarizer.py:28:1: I201 Missing newline between import groups. 'from collections import defaultdict' is identified as Stdlib and 'import smartcols' is identified as Third Party.
warning: execution failed: git log --oneline -1; flake8
You can fix the problem, and then run

  git rebase --continue


(fedmod) nils@gibraltar:~/src/fedmod/rdossant ((no branch, rebasing rdossant-summarize))>

And what i mean is:

$: flake8 _fedmod/modulemd_summarizer.py 
$: 

So flake8 doesn't give me the same warnings it gives you. I wonder why. Do you use some specific config for flake8?

If we do add a flake8 configuration file (.flake8, tox.ini or whatever), can we switch to a longer line limit? - 80 is distinctly annoying and, in my opinion, decreases the legibility of code by forcing a lot of line folding. A line length limit of 100 is a common value for many projects

@rdossant:

So flake8 doesn't give me the same warnings it gives you. I wonder why. Do you use some specific config for flake8?

I use the plain packaged version from Fedora 29 without any custom configuration on my end. To rule the latter out, I've run it with --config=/dev/null, to the same result:

(fedmod) nils@gibraltar:~/src/fedmod/rdossant (rdossant-summarize)> rpm -q python3-flake8
python3-flake8-3.5.0-6.fc29.noarch
(fedmod) nils@gibraltar:~/src/fedmod/rdossant (rdossant-summarize)> which flake8
/usr/bin/flake8
(fedmod) nils@gibraltar:~/src/fedmod/rdossant (rdossant-summarize)> flake8 --config=/dev/null
./_fedmod/modulemd_summarizer.py:27:1: I100 Import statements are in the wrong order. 'import tempfile' should be before 'from gi.repository import Modulemd' and in a different group.
./_fedmod/modulemd_summarizer.py:28:1: I201 Missing newline between import groups. 'import smartcols' is identified as Third Party and 'import tempfile' is identified as Stdlib.
./_fedmod/modulemd_summarizer.py:29:1: I100 Import statements are in the wrong order. 'from fnmatch import fnmatch' should be before 'import smartcols' and in a different group.
./_fedmod/modulemd_summarizer.py:29:1: I201 Missing newline between import groups. 'from fnmatch import fnmatch' is identified as Stdlib and 'import smartcols' is identified as Third Party.
./_fedmod/modulemd_summarizer.py:30:1: I201 Missing newline between import groups. 'from click import ClickException' is identified as Third Party and 'from fnmatch import fnmatch' is identified as Stdlib.
./_fedmod/modulemd_summarizer.py:31:1: I100 Import statements are in the wrong order. 'from collections import defaultdict' should be before 'from click import ClickException' and in a different group.
./_fedmod/modulemd_summarizer.py:31:1: I201 Missing newline between import groups. 'from collections import defaultdict' is identified as Stdlib and 'from click import ClickException' is identified as Third Party.
./_fedmod/modulemd_summarizer.py:33:1: I101 Imported names are in the wrong order. Should be _fetchrepodata, _repodata

@otaylor, fair enough, now I've found a font which I can read well at 11pt which I need to display two terminals of 100(+2) chars side-by-side (DejaVu Sans Mono): commit d899df1

$: rpm -q python3-flake8
python3-flake8-3.5.0-6.fc29.noarch
$: which flake8
/usr/bin/flake8
$: flake8 --config=/dev/null
$: 

Meanwhile we've found the culprit to be that I have the flake8-import-order plugin installed. I'll document this so contributions shouldn't be running into this particular snag in the future.

Applied with the discussed changes (import order) in commit b685535.

Pull-Request has been closed by nphilipp

5 years ago