PR#10: Expanding modlint - modulemd

		`@@ -1,23 +1,130 @@`
		`#!/usr/bin/python3`
		`- import modulemd`
		`+`
		`+`
		`+ import argparse`
		`import sys`

		`- def usage():`
		`- print("Usage: modlint <file>")`
		`- sys.exit()`
		`+ import requests`
		`+ from modulemd import ModuleMetadata`
		`+`
		`+`
		`+ class RpmContentError(ValueError):`
		`+ """Metadata contains invalid/nonexistent srpm or commit."""`
		`+`
		`+ #: Mapping of HTTP status codes to actual failure reasons.`
		`+ REASONS = {`
		`+ 400: 'Bad hash',`
		`+ 404: 'Nonexistent rpm',`
		`+ }`
		`+`
		`+ def __init__(self, package, commit, status_code):`
		`+ super().__init__(self, package, commit, status_code)`
		`+`
		`+ self.package = package`
		`+ self.commit = commit`
		`+ self.status_code = status_code`
		`+`
		`+ def __repr__(self):`
		`+ return 'RpmContentError({pkg!r}, {commit!r}, {status!r})'.format(`
		`+ pkg=self.package,`
		`+ commit=self.commit,`
		`+ status=self.status_code`
		`+ )`
		`+`
		`+ def __str__(self):`
		`+ return '{pkg} [{commit}]: {reason}'.format(`
		`+ pkg=self.package,`
		`+ commit=self.commit,`
		`+ reason=self.REASONS.get(self.status_code, 'Unknown error')`
		`+ )`
		`+`
		`+`
		`+ def verify_rpm_content(package, metadata, *, requests=requests):`
		`+ """Verify the existence of dist-git repo for described package.`
		`+`
		`+ Keyword arguments:`
		`+ package -- Name of the package to verify (key in`
		`+ modulemd packages)`
		`+ metadata -- Metadata associated with the package`
		`+ (value in modulemd packages)`
		`+`
		`+ requests -- Dependency injection of networking library.`
		`+`
		`+ Returns:`
		`+ None if the associated repo exists.`
		`+`
		`+ Raises:`
		`+ RpmContentError -- when the package (or commit) does not exist`
		`+ in dist-git.`
		`+ """`
		`+`
		`+ # TODO: Get URL from the metadata`
		`+ CGIT_URL_TEMPLATE = 'http://pkgs.fedoraproject.org/cgit/rpms/{name}.git/commit'`
		`+`
		`+ commit = metadata.get('commit', 'HEAD')`
		`+`
		`+ response = requests.head(`
		`+ CGIT_URL_TEMPLATE.format(name=package),`
		`+ params={'id': commit}`
		`+ )`
		`+`
		`+ if response.status_code != requests.codes.ok:`
		`+ raise RpmContentError(package, commit, response.status_code)`
		`+`
		`+`
		`+ def rpm_content(mmd):`
		`+ """Generate (package, metadata) pairs from module.`
		`+`
		`+ Keyword arguments:`
		`+ mmd -- ModuleMetadata to be inspected.`
		`+`
		`+ Yields:`
		`+ (package, metadata) from mmd, if any.`
		`+ """`
		`+`
		`+ try:`
		`+ packages = mmd.components.rpms.packages`
		`+ except AttributeError:`
		`+ # Module has no packages -- end generation with no output`
		`+ return None`
		`+`
		`+ yield from packages.items()`
		`+`
		`+`
		`+ if __name__ == '__main__':`
		`+ parser = argparse.ArgumentParser(description='Validate module metadata.')`
		`+`
		`+ # Positional arguments`
		`+ parser.add_argument('file', help='Input metadata file')`
		`+`
		`+ args = parser.parse_args()`
		`+`
		`+ metadata = ModuleMetadata()`
		`+ metadata_errors = TypeError, ValueError`
		`+ all_went_well = True`

		`- if (len(sys.argv) != 2):`
		`- usage()`
		`+ try:`
		`+ metadata.load(args.file)`
		`+ except metadata_errors as invalid_input_metadata:`
		`+ message = 'ERROR: Invalid input: {!s}'.format(invalid_input_metadata)`
		`+ sys.exit(message)`

		`- filename = sys.argv[1]`
		`- modulemd = modulemd.ModuleMetadata()`
		`+ try:`
		`+ metadata.validate()`
		`+ except metadata_errors as invalid_metadata_structure:`
		`+ all_went_well = False`
		`+ print('ERROR: Invalid structure:', str(invalid_metadata_structure),`
		`+ file=sys.stderr)`

		`- try:`
		`- modulemd.load(filename)`
		`- modulemd.validate()`
		`- except Exception as e:`
		`- # raise`
		`- print("ERROR: ", str(e))`
		`- sys.exit()`
		`+ for package in rpm_content(metadata):`
		`+ try:`
		`+ verify_rpm_content(*package)`
		`+ except RpmContentError as invalid_rpm:`
		`+ all_went_well = False`
		`+ print('RPM CONTENT ERROR:', str(invalid_rpm),`
		`+ file=sys.stderr)`

		`- print("Everything is OK!")`
		`+ if all_went_well:`
		`+ print('Everything OK')`
		`+ else:`
		`+ sys.exit(1)`

jstanek commented 7 years ago

This is more request for comments/review than PR.

I have expanded the modlint script with dist-git sanity check. For each package in the content, the Fedora CGIT is checked for existence of that package (and if present, its commit).

RFC:

New 3rd party dependency: requests toolkit. Is it OK to expand the dependecy list?
Code uses typing module from python 3.5, and it is thus only compatible with python 3.5+ (Fedora 24+). Is it OK, or should it be more backward compatible?

nphilipp commented on line 57 of modlint 7 years ago

The format string is nested badly:

>>> '{pkg [{hash}]: {msg}}'.format(pkg="foo", hash="hash", msg="BOOH")
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-1-2b656e3c1a82> in <module>()

Did you mean '{pkg}[{hash}]: {msg}'.format(...) instead?

nphilipp commented on line 59 of modlint 7 years ago

You could use the dict.get() method instead:

hash=details.get('commit', 'HEAD')

nphilipp commented on line 28 of modlint 7 years ago

Hmm. I don't particularly like returning a list of errors - we have exceptions for that. The name of the function could also be a little more descriptive:

class RpmContentError(Exception):
    ...

def verify_rpm_content(mmd: ModuleMetadata) -> None:
    """Check that rpm content points to an existing repository (commit).

    For each package in the module, this function queries dist-git
    and uses the status code to determine if the package, and
    optionally the commit, exists.

    Arguments:
        mmd -- The module metadata object to be checked

    Returns: Nothing

    Raises: An exception if packages, or commits can't be found.
    ...
    errors = []
    for package, details in ...:
        ...
        if response.status_code != requests.codes.ok:
            errors.append("{pkg}[{hash}]: {msg}".format(...))
    if errors:
        raise RpmContentError(errors)

nphilipp commented on line 85 of modlint 7 years ago

Why do you catch all exceptions and continue here? metadata probably won't be usable below.

nphilipp commented on line 102 of modlint 7 years ago

The "by-the-book" way would be to import sys at the top, then sys.exit(1) ;).

nphilipp commented on line 17 of modlint 7 years ago

Ah, interesting, the first time I've seen type annotations in the wild :). Do you have tools that use this information?

nphilipp commented 7 years ago

As to your RFCs:

I think it's totally okay to add the requests package as a new dependency for a developer tool like modlint
What benefit does using the typing module bring us? We may want that people on older releases can write modules, too, so IMO its benefit would have to greatly outweigh the cost of excluding people on older releases of not so much Fedora, but RHEL or CentOS.

jstanek commented on line 17 of modlint 7 years ago

Not really, I use it mainly as code documentation which could be potentially checked by some static analysis.

jstanek commented on line 28 of modlint 7 years ago

True, but my intention here was to report the find to the caller as soon as possible, and continue the search for others when asked. Among other things, it allows for printing errors as they are found, instead of waiting for whole check (which could take a while) and then dumping it all at once.

jstanek commented on line 102 of modlint 7 years ago

Well, yes, but since sys.exit() ultimately raises SystemExit() and I do not need sys for anything else, I figured that I will raise it directly, saving an import that is otherwise unneeded.

jstanek commented 7 years ago

What benefit does using the typing module bring us? We may want that people on older releases can write modules, too, so IMO its benefit would have to greatly outweigh the cost of excluding people on older releases of not so much Fedora, but RHEL or CentOS.

Good point. Since I use it mainly as code documentation, I will drop it in the name of older releases.

2 new commits added

Split metadata loading and validation to separate steps
Get default hash using builtin method

7 years ago

1 new commit added

Report rpm content errors with exceptions

7 years ago

nphilipp commented 7 years ago

Thanks for explaining your reasons, and for addressing my objections ;).

Let me expand on my comments a bit:

The idea of using a generator the way you did is very nifty, but also pretty unconventional which makes it harder for others to understand what's happening, especially to people not so familiar with Python. Additionally to "doing it the boring way", you could add a "don't break off at first error" option/flag which would then continue processing the file, gathering errors and raising an exception only when finished (and put the error reporting inside the function).
I guess the use of sys.exit() vs. raise SystemExit() is a matter of taste, the only real advantage of the function is that you can do things like sys.exit("Error: An error happened") which will print the message to stderr and exit with exitcode 1.

jstanek commented 7 years ago

Thank you for review :)

Ad the generator/exceptions: I rethought the process and split the function–
see commit 4fb5a70. Now the validation is scoped to single
package, and the iteration/catching/reporting is done in the main section.
Does it look better to you?
Ad the sys.exit vs. SystemExit – AFAIK they acts exactly the same, you
can pass the string message to the exception too ;). But since I noticed
I am in fact using the sys module already, I changed it to the sys.exit
function – perhaps it will reduce confusion :)

Edited 7 years ago by jstanek

nphilipp commented on line 23 of modlint 7 years ago

Not sure why you use a docstring here and not a comment before the REASON dict – I really had to think twice what this line is referring to.

nphilipp commented on line 33 of modlint 7 years ago

You could use {pkg!r} etc. in the format string, so strings get quoted.

nphilipp commented on line 92 of modlint 7 years ago

For my understanding, is accessing .rpms.packages this way and catching AttributeError the canonical way for dealing with modules, and does it avoid hiding real errors?

nphilipp commented on line 93 of modlint 7 years ago

Hah, wasn't aware that you could return from generators to cut them short :). I think using return would better convey the idea (adding a comment, too :wink: ), I was actually looking for what happened to None in the caller (i.e. nothing, unless you catch the StopIteration object and inspect it).

nphilipp commented on line 143 of modlint 7 years ago

Maybe I'm too pedantic :wink:, but how about wrapping everything from the line where all_went_well is initialized to about here in a try: ... except: ... block instead? That seems more idiomatic to me.

jstanek commented 7 years ago

Since in-file comments get lost after new commit, I will respond in normal one,
with references to the file as links. Hopefully it helps with the mental
juggling.

Docstring after assignment
```
REASONS = {
    400: 'Bad hash',
    404: 'Nonexistent rpm',
}
"""Mapping of HTTP status codes to actual failure reasons."""
```
Not sure why you use a docstring here and not a comment before the
REASON dict – I really had to think twice what this line is referring
to.

I was under impression that this kind of docstring is extractable by
automated tools in standard library. However, after re-reading the
PEP 257, it seems that this is
more of proposal than usable fact. Changing to comment.
Nested attribute access, returning from generators
```
try:
    packages = mmd.components.rpms.packages
except AttributeError:
    return None
```
For my understanding, is accessing .rpms.packages this way and catching
AttributeError the canonical way for dealing with modules, and does it
avoid hiding real errors?

AttributeError indicates that the code is accessing nonexistent
attribute/property. I found it convenient for avoiding "nested" ifs like
if mmd.components is not None and mmd.components.rpms is not None and ...
(since each step might be None). I'm not aware about any "canonical" way
to safely access the package contents.

Only "real" errors I'm aware of that could be hidden would occur if the
modulemd inner structure change – the function would then always return
without iterating.

Maybe the modulemd author could provide insight on this matter :) Any
comments here, @psabata?

Hah, wasn't aware that you could return from generators to cut them
short :). I think using return would better convey the idea (adding
a comment, too 😉), I was actually looking for what happened to None
in the caller (i.e. nothing, unless you catch the StopIteration object
and inspect it).

return None is the same as return
(doc).
And since <q cite='import this'>explicit is better than implicit</q>,
I think return None is better ("give him nothing").

I will add at least comment explanation to prevent further confusion. I see
I might be too generator-happy :)
Error reporting
```
all_went_well = True
...
except ...:
    all_went_well = False
    ...
...
if all_went_well:
    print('Everything OK')
else:
    sys.exit(1)
```
Maybe I'm too pedantic :wink:, but how about wrapping everything from
the line where all_went_well is initialized to about here in a try: ... except: ... block instead? That seems more idiomatic to me.

My issue with that is that try: ... except: ... block will short-circuit
the script, ending after first exception. I want to run all the checks, note
if any errors occurred, and then exit accordingly. I would love to do it in
more pythonic way, but I'm afraid that wrapping it all in one
exception-catching block will not suffice. Suggestions are welcome 😉.

Edited 7 years ago by jstanek

nphilipp commented 7 years ago

Hmm, need to think about the issues you raise. Perhaps time for a third opinion on the above. @psabata?

jkaluza commented 7 years ago

Docstring after assignment

If you want the doc string to appear at least in the modulemd readthedocs.org documentation, you can use "#: comment" before that variable.

Nested attribute access, returning from generators

That's interesting question. I would vote for using try/except blocks, but use tests to ensure that internal structure modulemd changes are handled - in that case, test would fail.

Nils, can we agree here that this is OK for this sprint, and in the next sprint, I will write unit-tests for the modlint to ensure this is tested automatically?

I think we plan to release modlint together with modulemd, so it should always use the right version of modulemd library.

Error reporting

I agree with Jan here.

nphilipp commented 7 years ago

@jkaluza Sure, let's merge this as is, for you to improve upon later.

@jstanek Do you want to change the doc string as @jkaluza described above before I merge, or should I?

There are a couple of things I want to fix regarding commit structure before merging, e.g.:

don't introduce something (like the typing module) only to remove it later
split up large commits into topical ones (e.g. the last commit)

Okay if I do that?

jkaluza commented 7 years ago

I'm not sure if jstanek is reading that or will do the job.

Nils, can you do the changes? I'm OK with them. or I can fork modulemd and do the changes myself and send another pull request.

jstanek commented 7 years ago

I'm still reading this and aim to finish it :wink:. I will implement the agreed changes.

Ad commit structure: I'm not sure how to change commits after pushing them, so if you know a way, fill free to do it, Nils.

3 new commits added

Add explanation for returning from generator early
Add proper quoting to repr string
Change attribute docstring to documentation comment

7 years ago

nphilipp commented 7 years ago

I've merged the changes as discussed.

https://pagure.io/modulemd/c/d0f5063f60b1533abadf648f9c3ed7fc3305d5c2?branch=master

Pull-Request has been closed by nphilipp

7 years ago

Metadata

Assignee

None

Tags

No Tags

Changes Summary 1

+123 -16

file changed

modlint

modulemd

Source Code

Documentation

#10 Expanding modlint Closed 7 years ago by nphilipp. Opened 7 years ago by jstanek. modlint-dist-git into master

Metadata

Changes Summary 1

#10 Expanding modlint

Closed 7 years ago by nphilipp. Opened 7 years ago by jstanek.

modlint-dist-git into master