PR#16: Updating filters - modulemd

modulemd

#16 Updating filters

Closed 7 years ago by psabata. Opened 7 years ago by cpacheco.

filter_update into master

Updating code to follow new coding guidelines

Courtney Pacheco • 7 years ago

c9cdc3e

Fixing bug with filtering

iamcourtney • 7 years ago

aef85a5

Fixing typos: archs --> arches

iamcourtney • 7 years ago

88dbecf

Updating modulemd filtering code and updating test scripts

iamcourtney • 7 years ago

8a73530

modulemd/__init__.py

file modified

+66 -3

		`@@ -41,6 +41,7 @@`
		`"""`

		`import yaml`
		`+ import ast`

		`from modulemd.components import ModuleComponents`
		`from modulemd.content import ModuleContent`
		`@@ -170,9 +171,49 @@`
		`if "multilib" in e:`
		`extras["multilib"] = e["multilib"]`
		`self.components.rpms.add_package(p, **extras)`
		`+`
		`+ #Iterate through the filtered packages (if they exist)`
		`if "filter" in yml["data"]["components"]["rpms"]:`
		`- self.components.rpms.filter = \`
		`- set(yml["data"]["components"]["rpms"]["filter"])`
		`+`
		`+ #Grab list of RPMs, filters, and packages, and create a dict`
		`+ #to store all the filtered packages' arches`
		`+ filtered_packages = {}`
		`+ rpms_yaml = yml["data"]["components"]["rpms"]`
		`+ filter_yaml = rpms_yaml["filter"]`
		`+ packages_yaml = rpms_yaml["packages"]`
		`+`
		`+ #Iterate through all the filters`
		`+ for filtkey, filt in filter_yaml:`
		`+ for p in packages_yaml:`
		`+`
		`+ #Exclude this package`
		`+ if ((p == filt) or`
		`+ (filtkey in filtered_packages) or`
		`+ (filt in packages_yaml)):`
		`+ continue`
		`+`
		`+ arches = filt.get("arches")`
		`+`
		`+ #This case is for excluding secondary architectures`
		`+ if arches:`
		`+ if arches[0] == ".+":`
		`+ arches = ["*"]`
		`+ filt = filt + ".+"`
		`+ else: #Otherwise, exclude all package architectures`
		`+ arches = ["*"]`
		`+ filtered_packages[filt] = arches`
		`+`
		`+ #Combine like-architectures by switching dict keys with dict values`
		`+ filtered = {}`
		`+ for pkey, pvalue in filtered_packages:`
		`+ for a in pvalue:`
		`+ if a not in filtered:`
		`+ filtered[a] = [pkey]`
		`+ else:`
		`+ filtered[a].append(pkey)`
		`+`
		`+ #This is for json formatting. (Python is picky about ' vs ")`
		`+ self.components.rpms.filter = set([str(filtered).replace("'",'"')])`

		`def dump(self, f):`
		`"""Dumps the metadata into the supplied file.`
		`@@ -261,8 +302,30 @@`
		`data["data"]["components"]["rpms"]["packages"][p] = \`
		`extra`
		`if self.components.rpms.filter:`
		`+ filter_list = list(self.components.rpms.filter)[0]`
		`+ filter_str = ast.literal_eval(filter_list)`
		`+ if isinstance(filter_str, str):`
		`+ filter_dict = ast.literal_eval(filter_str[0])`
		`+ else:`
		`+ filter_dict = filter_str`
		`+ filter_dict_reversed = dict()`
		`+ for a in filter_dict:`
		`+ for p in filter_dict[a]:`
		`+ arches = dict()`
		`+ if p not in filter_dict_reversed:`
		`+ if ".+" in p:`
		`+ (name, empty) = p.split(".+")`
		`+ arches["arches"] = [".+"]`
		`+ filter_dict_reversed[name] = arches`
		`+ else:`
		`+ arches["arches"] = [a]`
		`+ filter_dict_reversed[p] = arches`
		`+ else:`
		`+ filter_dict_reversed[p]["arches"].append(a)`
		`data["data"]["components"]["rpms"]["filter"] = \`
		`- list(self.components.rpms.filter)`
		`+ filter_dict_reversed`
		`+`
		`+`
		`return yaml.safe_dump(data)`

		`def validate(self):`

tests/test.yaml

file modified

+13 -2

		`@@ -40,8 +40,19 @@`
		`- alfa`
		`- alfa-extras`
		`filter:`
		`- - filter_1`
		`- - filter_2`
		`+ alfa:`
		`+ arches:`
		`+ - ppc64`
		`+ - x86_64`
		`+ beta:`
		`+ arches:`
		`+ - x86_64`
		`+ delta:`
		`+ arches:`
		`+ - "*"`
		`+ epsilon:`
		`+ arches:`
		`+ - ".+"`
		`packages:`
		`alfa:`
		`rationale: alfa rationale`

tests/test_basic.py

file modified

+2 -2

		`@@ -59,7 +59,7 @@`
		`cls.mmd.components.rpms.dependencies = True`
		`cls.mmd.components.rpms.api = set([ "api" ])`
		`cls.mmd.components.rpms.packages = { "rpm" : { "rationale" : "" } }`
		`- cls.mmd.components.rpms.filter = set([ "filter_1", "filter_2" ])`
		`+ cls.mmd.components.rpms.filter = set(['{"x86_64": ["bar"], "*": ["bar-doc2"], "ppc64": ["bar-doc", "foo-doc"], "ppc64le": ["foo-doc"]}'])`

		`def test_mdversion(self):`
		`self.assertIn(self.mmd.mdversion, modulemd.supported_mdversions)`
		`@@ -126,7 +126,7 @@`
		`self.assertEqual(self.mmd.components.rpms.packages, { "rpm" : { "rationale" : "" } })`

		`def test_rpm_filter(self):`
		`- self.assertEqual(self.mmd.components.rpms.filter, set([ "filter_1", "filter_2" ]))`
		`+ self.assertEqual(self.mmd.components.rpms.filter, set(['{"x86_64": ["bar"], "*": ["bar-doc2"], "ppc64": ["bar-doc", "foo-doc"], "ppc64le": ["foo-doc"]}']))`

		`if __name__ == "__main__":`
		`unittest.main()`

tests/test_io.py

file modified

+7 -8

		`@@ -68,7 +68,7 @@`
		`"repository" : "golf",`
		`"cache" : "hotel" } } )`
		`self.assertEqual(mmd.components.rpms.filter,`
		`- set([ "filter_1", "filter_2" ]))`
		`+ set(['{"x86_64": ["beta"], "*": ["epsilon.+", "delta"]}']))`

		`def test_loads(self, yaml=None):`
		`mmd = modulemd.ModuleMetadata()`
		`@@ -120,8 +120,9 @@`
		`repository: golf`
		`cache: hotel`
		`filter:`
		`- - filter_1`
		`- - filter_2`
		`+ delta:`
		`+ arches:`
		`+ - x86_64`
		`"""`
		`if not yaml:`
		`yaml = document`
		`@@ -157,7 +158,7 @@`
		`"repository" : "golf",`
		`"cache" : "hotel" } } )`
		`self.assertEqual(mmd.components.rpms.filter,`
		`- set([ "filter_1", "filter_2" ]))`
		`+ set(['{"x86_64": ["delta"]}']))`

		`def test_dump(self):`
		`mmd = modulemd.ModuleMetadata()`
		`@@ -189,8 +190,7 @@`
		`mmd.components.rpms.add_package("bravo", rationale="bravo rationale",`
		`arches=["charlie", "delta"], multilib=["echo"],`
		`commit="foxtrot", repository="golf", cache="hotel")`
		`- mmd.components.rpms.add_filter("filter_1")`
		`- mmd.components.rpms.add_filter("filter_2")`
		`+ mmd.components.rpms.add_filter('{"x86_64": ["beta"], "*": ["epsilon.+", "delta"]}')`
		`mmd.dump("tests/dump.yaml")`
		`self.test_load(filename="tests/dump.yaml")`

		`@@ -224,8 +224,7 @@`
		`mmd.components.rpms.add_package("bravo", rationale="bravo rationale",`
		`arches=["charlie", "delta"], multilib=["echo"],`
		`commit="foxtrot", repository="golf", cache="hotel")`
		`- mmd.components.rpms.add_filter("filter_1")`
		`- mmd.components.rpms.add_filter("filter_2")`
		`+ mmd.components.rpms.add_filter('{"x86_64": ["delta"]}')`
		`self.test_loads(yaml=mmd.dumps())`

		`if __name__ == "__main__":`

tests/test_rpms.py

file modified

+7 -7

		`@@ -116,17 +116,17 @@`
		`self.assertEqual(self.mr.api, set([]))`

		`def test_add_filter(self):`
		`- self.assertNotIn("AddRPMAPI", self.mr.filter)`
		`- self.mr.add_filter("AddRPMAPI")`
		`- self.assertIn("AddRPMAPI", self.mr.filter)`
		`+ self.assertNotIn('{ "*": [foo] }', self.mr.filter)`
		`+ self.mr.add_filter('{ "*": [foo] }')`
		`+ self.assertIn('{ "*": [foo] }', self.mr.filter)`

		`def test_del_filter(self):`
		`- self.mr.filter = set(["DelRPMAPI"])`
		`- self.mr.del_filter("DelRPMAPI")`
		`- self.assertNotIn("DelRPMAPI", self.mr.filter)`
		`+ self.mr.filter = set('{ "*": [bar] }')`
		`+ self.mr.del_filter('{ "*": [bar] }')`
		`+ self.assertNotIn('{ "*": [bar] }', self.mr.filter)`

		`def test_clear_filter(self):`
		`- self.mr.filter = set(["ClearRPMAPI"])`
		`+ self.mr.filter = set(['{ "": [foo] }', '{ "": [bar] }'])`
		`self.mr.clear_filter()`
		`self.assertEqual(self.mr.filter, set([]))`

cpacheco commented 7 years ago

Hi, I've updated the filtering feature to accommodate binary filtering, which we plan to use later in Rida (fm-orchestrator). The only files affected are modulemd/__init__.py and its related tests.

1 new commit added

Fixing bug with filtering

7 years ago

nphilipp commented on line 17 of modulemd/__init__.py 7 years ago

That could be `filtered_packages = {}`.

nphilipp commented on line 31 of modulemd/__init__.py 7 years ago

A couple of things about this block:

- Introduce and use "shortcut variables" rather than drilling down through the YAML every time.
- Use `dict.items()` if and only if you need to access both keys and values.
- If you don't know whether a dict contains an element, you can use the `get()` method to conditionally retrieve it.
- Put operators beside operands (i.e. `or` at the beginning of the next line); see PEP 8.
- Drop unnecessary parentheses.
- Indent continued lines of if-conditions by one additional level to more easily distinguish them from the following block (PEP 8).
- More, and more verbose, comments please :wink: (check the PEP 8 guidelines about them).

E.g.:

rpms_yaml = yml['data']['components']['rpms']
filter_yaml = rpms_yaml['filter']
packages_yaml = rpms_yaml['packages']

for filtkey, filter in filter_yaml.items():
    for p in packages_yaml:
        if (p == filtkey
                or filtkey in filtered_packages
                or filtkey in packages_yaml):
            # exclude this package
            continue
        arches = filter.get('arches')
        if arches:
            # pls add comment: what does the following mean?
            if arches[0] == ".+":
                arches = ["*"]
                filtkey += ".+"
        else:
            # filter is for all architectures
            arches = ["*"]
        filtered_packages[filtkey] = arches

Edited 7 years ago by nphilipp

nphilipp commented on line 35 of modulemd/__init__.py 7 years ago

similar here:

...
for pkey, pvalue in filtered_packages.items():
    for a in pvalue:
...

nphilipp commented on line 40 of modulemd/__init__.py 7 years ago

What does str(filtered).replace("'",'"') do here?

nphilipp commented on line 56 of modulemd/__init__.py 7 years ago

Another instance where dict.items() is handy...

psabata commented 7 years ago

Hmm, this is something I remember discussing with Lubos a few times.
I was always against this as it's already solved on the RPM level. There's no need to filter packages that don't get built on those particular architectures.