#34 Fix accessing product info from configuration
Merged 4 years ago by pingou. Opened 4 years ago by nphilipp.
fedora-infra/ nphilipp/distgit-bugzilla-sync master--fix-product-config  into  master

@@ -19,6 +19,7 @@ 

  

  from defusedxml import cElementTree as etree

  import requests

+ from xml.etree.ElementTree import ParseError

  

  

  KOJI_REPO = 'https://kojipkgs.fedoraproject.org/repos/'
@@ -32,8 +33,7 @@ 

  

  

  def download_db(name, repomd_url, archive):

-     log.info('%s Downloading file: %s to %s' % (

-         name.ljust(12), repomd_url, archive))

+     log.info('%-12s Downloading file: %s to %s', name, repomd_url, archive)

      response = requests.get(repomd_url, verify=True)

      with open(archive, 'wb') as stream:

          stream.write(response.content)
@@ -41,7 +41,7 @@ 

  

  def decompress_db(name, archive, location):

      ''' Decompress the given archive at the specified location. '''

-     log.info('%s Extracting %s to %s' % (name.ljust(12), archive, location))

+     log.info('%-12s Extracting %s to %s', name, archive, location)

      if archive.endswith('.xz'):

          import lzma

          with contextlib.closing(lzma.LZMAFile(archive)) as stream_xz:
@@ -99,32 +99,39 @@ 

      repomd_url = url + '/repomd.xml'

      response = requests.get(repomd_url, verify=True)

      if not bool(response):

-         print('%s !! Failed to get %r %r' % (

-             name.ljust(12), repomd_url, response))

+         log.warning('%-12s !! Failed to get %s %s', name, repomd_url, response)

          return

  

-     # Parse the xml doc and get a list of locations and their shasum.

-     files = ((

-         node.find('repo:location', repomd_xml_namespace),

-         node.find('repo:open-checksum', repomd_xml_namespace),

-     ) for node in etree.fromstring(response.text))

+     try:

+         root = etree.fromstring(response.text)

+     except ParseError:

+         log.warning('%-12s !! Failed to parse %s %s', name, repomd_url, response)

+         return

  

-     # Extract out the attributes that we're really interested in.

-     files = (

-         (f.attrib['href'].replace('repodata/', ''), s.text, s.attrib['type'])

-         for f, s in files if f is not None and s is not None

-     )

+     data_nodes = list(root.findall('repo:data[@type="primary"]', repomd_xml_namespace))

+     if not data_nodes:

+         log.debug('No primary.xml could be found in %s', url)

+         return

+     elif len(data_nodes) > 1:

+         log.debug("More than one primary.xml could be found in %s", url)

+         return

  

-     # Filter down to only the primary.xml files

-     files = list((f, s, t) for f, s, t in files if 'primary.xml' in f)

+     primary_node = data_nodes[0]

  

-     if not files:

-         log.debug('No primary.xml could be found in %s' % url)

-     elif len(files) > 1:

-         log.debug("More than one primary.xml could be found in %s" % url)

+     location_node = primary_node.find('repo:location', repomd_xml_namespace)

+     if location_node is None or 'href' not in location_node.attrib:

+         log.debug('No valid location found for primary.xml in %s', url)

          return

  

-     filename, shasum, shatype = files[0]

+     cksuminfo_node = primary_node.find('repo:open-checksum', repomd_xml_namespace)

+     if cksuminfo_node is None or 'type' not in cksuminfo_node.attrib:

+         log.debug('No valid checksum information found for primary.xml in %s', url)

+         return

+ 

+     filename = location_node.attrib['href'].replace('repodata/', '')

+     hash_digest = cksuminfo_node.text

+     hash_type = cksuminfo_node.attrib['type']

+ 

      repomd_url = url + '/' + filename

  

      # First, determine if the file has changed by comparing hash
@@ -132,8 +139,8 @@ 

  

      # Have we downloaded this before?  Did it change?

      destfile = os.path.join(destfolder, db)

-     if not needs_update(destfile, shasum, shatype):

-         log.debug('%s No change of %s' % (name.ljust(12), repomd_url))

+     if not needs_update(destfile, hash_digest, hash_type):

+         log.debug('%s No change of %s', name.ljust(12), repomd_url)

      else:

          # If it has changed, then download it and move it into place.

          archive = os.path.join(destfolder, filename)

file modified
+32 -38
@@ -162,43 +162,41 @@ 

              # Old API -- in python-bugzilla.  But with current server, this

              # gives ProxyError

              for collection, product in self.config["products"].items():

-                 self.product_cache[collection] = self.server.getcomponentsdetails(product)

+                 bz_product_name = product.get('bz_product_name', collection)

+                 self.product_cache[collection] = self.server.getcomponentsdetails(bz_product_name)

          elif self.config['bugzilla']['compat_api'] == 'component.get':

              # Way that's undocumented in the partner-bugzilla api but works

              # currently

              for collection, product in self.config["products"].items():

- 

+                 bz_product_name = product.get('bz_product_name', collection)

                  # restrict the list of info returned to only the packages of

                  # interest

                  pkglist = [

                      project["name"]

                      for project in pagure_projects

-                     if product in project["products"]

+                     if bz_product_name in project["products"]

                  ]

-                 products = {}

+                 product_info_by_pkg = {}

                  for pkg_segment in segment(pkglist, self.config['bugzilla']['req_segment']):

                      # Format that bugzilla will understand.  Strip None's that

                      # segment() pads out the final data segment() with

                      query = [

-                         dict(

-                             product=self.config['products'][collection],

-                             component=p

-                         )

+                         {'product': bz_product_name, 'component': p}

                          for p in pkg_segment

                          if p is not None

                      ]

-                     raw_data = self.server._proxy.Component.get(dict(names=query))

+                     raw_data = self.server._proxy.Component.get({'names': query})

                      for package in raw_data['components']:

                          # Reformat data to be the same as what's returned from

                          # getcomponentsdetails

-                         product = dict(

-                             initialowner=package['default_assignee'],

-                             description=package['description'],

-                             initialqacontact=package['default_qa_contact'],

-                             initialcclist=package['default_cc']

-                         )

-                         products[package['name'].lower()] = product

-                 self.product_cache[collection] = products

+                         product_info = {

+                             'initialowner': package['default_assignee'],

+                             'description': package['description'],

+                             'initialqacontact': package['default_qa_contact'],

+                             'initialcclist': package['default_cc'],

+                         }

+                         product_info_by_pkg[package['name'].lower()] = product_info

+                 self.product_cache[collection] = product_info_by_pkg

  

      def invert_user_cache(self):

          """ Takes the user_cache built when querying FAS and invert it so
@@ -314,6 +312,8 @@ 

              e.args = ('ProtocolError', e.errcode, e.errmsg)

              raise

  

+         bz_product_name = self.config['products'][collection].get('bz_product_name', collection)

+ 

          # Set the qacontact_email and name

          default_qa_contact_email = self.config['default_qa_contact_email']

          default_qa_contact = f"<default: {default_qa_contact_email.split('@', 1)[0]}@...>"
@@ -346,12 +346,11 @@ 

                          data['initialcclist'] = initial_cc_emails

                          break

  

-             data["is_active"] = not retired

- 

              if data:

                  # Changes occurred.  Submit a request to change via xmlrpc

-                 data['product'] = self.config['products'][collection]

+                 data['product'] = bz_product_name

                  data['component'] = package

+                 data["is_active"] = not retired

  

                  if self.config["verbose"]:

                      print(f'[EDITCOMP] {data["product"]}/{data["component"]}')
@@ -414,16 +413,12 @@ 

                          new_poc=owner_email,

                          prev_poc=product[pkg_key]['initialowner'],

                          name=package,

-                         product=self.config['products'][collection],

+                         product=bz_product_name,

                      )

              else:

                  if self.config.get("print-no-change"):

-                     bz_product_name = self.config['products'][collection].get(

-                         'bz_product_name', collection

-                     )

                      print(f"[NOCHANGE] {package}/{bz_product_name}")

          else:

-             bz_product_name = self.config['products'][collection].get('bz_product_name', collection)

              if retired:

                  if self.config['verbose']:

                      print(f"[NOADD] {bz_product_name}/{package} (is retired)")
@@ -473,10 +468,10 @@ 

      :return: a list of the repo's branches

      """

      branches_url = '{0}component-branches/'.format(env['pdc_url'])

-     params = dict(

-         global_component=repo['name'],

-         type=env['pdc_types'][repo['namespace']]

-     )

+     params = {

+         'global_component': repo['name'],

+         'type': env['pdc_types'][repo['namespace']],

+     }

      if config["verbose"]:

          print('Querying {0} {1}'.format(branches_url, params))

      rv = session.get(branches_url, params=params, timeout=60)
@@ -506,7 +501,6 @@ 

      _namespace_to_product = None

      _product_to_branch_regex = None

      _branch_regex_to_product = None

-     errors = collections.defaultdict(list)

  

      def send_email(self, from_address, to_address, subject, message, cc_address=None):

          '''Send an email if there's an error.
@@ -656,16 +650,16 @@ 

          # Combine and collapse those two into a single list:

          self.pagure_projects = []

          if project_list:

-             project_list = set(tuple(p.split("/", 1)) for p in project_list)

+             project_list = {tuple(p.split("/", 1)) for p in project_list}

          for namespace, entries in pagure_namespace_to_poc.items():

              for name, poc in entries.items():

                  if not project_list or (namespace, name) in project_list:

-                     self.pagure_projects.append(dict(

-                         namespace=namespace,

-                         name=name,

-                         poc=poc,

-                         watchers=pagure_namespace_to_cc[namespace][name],

-                     ))

+                     self.pagure_projects.append({

+                         'namespace': namespace,

+                         'name': name,

+                         'poc': poc,

+                         'watchers': pagure_namespace_to_cc[namespace][name],

+                     })

  

      @property

      def namespace_to_product(self):
@@ -854,7 +848,7 @@ 

          self.env["print-no-change"] = self.args.print_no_change

  

          # Non-fatal errors to alert people about

-         errors = []

+         self.errors = collections.defaultdict(list)

  

          self.session = retry_session()

  

  • Previously, the products dict mapped product names in Fedora to their
    Bugzilla counterparts. Now that all information pertaining to products
    is in this dictionary, the Bugzilla product name (if differing) is in
    the bz_product_name sub-key.

  • Rename some variables to make their purpose a little more obvious, and
    to avoid using `product` for two different things (product names vs. the
    informational dictionary stored in the product cache).

  • use dict literals and set/list comprehensions

rebased onto 743b3aa

4 years ago

Should we document what f, s and t stand for here?

Didn't know this notation, for me {} were for dictionaries, cool :)

2 new commits added

  • allow lazy expansion of log format strings
  • straighten out finding primary.xml file
4 years ago

2 new commits added

  • initialize self.errors in run()
  • don't print EDITCOMP messages for every package
4 years ago

Didn't know this notation, for me {} were for dictionaries, cool :)

{} is still an empty dictionary, and an empty set must be created with set(), but non-empty set literals like {1, 2, 3} and set comprehensions {x for x in iterable} have been available since Python 2.7 / 3.0.

7 new commits added

  • initialize self.errors in run()
  • don't print EDITCOMP messages for every package
  • allow lazy expansion of log format strings
  • straighten out finding primary.xml file
  • use list and set comprehensions where appropriate
  • consistently use dictionary literals
  • fix accessing product info from configuration
4 years ago

I had to review this PR commit by commit but this is looking good to me, thanks! :)

Pull-Request has been merged by pingou

4 years ago