#34 Fix accessing product info from configuration
Merged 4 years ago by pingou. Opened 4 years ago by nphilipp.
fedora-infra/ nphilipp/distgit-bugzilla-sync master--fix-product-config  into  master

@@ -19,6 +19,7 @@ 

  

  from defusedxml import cElementTree as etree

  import requests

+ from xml.etree.ElementTree import ParseError

  

  

  KOJI_REPO = 'https://kojipkgs.fedoraproject.org/repos/'
@@ -32,8 +33,7 @@ 

  

  

  def download_db(name, repomd_url, archive):

-     log.info('%s Downloading file: %s to %s' % (

-         name.ljust(12), repomd_url, archive))

+     log.info('%-12s Downloading file: %s to %s', name, repomd_url, archive)

      response = requests.get(repomd_url, verify=True)

      with open(archive, 'wb') as stream:

          stream.write(response.content)
@@ -41,7 +41,7 @@ 

  

  def decompress_db(name, archive, location):

      ''' Decompress the given archive at the specified location. '''

-     log.info('%s Extracting %s to %s' % (name.ljust(12), archive, location))

+     log.info('%-12s Extracting %s to %s', name, archive, location)

      if archive.endswith('.xz'):

          import lzma

          with contextlib.closing(lzma.LZMAFile(archive)) as stream_xz:
@@ -99,32 +99,39 @@ 

      repomd_url = url + '/repomd.xml'

      response = requests.get(repomd_url, verify=True)

      if not bool(response):

-         print('%s !! Failed to get %r %r' % (

-             name.ljust(12), repomd_url, response))

+         log.warning('%-12s !! Failed to get %s %s', name, repomd_url, response)

          return

  

-     # Parse the xml doc and get a list of locations and their shasum.

-     files = ((

-         node.find('repo:location', repomd_xml_namespace),

-         node.find('repo:open-checksum', repomd_xml_namespace),

-     ) for node in etree.fromstring(response.text))

+     try:

+         root = etree.fromstring(response.text)

+     except ParseError:

+         log.warning('%-12s !! Failed to parse %s %s', name, repomd_url, response)

+         return

  

-     # Extract out the attributes that we're really interested in.

-     files = (

-         (f.attrib['href'].replace('repodata/', ''), s.text, s.attrib['type'])

-         for f, s in files if f is not None and s is not None

-     )

+     data_nodes = list(root.findall('repo:data[@type="primary"]', repomd_xml_namespace))

+     if not data_nodes:

+         log.debug('No primary.xml could be found in %s', url)

+         return

+     elif len(data_nodes) > 1:

+         log.debug("More than one primary.xml could be found in %s", url)

+         return

  

-     # Filter down to only the primary.xml files

-     files = list((f, s, t) for f, s, t in files if 'primary.xml' in f)

+     primary_node = data_nodes[0]

  

-     if not files:

-         log.debug('No primary.xml could be found in %s' % url)

-     elif len(files) > 1:

-         log.debug("More than one primary.xml could be found in %s" % url)

+     location_node = primary_node.find('repo:location', repomd_xml_namespace)

+     if location_node is None or 'href' not in location_node.attrib:

+         log.debug('No valid location found for primary.xml in %s', url)

          return

  

-     filename, shasum, shatype = files[0]

+     cksuminfo_node = primary_node.find('repo:open-checksum', repomd_xml_namespace)

+     if cksuminfo_node is None or 'type' not in cksuminfo_node.attrib:

+         log.debug('No valid checksum information found for primary.xml in %s', url)

+         return

+ 

+     filename = location_node.attrib['href'].replace('repodata/', '')

+     hash_digest = cksuminfo_node.text

+     hash_type = cksuminfo_node.attrib['type']

+ 

      repomd_url = url + '/' + filename

  

      # First, determine if the file has changed by comparing hash
@@ -132,8 +139,8 @@ 

  

      # Have we downloaded this before?  Did it change?

      destfile = os.path.join(destfolder, db)

-     if not needs_update(destfile, shasum, shatype):

-         log.debug('%s No change of %s' % (name.ljust(12), repomd_url))

+     if not needs_update(destfile, hash_digest, hash_type):

+         log.debug('%s No change of %s', name.ljust(12), repomd_url)

      else:

          # If it has changed, then download it and move it into place.

          archive = os.path.join(destfolder, filename)

file modified
+32 -38
@@ -162,43 +162,41 @@ 

              # Old API -- in python-bugzilla.  But with current server, this

              # gives ProxyError

              for collection, product in self.config["products"].items():

-                 self.product_cache[collection] = self.server.getcomponentsdetails(product)

+                 bz_product_name = product.get('bz_product_name', collection)

+                 self.product_cache[collection] = self.server.getcomponentsdetails(bz_product_name)

          elif self.config['bugzilla']['compat_api'] == 'component.get':

              # Way that's undocumented in the partner-bugzilla api but works

              # currently

              for collection, product in self.config["products"].items():

- 

+                 bz_product_name = product.get('bz_product_name', collection)

                  # restrict the list of info returned to only the packages of

                  # interest

                  pkglist = [

                      project["name"]

                      for project in pagure_projects

-                     if product in project["products"]

+                     if bz_product_name in project["products"]

                  ]

-                 products = {}

+                 product_info_by_pkg = {}

                  for pkg_segment in segment(pkglist, self.config['bugzilla']['req_segment']):

                      # Format that bugzilla will understand.  Strip None's that

                      # segment() pads out the final data segment() with

                      query = [

-                         dict(

-                             product=self.config['products'][collection],

-                             component=p

-                         )

+                         {'product': bz_product_name, 'component': p}

                          for p in pkg_segment

                          if p is not None

                      ]

-                     raw_data = self.server._proxy.Component.get(dict(names=query))

+                     raw_data = self.server._proxy.Component.get({'names': query})

                      for package in raw_data['components']:

                          # Reformat data to be the same as what's returned from

                          # getcomponentsdetails

-                         product = dict(

-                             initialowner=package['default_assignee'],

-                             description=package['description'],

-                             initialqacontact=package['default_qa_contact'],

-                             initialcclist=package['default_cc']

-                         )

-                         products[package['name'].lower()] = product

-                 self.product_cache[collection] = products

+                         product_info = {

+                             'initialowner': package['default_assignee'],

+                             'description': package['description'],

+                             'initialqacontact': package['default_qa_contact'],

+                             'initialcclist': package['default_cc'],

+                         }

+                         product_info_by_pkg[package['name'].lower()] = product_info

+                 self.product_cache[collection] = product_info_by_pkg

  

      def invert_user_cache(self):

          """ Takes the user_cache built when querying FAS and invert it so
@@ -314,6 +312,8 @@ 

              e.args = ('ProtocolError', e.errcode, e.errmsg)

              raise

  

+         bz_product_name = self.config['products'][collection].get('bz_product_name', collection)

+ 

          # Set the qacontact_email and name

          default_qa_contact_email = self.config['default_qa_contact_email']

          default_qa_contact = f"<default: {default_qa_contact_email.split('@', 1)[0]}@...>"
@@ -346,12 +346,11 @@ 

                          data['initialcclist'] = initial_cc_emails

                          break

  

-             data["is_active"] = not retired

- 

              if data:

                  # Changes occurred.  Submit a request to change via xmlrpc

-                 data['product'] = self.config['products'][collection]

+                 data['product'] = bz_product_name

                  data['component'] = package

+                 data["is_active"] = not retired

  

                  if self.config["verbose"]:

                      print(f'[EDITCOMP] {data["product"]}/{data["component"]}')
@@ -414,16 +413,12 @@ 

                          new_poc=owner_email,

                          prev_poc=product[pkg_key]['initialowner'],

                          name=package,

-                         product=self.config['products'][collection],

+                         product=bz_product_name,

                      )

              else:

                  if self.config.get("print-no-change"):

-                     bz_product_name = self.config['products'][collection].get(

-                         'bz_product_name', collection

-                     )

                      print(f"[NOCHANGE] {package}/{bz_product_name}")

          else:

-             bz_product_name = self.config['products'][collection].get('bz_product_name', collection)

              if retired:

                  if self.config['verbose']:

                      print(f"[NOADD] {bz_product_name}/{package} (is retired)")
@@ -473,10 +468,10 @@ 

      :return: a list of the repo's branches

      """

      branches_url = '{0}component-branches/'.format(env['pdc_url'])

-     params = dict(

-         global_component=repo['name'],

-         type=env['pdc_types'][repo['namespace']]

-     )

+     params = {

+         'global_component': repo['name'],

+         'type': env['pdc_types'][repo['namespace']],

+     }

      if config["verbose"]:

          print('Querying {0} {1}'.format(branches_url, params))

      rv = session.get(branches_url, params=params, timeout=60)
@@ -506,7 +501,6 @@ 

      _namespace_to_product = None

      _product_to_branch_regex = None

      _branch_regex_to_product = None

-     errors = collections.defaultdict(list)

  

      def send_email(self, from_address, to_address, subject, message, cc_address=None):

          '''Send an email if there's an error.
@@ -656,16 +650,16 @@ 

          # Combine and collapse those two into a single list:

          self.pagure_projects = []

          if project_list:

-             project_list = set(tuple(p.split("/", 1)) for p in project_list)

+             project_list = {tuple(p.split("/", 1)) for p in project_list}

          for namespace, entries in pagure_namespace_to_poc.items():

              for name, poc in entries.items():

                  if not project_list or (namespace, name) in project_list:

-                     self.pagure_projects.append(dict(

-                         namespace=namespace,

-                         name=name,

-                         poc=poc,

-                         watchers=pagure_namespace_to_cc[namespace][name],

-                     ))

+                     self.pagure_projects.append({

+                         'namespace': namespace,

+                         'name': name,

+                         'poc': poc,

+                         'watchers': pagure_namespace_to_cc[namespace][name],

+                     })

  

      @property

      def namespace_to_product(self):
@@ -854,7 +848,7 @@ 

          self.env["print-no-change"] = self.args.print_no_change

  

          # Non-fatal errors to alert people about

-         errors = []

+         self.errors = collections.defaultdict(list)

  

          self.session = retry_session()

  

  • Previously, the products dict mapped product names in Fedora to their
    Bugzilla counterparts. Now that all information pertaining to products
    is in this dictionary, the Bugzilla product name (if differing) is in
    the bz_product_name sub-key.

  • Rename some variables to make their purpose a little more obvious, and
    to avoid using `product` for two different things (product names vs. the
    informational dictionary stored in the product cache).

  • use dict literals and set/list comprehensions

rebased onto 743b3aa

4 years ago

Should we document what f, s and t stand for here?

Didn't know this notation, for me {} were for dictionaries, cool :)

2 new commits added

  • allow lazy expansion of log format strings
  • straighten out finding primary.xml file
4 years ago

2 new commits added

  • initialize self.errors in run()
  • don't print EDITCOMP messages for every package
4 years ago

Didn't know this notation, for me {} were for dictionaries, cool :)

{} is still an empty dictionary, and an empty set must be created with set(), but non-empty set literals like {1, 2, 3} and set comprehensions {x for x in iterable} have been available since Python 2.7 / 3.0.

7 new commits added

  • initialize self.errors in run()
  • don't print EDITCOMP messages for every package
  • allow lazy expansion of log format strings
  • straighten out finding primary.xml file
  • use list and set comprehensions where appropriate
  • consistently use dictionary literals
  • fix accessing product info from configuration
4 years ago

I had to review this PR commit by commit but this is looking good to me, thanks! :)

Pull-Request has been merged by pingou

4 years ago