From 15d20a860a67f57af672b5194d5375b18897d758 Mon Sep 17 00:00:00 2001 From: Nils Philippsen Date: Nov 18 2019 15:07:27 +0000 Subject: [PATCH 1/5] convert to Python 3 (only) For Python 3, some modules got renamed or functionality was moved between modules. Also, Python 2 is as good as dead, so get rid of compat quirks and simplify some constructs. Signed-off-by: Nils Philippsen --- diff --git a/pagure-sync-bugzilla.py b/pagure-sync-bugzilla.py index b6fd6a4..e416a87 100644 --- a/pagure-sync-bugzilla.py +++ b/pagure-sync-bugzilla.py @@ -1,4 +1,4 @@ -#!/usr/bin/python -tt +#!/usr/bin/python3 -tt # -*- coding: utf-8 -*- # # Copyright © 2013-2019 Red Hat, Inc. @@ -29,7 +29,7 @@ sync information from the Pagure into bugzilla This ... script takes information about package onwership and imports it into bugzilla. ''' -from __future__ import print_function + import re import argparse import datetime @@ -38,26 +38,21 @@ import sys import os import itertools import json -import xmlrpclib -import codecs +import xmlrpc.client import smtplib import traceback import multiprocessing.pool -try: - from email.Message import Message -except ImportError: - from email.message import EmailMessage as Message +from email.message import EmailMessage import bugzilla as bugzilla_lib import dogpile.cache import requests import yaml -from six import string_types import fedora.client from fedora.client.fas2 import AccountSystem from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry +from urllib3.util import Retry env = 'staging' @@ -151,7 +146,7 @@ PDC_TYPES = { 'modules': 'module', 'container': 'container', } -INVERSE_PDC_TYPES = dict([(v, k) for k, v in PDC_TYPES.items()]) +INVERSE_PDC_TYPES = {v: k for k, v in PDC_TYPES.items()} # When querying for current info, take segments of 1000 packages a time @@ -231,7 +226,7 @@ class DataChangedError(Exception): def segment(iterable, chunk, fill=None): '''Collect data into `chunk` sized block''' args = 
[iter(iterable)] * chunk - return itertools.izip_longest(*args, fillvalue=fill) + return itertools.zip_longest(*args, fillvalue=fill) class ProductCache(dict): @@ -256,7 +251,7 @@ class ProductCache(dict): elif BZCOMPAPI == 'component.get': # Way that's undocumented in the partner-bugzilla api but works # currently - pkglist = projects_dict[key].keys() + pkglist = list(projects_dict[key]) products = {} for pkg_segment in segment(pkglist, BZ_PKG_SEGMENT): # Format that bugzilla will understand. Strip None's that @@ -280,7 +275,7 @@ class ProductCache(dict): return super(ProductCache, self).__getitem__(key) -class BugzillaProxy(object): +class BugzillaProxy: def __init__(self, bzServer, username, password, acls): self.bzXmlRpcServer = bzServer @@ -355,11 +350,11 @@ class BugzillaProxy(object): # Lookup product try: product = self.productCache[collection] - except xmlrpclib.Fault as e: + except xmlrpc.client.Fault as e: # Output something useful in args e.args = (e.faultCode, e.faultString) raise - except xmlrpclib.ProtocolError as e: + except xmlrpc.client.ProtocolError as e: e.args = ('ProtocolError', e.errcode, e.errmsg) raise @@ -411,11 +406,11 @@ class BugzillaProxy(object): if not DRYRUN: try: self.server.editcomponent(data) - except xmlrpclib.Fault as e: + except xmlrpc.client.Fault as e: # Output something useful in args e.args = (data, e.faultCode, e.faultString) raise - except xmlrpclib.ProtocolError as e: + except xmlrpc.client.ProtocolError as e: e.args = ('ProtocolError', e.errcode, e.errmsg) raise else: @@ -441,7 +436,7 @@ class BugzillaProxy(object): if not DRYRUN: try: self.server.addcomponent(data) - except xmlrpclib.Fault as e: + except xmlrpc.client.Fault as e: # Output something useful in args e.args = (data, e.faultCode, e.faultString) raise @@ -456,7 +451,7 @@ def send_email(fromAddress, toAddress, subject, message, ccAddress=None): # Send no email in staging... 
pass else: - msg = Message() + msg = EmailMessage() msg.add_header('To', ','.join(toAddress)) msg.add_header('From', fromAddress) msg.add_header('Subject', subject) @@ -638,7 +633,7 @@ def _to_legacy_schema(product_and_project, session=None): # Check if the Bugzilla ticket assignee has been manually overridden override_yaml = _get_override_yaml(project) if override_yaml.get(product) \ - and isinstance(override_yaml[product], string_types): + and isinstance(override_yaml[product], str): owner = override_yaml[product] return { @@ -661,8 +656,6 @@ def _to_legacy_schema(product_and_project, session=None): if __name__ == '__main__': - sys.stdout = codecs.getwriter('utf-8')(sys.stdout) - parser = argparse.ArgumentParser( description='Script syncing information between Pagure and bugzilla' ) @@ -775,21 +768,21 @@ if __name__ == '__main__': # Initialize the connection to bugzilla bugzilla = BugzillaProxy(BZSERVER, BZUSER, BZPASS, projects_dict) - for product in projects_dict.keys(): + for product, pkgs in projects_dict.items(): if product not in PRODUCTS: continue - for pkg in sorted(projects_dict[product]): + for pkgname, pkginfo in sorted(projects_dict[product].items(), + key=lambda x: x[0]): if VERBOSE: - print("Assesssing bugzilla status for %r" % pkg) - pkgInfo = projects_dict[product][pkg] + print("Assesssing bugzilla status for %r" % pkgname) try: bugzilla.add_edit_component( - pkg, + pkgname, product, - pkgInfo['owner'], - pkgInfo['summary'], - pkgInfo['qacontact'], - pkgInfo['cclist'] + pkginfo['owner'], + pkginfo['summary'], + pkginfo['qacontact'], + pkginfo['cclist'] ) except ValueError as e: # A username didn't have a bugzilla address @@ -798,15 +791,15 @@ if __name__ == '__main__': # A Package or Collection was returned via xmlrpc but wasn't # present when we tried to change it errors.append(str(e.args)) - except xmlrpclib.ProtocolError as e: + except xmlrpc.client.ProtocolError as e: # Unrecoverable and likely means that nothing is going to # succeed. 
errors.append(str(e.args)) break - except xmlrpclib.Error as e: + except xmlrpc.client.Error as e: # An error occurred in the xmlrpc call. Shouldn't happen but # we better see what it is - errors.append('%s -- %s' % (pkg, e.args[-1])) + errors.append('%s -- %s' % (pkgname, e.args[-1])) # Send notification of errors if errors: From 0109c9455ad8ab108f67c7c5d37584d1f56760df Mon Sep 17 00:00:00 2001 From: Nils Philippsen Date: Nov 18 2019 15:07:27 +0000 Subject: [PATCH 2/5] fix typo Signed-off-by: Nils Philippsen --- diff --git a/pagure-sync-bugzilla.py b/pagure-sync-bugzilla.py index e416a87..70b2e75 100644 --- a/pagure-sync-bugzilla.py +++ b/pagure-sync-bugzilla.py @@ -774,7 +774,7 @@ if __name__ == '__main__': for pkgname, pkginfo in sorted(projects_dict[product].items(), key=lambda x: x[0]): if VERBOSE: - print("Assesssing bugzilla status for %r" % pkgname) + print("Assessing bugzilla status for %r" % pkgname) try: bugzilla.add_edit_component( pkgname, From 1297522e6add6139c17294d1dae11a9ff621ef62 Mon Sep 17 00:00:00 2001 From: Nils Philippsen Date: Nov 18 2019 15:07:27 +0000 Subject: [PATCH 3/5] appease flake8 Signed-off-by: Nils Philippsen --- diff --git a/pagure-sync-bugzilla.py b/pagure-sync-bugzilla.py index 70b2e75..ed28202 100644 --- a/pagure-sync-bugzilla.py +++ b/pagure-sync-bugzilla.py @@ -30,29 +30,28 @@ This ... script takes information about package onwership and imports it into bugzilla. 
''' -import re import argparse import datetime -import time -import sys -import os +from email.message import EmailMessage import itertools import json -import xmlrpc.client +import multiprocessing.pool +import os +import re import smtplib +import sys +import time import traceback -import multiprocessing.pool -from email.message import EmailMessage +import xmlrpc.client -import bugzilla as bugzilla_lib +from bugzilla import Bugzilla import dogpile.cache -import requests -import yaml import fedora.client from fedora.client.fas2 import AccountSystem - +import requests from requests.adapters import HTTPAdapter from urllib3.util import Retry +import yaml env = 'staging' @@ -282,7 +281,7 @@ class BugzillaProxy: self.username = username self.password = password - self.server = bugzilla_lib.Bugzilla( + self.server = Bugzilla( url=self.bzXmlRpcServer, user=self.username, password=self.password) @@ -588,6 +587,7 @@ def _get_pdc_branches(session, repo): data = rv.json() return [branch['name'] for branch in data['results']] + def _is_retired(product, project): branches = project['branches'] if product == 'Fedora EPEL': @@ -751,8 +751,8 @@ if __name__ == '__main__': products.add(NAMESPACE_TO_PRODUCT[project['namespace']]) project['products'] = list(products) - ## Now, we must transform the data we collected into something that PkgDB - ## would have returned + # Now, we must transform the data we collected into something that PkgDB + # would have returned p_to_legacy_schema = resilient_partial(_to_legacy_schema, session=session) items = [ (product, project) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..0749e7b --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[flake8] +show-source = True +max-line-length = 100 +exclude = .git,.tox,dist,*egg,build,tools +#ignore = + +# Configure flake8-import-order +#application-import-names = +import-order-style = google From 79e44b2953bed761676c209baa827e27435e1d09 Mon Sep 17 00:00:00 2001 From: Nils Philippsen Date: Nov 
18 2019 15:07:27 +0000 Subject: [PATCH 4/5] wrap main code path in its own function This lets us use it as a console_scripts entry point. Signed-off-by: Nils Philippsen --- diff --git a/pagure-sync-bugzilla.py b/pagure-sync-bugzilla.py index ed28202..8f379c6 100644 --- a/pagure-sync-bugzilla.py +++ b/pagure-sync-bugzilla.py @@ -522,7 +522,7 @@ def notify_users(errors): @cache.cache_on_arguments() -def _get_override_yaml(project): +def _get_override_yaml(project, session): pagure_override_url = '{0}/{1}/raw/master/f/{2}/{3}'.format( PAGUREURL.rstrip('/'), BUGZILLA_OVERRIDE_REPO, project['namespace'], project['name']) @@ -631,7 +631,7 @@ def _to_legacy_schema(product_and_project, session=None): owner = 'orphan' # Check if the Bugzilla ticket assignee has been manually overridden - override_yaml = _get_override_yaml(project) + override_yaml = _get_override_yaml(project, session) if override_yaml.get(product) \ and isinstance(override_yaml[product], str): owner = override_yaml[product] @@ -655,7 +655,10 @@ def _to_legacy_schema(product_and_project, session=None): } -if __name__ == '__main__': +def main(): + """The entrypoint to the script.""" + global VERBOSE, DRYRUN, projects_dict + parser = argparse.ArgumentParser( description='Script syncing information between Pagure and bugzilla' ) @@ -817,3 +820,7 @@ if __name__ == '__main__': json.dump({}, stream) sys.exit(0) + + +if __name__ == '__main__': + main() From b8e37f62fc81caf937074af2501903fdb0e71e39 Mon Sep 17 00:00:00 2001 From: Nils Philippsen Date: Nov 19 2019 10:24:35 +0000 Subject: [PATCH 5/5] add setup.py and related files Signed-off-by: Nils Philippsen --- diff --git a/pagure-sync-bugzilla.py b/pagure-sync-bugzilla.py deleted file mode 100644 index 8f379c6..0000000 --- a/pagure-sync-bugzilla.py +++ /dev/null @@ -1,826 +0,0 @@ -#!/usr/bin/python3 -tt -# -*- coding: utf-8 -*- -# -# Copyright © 2013-2019 Red Hat, Inc. 
-# -# This copyrighted material is made available to anyone wishing to use, modify, -# copy, or redistribute it subject to the terms and conditions of the GNU -# General Public License v.2, or (at your option) any later version. This -# program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY expressed or implied, including the implied warranties of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. You should have received a copy of the GNU -# General Public License along with this program; if not, write to the Free -# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the source -# code or documentation are not subject to the GNU General Public License and -# may only be used or replicated with the express permission of Red Hat, Inc. -# -# Red Hat Author(s): Toshio Kuratomi -# Author(s): Mike Watters -# Author(s): Pierre-Yves Chibon -# Author(s): Matt Prahl -# Author(s): Ralph Bean -# -''' -sync information from the Pagure into bugzilla - -This ... script takes information about package onwership and imports it -into bugzilla. 
-''' - -import argparse -import datetime -from email.message import EmailMessage -import itertools -import json -import multiprocessing.pool -import os -import re -import smtplib -import sys -import time -import traceback -import xmlrpc.client - -from bugzilla import Bugzilla -import dogpile.cache -import fedora.client -from fedora.client.fas2 import AccountSystem -import requests -from requests.adapters import HTTPAdapter -from urllib3.util import Retry -import yaml - - -env = 'staging' - -cache = dogpile.cache.make_region().configure( - 'dogpile.cache.memory', - expiration_time=3600, -) - - -def retry_session(): - session = requests.Session() - retry = Retry( - total=5, - read=5, - connect=5, - backoff_factor=0.3, - status_forcelist=(500, 502, 504), - ) - adapter = HTTPAdapter(max_retries=retry) - session.mount('http://', adapter) - session.mount('https://', adapter) - return session - - -if env == 'staging': - BZSERVER = 'https://partner-bugzilla.redhat.com' -else: - BZSERVER = 'https://bugzilla.redhat.com' - -BZUSER = '{{ bugzilla_user }}' -BZPASS = '{{ bugzilla_password }}' -BZCOMPAPI = 'component.get' -FASUSER = '{{ fedorathirdpartyUser }}' -FASPASS = '{{ fedorathirdpartyPassword }}' -BUGZILLA_OVERRIDE_REPO = 'releng/fedora-scm-requests' -NOTIFYEMAIL = [ - 'kevin@fedoraproject.org', - 'pingou@fedoraproject.org', -] -VERBOSE = False -DRYRUN = False - -if env == 'staging': - FASURL = 'https://admin.stg.fedoraproject.org/accounts' - FASINSECURE = True - PAGUREURL = 'https://stg.pagure.io' - PAGURE_DIST_GIT_URL = 'https://src.stg.fedoraproject.org' - PDCURL = 'https://pdc.stg.fedoraproject.org/rest_api/v1/' - MDAPIURL = 'https://apps.stg.fedoraproject.org/mdapi/' -else: - FASURL = 'https://admin.fedoraproject.org/accounts' - FASINSECURE = False - PAGUREURL = 'https://pagure.io' - PAGURE_DIST_GIT_URL = 'https://src.fedoraproject.org' - PDCURL = 'https://pdc.fedoraproject.org/rest_api/v1/' - MDAPIURL = 'https://apps.fedoraproject.org/mdapi/' - - -EMAIL_FROM = 
'accounts@fedoraproject.org' -DATA_CACHE = '/var/tmp/pagure_sync_bz.json' - -PRODUCTS = { - 'Fedora': 'Fedora', - 'Fedora Container': 'Fedora Container Images', - 'Fedora Modules': 'Fedora Modules', - 'Fedora EPEL': 'Fedora EPEL', -} - -NAMESPACE_TO_PRODUCT = { - 'rpms': 'Fedora', # except EPEL... - 'container': 'Fedora Container', - 'modules': 'Fedora Modules', -} - -# This maps bugzilla products to "lead" branches in PDC. If the lead branch is -# retired, then we in turn set the default assignee to "orphan" for all new bugs -# in the given product. -PRODUCTS_TO_LEAD_BRANCH = { - # If rawhide is retired, then all new bugs go to orphan for Fedora. - 'Fedora': 'master', - # Same for containers. - 'Fedora Container': 'master', - # Same for modules. - 'Fedora Modules': 'master', - # If epel7 is retired, then all new epel bugs go to orphan. - 'Fedora EPEL': 'epel7', -} -PDC_TYPES = { - 'rpms': 'rpm', - 'modules': 'module', - 'container': 'container', -} -INVERSE_PDC_TYPES = {v: k for k, v in PDC_TYPES.items()} - - -# When querying for current info, take segments of 1000 packages a time -BZ_PKG_SEGMENT = 1000 - - -TMPL_EMAIL_ADMIN = ''' -The following errors were encountered while updating bugzilla with information -from the Package Database. Please have the problems taken care of: - -%s -''' - -# PkgDB sync bugzilla email -PKGDB_SYNC_BUGZILLA_EMAIL = """Greetings. - -You are receiving this email because there's a problem with your -bugzilla.redhat.com account. - -If you recently changed the email address associated with your -Fedora account in the Fedora Account System, it is now out of sync -with your bugzilla.redhat.com account. This leads to problems -with Fedora packages you own or are CC'ed on bug reports for. - -Please take one of the following actions: - -a) login to your old bugzilla.redhat.com account and change the email -address to match your current email in the Fedora account system. 
-https://bugzilla.redhat.com login, click preferences, account -information and enter new email address. - -b) Create a new account in bugzilla.redhat.com to match your -email listed in your Fedora account system account. -https://bugzilla.redhat.com/ click 'new account' and enter email -address. - -c) Change your Fedora Account System email to match your existing -bugzilla.redhat.com account. -https://admin.fedoraproject.org/accounts login, click on 'my account', -then 'edit' and change your email address. - -If you have questions or concerns, please let us know. - -Your prompt attention in this matter is appreciated. - -The Fedora admins. -""" - - -def resilient_partial(fn, *initial, **kwargs): - """ A decorator that partially applies arguments. - - It additionally catches all raised exceptions, prints them, but then returns - None instead of propagating the failures. - - This is used to protect functions used in a threadpool. If one fails, we - want to know about it, but we don't want it to kill the whole program. So - catch its error, log it, but proceed. - """ - def wrapper(*additional): - try: - full = initial + additional - return fn(*full, **kwargs) - except Exception: - traceback.print_exc() - return None - wrapper.__name__ = fn.__name__ - wrapper.__doc__ = fn.__doc__ - return wrapper - - -class DataChangedError(Exception): - '''Raised when data we are manipulating changes while we're modifying it.''' - pass - - -def segment(iterable, chunk, fill=None): - '''Collect data into `chunk` sized block''' - args = [iter(iterable)] * chunk - return itertools.zip_longest(*args, fillvalue=fill) - - -class ProductCache(dict): - def __init__(self, bz, acls): - self.bz = bz - self.acls = acls - - # Ask bugzilla for a section of the pkglist. - # Save the information from the section that we want. 
- def __getitem__(self, key): - try: - return super(ProductCache, self).__getitem__(key) - except KeyError: - # We can only cache products we have pagure information for - if key not in self.acls: - raise - - if BZCOMPAPI == 'getcomponentsdetails': - # Old API -- in python-bugzilla. But with current server, this - # gives ProxyError - products = self.bz.getcomponentsdetails(key) - elif BZCOMPAPI == 'component.get': - # Way that's undocumented in the partner-bugzilla api but works - # currently - pkglist = list(projects_dict[key]) - products = {} - for pkg_segment in segment(pkglist, BZ_PKG_SEGMENT): - # Format that bugzilla will understand. Strip None's that - # segment() pads out the final data segment() with - query = [ - dict(product=PRODUCTS[key], component=p) - for p in pkg_segment if p is not None - ] - raw_data = self.bz._proxy.Component.get(dict(names=query)) - for package in raw_data['components']: - # Reformat data to be the same as what's returned from - # getcomponentsdetails - product = dict( - initialowner=package['default_assignee'], - description=package['description'], - initialqacontact=package['default_qa_contact'], - initialcclist=package['default_cc']) - products[package['name'].lower()] = product - self[key] = products - - return super(ProductCache, self).__getitem__(key) - - -class BugzillaProxy: - - def __init__(self, bzServer, username, password, acls): - self.bzXmlRpcServer = bzServer - self.username = username - self.password = password - - self.server = Bugzilla( - url=self.bzXmlRpcServer, - user=self.username, - password=self.password) - self.productCache = ProductCache(self.server, acls) - - # Connect to the fedora account system - self.fas = AccountSystem( - base_url=FASURL, - username=FASUSER, - password=FASPASS) - - try: - self.userCache = self.fas.people_by_key( - key='username', - fields=['bugzilla_email']) - except fedora.client.ServerError: - # Sometimes, building the userCache up front fails with a timeout. 
- # It's ok, we build the cache as-needed later in the script. - self.userCache = {} - - def _get_bugzilla_email(self, username): - '''Return the bugzilla email address for a user. - - First looks in a cache for a username => bugzilla email. If not found, - reloads the cache from fas and tries again. - ''' - try: - return self.userCache[username]['bugzilla_email'].lower() - except KeyError: - if username.startswith('@'): - group = self.fas.group_by_name(username[1:]) - self.userCache[username] = { - 'bugzilla_email': group.mailing_list} - else: - person = self.fas.person_by_username(username) - bz_email = person.get('bugzilla_email', None) - if bz_email is None: - print('%s has no bugzilla email, valid account?' - % username) - else: - self.userCache[username] = {'bugzilla_email': bz_email} - return self.userCache[username]['bugzilla_email'].lower() - - def add_edit_component(self, package, collection, owner, description=None, - qacontact=None, cclist=None): - '''Add or update a component to have the values specified. 
- ''' - # Turn the cclist into something usable by bugzilla - if not cclist or 'people' not in cclist: - initialCCList = list() - else: - initialCCList = [ - self._get_bugzilla_email(cc) for cc in cclist['people']] - if 'groups' in cclist: - group_cc = [ - self._get_bugzilla_email(cc) for cc in cclist['groups']] - initialCCList.extend(group_cc) - - # Add owner to the cclist so comaintainers taking over a bug don't - # have to do this manually - owner = self._get_bugzilla_email(owner) - if owner not in initialCCList: - initialCCList.append(owner) - - # Lookup product - try: - product = self.productCache[collection] - except xmlrpc.client.Fault as e: - # Output something useful in args - e.args = (e.faultCode, e.faultString) - raise - except xmlrpc.client.ProtocolError as e: - e.args = ('ProtocolError', e.errcode, e.errmsg) - raise - - pkgKey = package.lower() - if pkgKey in product: - # edit the package information - data = {} - - # Grab bugzilla email for things changable via xmlrpc - if qacontact: - qacontact = self._get_bugzilla_email(qacontact) - else: - qacontact = 'extras-qa@fedoraproject.org' - - # Check for changes to the owner, qacontact, or description - if product[pkgKey]['initialowner'] != owner: - data['initialowner'] = owner - - if description and product[pkgKey]['description'] != description: - data['description'] = description - if product[pkgKey]['initialqacontact'] != qacontact and ( - qacontact or product[pkgKey]['initialqacontact']): - data['initialqacontact'] = qacontact - - if len(product[pkgKey]['initialcclist']) != len(initialCCList): - data['initialcclist'] = initialCCList - else: - for ccMember in product[pkgKey]['initialcclist']: - if ccMember not in initialCCList: - data['initialcclist'] = initialCCList - break - - if data: - # FIXME: initialowner has been made mandatory for some - # reason. Asking dkl why. - data['initialowner'] = owner - - # Changes occurred. 
Submit a request to change via xmlrpc - data['product'] = PRODUCTS[collection] - data['component'] = package - if VERBOSE: - print('[EDITCOMP] Changing via editComponent(' - '%s, %s, "xxxxx")' % (data, self.username)) - print('[EDITCOMP] Former values: %s|%s|%s|%s' % ( - product[pkgKey]['initialowner'], - product[pkgKey]['description'], - product[pkgKey]['initialqacontact'], - product[pkgKey]['initialcclist'])) - if not DRYRUN: - try: - self.server.editcomponent(data) - except xmlrpc.client.Fault as e: - # Output something useful in args - e.args = (data, e.faultCode, e.faultString) - raise - except xmlrpc.client.ProtocolError as e: - e.args = ('ProtocolError', e.errcode, e.errmsg) - raise - else: - # Add component - if qacontact: - qacontact = self._get_bugzilla_email(qacontact) - else: - qacontact = 'extras-qa@fedoraproject.org' - - data = { - 'product': PRODUCTS[collection], - 'component': package, - 'description': description or 'NA', - 'initialowner': owner, - 'initialqacontact': qacontact - } - if initialCCList: - data['initialcclist'] = initialCCList - - if VERBOSE: - print('[ADDCOMP] Adding new component AddComponent:(' - '%s, %s, "xxxxx")' % (data, self.username)) - if not DRYRUN: - try: - self.server.addcomponent(data) - except xmlrpc.client.Fault as e: - # Output something useful in args - e.args = (data, e.faultCode, e.faultString) - raise - - -def send_email(fromAddress, toAddress, subject, message, ccAddress=None): - '''Send an email if there's an error. - - This will be replaced by sending messages to a log later. - ''' - if env == 'staging': - # Send no email in staging... 
- pass - else: - msg = EmailMessage() - msg.add_header('To', ','.join(toAddress)) - msg.add_header('From', fromAddress) - msg.add_header('Subject', subject) - if ccAddress is not None: - msg.add_header('Cc', ','.join(ccAddress)) - toAddress = toAddress + ccAddress - msg.set_payload(message) - smtp = smtplib.SMTP('bastion') - smtp.sendmail(fromAddress, toAddress, msg.as_string()) - smtp.quit() - - -def notify_users(errors): - ''' Browse the list of errors and when we can retrieve the email - address, use it to notify the user about the issue. - ''' - data = {} - if os.path.exists(DATA_CACHE): - try: - with open(DATA_CACHE) as stream: - data = json.load(stream) - except Exception as err: - print('Could not read the json file at %s: \nError: %s' % ( - DATA_CACHE, err)) - - new_data = {} - seen = [] - for error in errors: - notify_user = False - if 'The name ' in error and ' is not a valid username' in error: - user_email = error.split(' is not a valid username')[0].split( - 'The name ')[1].strip() - now = datetime.datetime.utcnow() - - # See if we already know about this user - if user_email in data and data[user_email]['last_update']: - last_update = datetime.datetime.fromtimestamp( - int(data[user_email]['last_update'])) - # Only notify users once per hour - if (now - last_update).seconds >= 3600: - notify_user = True - else: - new_data[user_email] = data[user_email] - elif not data or user_email not in data: - notify_user = True - - # Ensure we notify the user only once, no matter how many errors we - # got concerning them. 
- if user_email not in seen: - seen.append(user_email) - else: - notify_user = False - - if notify_user: - send_email( - EMAIL_FROM, - [user_email], - subject='Please fix your bugzilla.redhat.com account', - message=PKGDB_SYNC_BUGZILLA_EMAIL, - ccAddress=NOTIFYEMAIL, - ) - - new_data[user_email] = { - 'last_update': time.mktime(now.timetuple()) - } - - with open(DATA_CACHE, 'w') as stream: - json.dump(new_data, stream) - - -@cache.cache_on_arguments() -def _get_override_yaml(project, session): - pagure_override_url = '{0}/{1}/raw/master/f/{2}/{3}'.format( - PAGUREURL.rstrip('/'), BUGZILLA_OVERRIDE_REPO, project['namespace'], - project['name']) - - if VERBOSE: - print('Querying {0}'.format(pagure_override_url)) - override_rv = session.get(pagure_override_url, timeout=30) - if override_rv.status_code == 200: - override_yaml = yaml.load(override_rv.text) - return override_yaml.get('bugzilla_contact', {}) - return {} - - -@cache.cache_on_arguments() -def _get_package_summary_from_mdapi(namespace, repo, session=None): - summary = None - if namespace != 'rpms': - return summary - - if session is None: - session = retry_session() - - url = '{0}/rawhide/srcpkg/{1}'.format(MDAPIURL.rstrip('/'), repo) - if VERBOSE: - print('Querying {0}'.format(url)) - - rv = session.get(url, timeout=60) - if rv.ok: - rv_json = rv.json() - summary = rv_json['summary'] - elif not rv.ok and rv.status_code != 404: - error_msg = ('The connection to "{0}" failed with the status code {1} ' - 'and output "{2}"').format(url, rv.status_code, rv.text) - raise RuntimeError(error_msg) - - return summary - - -def _get_pdc_branches(session, repo): - """ - Gets the branches on a project. This function is used for mapping. 
- :param repo: the project dict - :return: a list of the repo's branches - """ - branches_url = '{0}component-branches/'.format(PDCURL) - params = dict( - global_component=repo['name'], - type=PDC_TYPES[repo['namespace']] - ) - if VERBOSE: - print('Querying {0} {1}'.format(branches_url, params)) - rv = session.get(branches_url, params=params, timeout=60) - - # If the project's branches can't be reported, just return no branches and - # it will be skipped later on - if not rv.ok: - print(('The connection to "{0}" failed with the status code {1} and ' - 'output "{2}"'.format(branches_url, rv.status_code, rv.text)), - file=sys.stderr) - return [] - - data = rv.json() - return [branch['name'] for branch in data['results']] - - -def _is_retired(product, project): - branches = project['branches'] - if product == 'Fedora EPEL': - for branch, active in branches: - if re.match(r'^epel\d+$', branch): - if active: - return False - # No active branches means it is retired. - return True - else: - for branch, active in branches: - if active: - return False - return True - - -def _to_legacy_schema(product_and_project, session=None): - """ - This function translates the JSON of a Pagure project to what PkgDB used to - output in the Bugzilla API. This function is used for mapping. - :param project_and_product: a tuple containing the dictionary of the JSON - of a Pagure project and a string of the product (e.g. "Fedora", - "Fedora EPEL") - :param session: a requests session object or None - :return: a dictionary of the content that the PkgDB Bugzilla API would - return - """ - product, project = product_and_project - - if session is None: - session = retry_session() - - owner = project['poc'] - watchers = project['watchers'] - - summary = _get_package_summary_from_mdapi( - project['namespace'], project['name'], session) - - # Check if the project is retired in PDC, and if so set assignee to orphan. 
- if _is_retired(product, project): - owner = 'orphan' - - # Check if the Bugzilla ticket assignee has been manually overridden - override_yaml = _get_override_yaml(project, session) - if override_yaml.get(product) \ - and isinstance(override_yaml[product], str): - owner = override_yaml[product] - - return { - 'cclist': { - # Groups is empty because you can't have groups watch projects. - # This is done only at the user level. - 'groups': [], - 'people': watchers, - }, - 'owner': owner, - # No package has this set in PkgDB's API, so it can be safely turned - # off and set to the defaults later on in the code - 'qacontact': None, - 'summary': summary, - # These two values are not part of original PkgDB RV, but they are - # useful - 'product': product, - 'project': project['name'] - } - - -def main(): - """The entrypoint to the script.""" - global VERBOSE, DRYRUN, projects_dict - - parser = argparse.ArgumentParser( - description='Script syncing information between Pagure and bugzilla' - ) - parser.add_argument( - '--dry-run', dest='dryrun', action='store_true', default=False, - help='Do not actually make the changes') - parser.add_argument( - '--verbose', dest='verbose', action='store_true', default=False, - help='Print actions verbosely') - parser.add_argument( - '--debug', dest='debug', action='store_true', default=False, - help='Combination of --verbose and --dry-run') - - args = parser.parse_args() - - if args.debug: - VERBOSE = True - DRYRUN = True - - if args.verbose: - VERBOSE = True - - if args.dryrun: - DRYRUN = True - - # Non-fatal errors to alert people about - errors = [] - - projects_dict = { - 'Fedora': {}, - 'Fedora Container': {}, - 'Fedora Modules': {}, - 'Fedora EPEL': {}, - } - - session = retry_session() - pool = multiprocessing.pool.ThreadPool(8) - - # Get the initial ownership and CC data from pagure - # This part is easy. - poc_url = PAGURE_DIST_GIT_URL + '/extras/pagure_poc.json' - if VERBOSE: - print("Querying %r for points of contact." 
% poc_url) - pagure_namespace_to_poc = session.get(poc_url, timeout=120).json() - cc_url = PAGURE_DIST_GIT_URL + '/extras/pagure_bz.json' - if VERBOSE: - print("Querying %r for initial cc list." % cc_url) - pagure_namespace_to_cc = session.get(cc_url, timeout=120).json() - - # Combine and collapse those two into a single list: - pagure_projects = [] - for namespace, entries in pagure_namespace_to_poc.items(): - for name, poc in entries.items(): - pagure_projects.append(dict( - namespace=namespace, - name=name, - poc=poc, - watchers=pagure_namespace_to_cc[namespace][name], - )) - if env == 'staging': - # Filter out any modules, since we don't have rights to create new - # components in the "Fedora Modules" project yet - pagure_projects = [ - p for p in pagure_projects if p['namespace'] != 'modules' - ] - - branches_url = PDCURL.split('rest_api')[0] + 'extras/active_branches.json' - if VERBOSE: - print("Querying %r for EOL information." % branches_url) - pdc_branches = session.get(branches_url, timeout=120).json() - for proj in pagure_projects: - if proj['namespace'] not in PDC_TYPES: - proj['branches'] = [] - if VERBOSE: - print('! Namespace {namespace} unknown to PDC, project ' - '{namespace}/{name} ignored'.format(**proj)) - continue - pdc_type = PDC_TYPES[proj['namespace']] - proj['branches'] = pdc_branches.get(pdc_type, {}).get(proj['name'], []) - if not proj['branches'] and VERBOSE: - print("! No PDC branch found for {namespace}/{name}".format(**proj)) - - # Determine what products each project maps to based on its branches. - # pagure_rpms_project_products will be in the format of - # [('python-requests': 'Fedora')...] 
which will be used my a mapping - # function below - for project in pagure_projects: - products = set() - for branch, active in project['branches']: - if re.match(r'^epel\d+$', branch): - products.add('Fedora EPEL') - else: - products.add(NAMESPACE_TO_PRODUCT[project['namespace']]) - project['products'] = list(products) - - # Now, we must transform the data we collected into something that PkgDB - # would have returned - p_to_legacy_schema = resilient_partial(_to_legacy_schema, session=session) - items = [ - (product, project) - for project in pagure_projects - for product in project['products'] - ] - legacy_responses = pool.map(p_to_legacy_schema, items) - for response in legacy_responses: - if not response: - continue - projects_dict[response['product']][response['project']] = response - - # Initialize the connection to bugzilla - bugzilla = BugzillaProxy(BZSERVER, BZUSER, BZPASS, projects_dict) - - for product, pkgs in projects_dict.items(): - if product not in PRODUCTS: - continue - for pkgname, pkginfo in sorted(projects_dict[product].items(), - key=lambda x: x[0]): - if VERBOSE: - print("Assessing bugzilla status for %r" % pkgname) - try: - bugzilla.add_edit_component( - pkgname, - product, - pkginfo['owner'], - pkginfo['summary'], - pkginfo['qacontact'], - pkginfo['cclist'] - ) - except ValueError as e: - # A username didn't have a bugzilla address - errors.append(str(e.args)) - except DataChangedError as e: - # A Package or Collection was returned via xmlrpc but wasn't - # present when we tried to change it - errors.append(str(e.args)) - except xmlrpc.client.ProtocolError as e: - # Unrecoverable and likely means that nothing is going to - # succeed. - errors.append(str(e.args)) - break - except xmlrpc.client.Error as e: - # An error occurred in the xmlrpc call. 
Shouldn't happen but - # we better see what it is - errors.append('%s -- %s' % (pkgname, e.args[-1])) - - # Send notification of errors - if errors: - if VERBOSE: - print('[DEBUG]', '\n'.join(errors)) - else: - notify_users(errors) - send_email( - EMAIL_FROM, - NOTIFYEMAIL, - 'Errors while syncing bugzilla with the PackageDB', - TMPL_EMAIL_ADMIN % ('\n'.join(errors),)) - else: - with open(DATA_CACHE, 'w') as stream: - json.dump({}, stream) - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/pagure_sync_bugzilla.py b/pagure_sync_bugzilla.py new file mode 100644 index 0000000..8f379c6 --- /dev/null +++ b/pagure_sync_bugzilla.py @@ -0,0 +1,826 @@ +#!/usr/bin/python3 -tt +# -*- coding: utf-8 -*- +# +# Copyright © 2013-2019 Red Hat, Inc. +# +# This copyrighted material is made available to anyone wishing to use, modify, +# copy, or redistribute it subject to the terms and conditions of the GNU +# General Public License v.2, or (at your option) any later version. This +# program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY expressed or implied, including the implied warranties of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. You should have received a copy of the GNU +# General Public License along with this program; if not, write to the Free +# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the source +# code or documentation are not subject to the GNU General Public License and +# may only be used or replicated with the express permission of Red Hat, Inc. +# +# Red Hat Author(s): Toshio Kuratomi +# Author(s): Mike Watters +# Author(s): Pierre-Yves Chibon +# Author(s): Matt Prahl +# Author(s): Ralph Bean +# +''' +sync information from the Pagure into bugzilla + +This ... script takes information about package ownership and imports it +into bugzilla. 
+''' + +import argparse +import datetime +from email.message import EmailMessage +import itertools +import json +import multiprocessing.pool +import os +import re +import smtplib +import sys +import time +import traceback +import xmlrpc.client + +from bugzilla import Bugzilla +import dogpile.cache +import fedora.client +from fedora.client.fas2 import AccountSystem +import requests +from requests.adapters import HTTPAdapter +from urllib3.util import Retry +import yaml + + +env = 'staging' + +cache = dogpile.cache.make_region().configure( + 'dogpile.cache.memory', + expiration_time=3600, +) + + +def retry_session(): + session = requests.Session() + retry = Retry( + total=5, + read=5, + connect=5, + backoff_factor=0.3, + status_forcelist=(500, 502, 504), + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + return session + + +if env == 'staging': + BZSERVER = 'https://partner-bugzilla.redhat.com' +else: + BZSERVER = 'https://bugzilla.redhat.com' + +BZUSER = '{{ bugzilla_user }}' +BZPASS = '{{ bugzilla_password }}' +BZCOMPAPI = 'component.get' +FASUSER = '{{ fedorathirdpartyUser }}' +FASPASS = '{{ fedorathirdpartyPassword }}' +BUGZILLA_OVERRIDE_REPO = 'releng/fedora-scm-requests' +NOTIFYEMAIL = [ + 'kevin@fedoraproject.org', + 'pingou@fedoraproject.org', +] +VERBOSE = False +DRYRUN = False + +if env == 'staging': + FASURL = 'https://admin.stg.fedoraproject.org/accounts' + FASINSECURE = True + PAGUREURL = 'https://stg.pagure.io' + PAGURE_DIST_GIT_URL = 'https://src.stg.fedoraproject.org' + PDCURL = 'https://pdc.stg.fedoraproject.org/rest_api/v1/' + MDAPIURL = 'https://apps.stg.fedoraproject.org/mdapi/' +else: + FASURL = 'https://admin.fedoraproject.org/accounts' + FASINSECURE = False + PAGUREURL = 'https://pagure.io' + PAGURE_DIST_GIT_URL = 'https://src.fedoraproject.org' + PDCURL = 'https://pdc.fedoraproject.org/rest_api/v1/' + MDAPIURL = 'https://apps.fedoraproject.org/mdapi/' + + +EMAIL_FROM = 
'accounts@fedoraproject.org' +DATA_CACHE = '/var/tmp/pagure_sync_bz.json' + +PRODUCTS = { + 'Fedora': 'Fedora', + 'Fedora Container': 'Fedora Container Images', + 'Fedora Modules': 'Fedora Modules', + 'Fedora EPEL': 'Fedora EPEL', +} + +NAMESPACE_TO_PRODUCT = { + 'rpms': 'Fedora', # except EPEL... + 'container': 'Fedora Container', + 'modules': 'Fedora Modules', +} + +# This maps bugzilla products to "lead" branches in PDC. If the lead branch is +# retired, then we in turn set the default assignee to "orphan" for all new bugs +# in the given product. +PRODUCTS_TO_LEAD_BRANCH = { + # If rawhide is retired, then all new bugs go to orphan for Fedora. + 'Fedora': 'master', + # Same for containers. + 'Fedora Container': 'master', + # Same for modules. + 'Fedora Modules': 'master', + # If epel7 is retired, then all new epel bugs go to orphan. + 'Fedora EPEL': 'epel7', +} +PDC_TYPES = { + 'rpms': 'rpm', + 'modules': 'module', + 'container': 'container', +} +INVERSE_PDC_TYPES = {v: k for k, v in PDC_TYPES.items()} + + +# When querying for current info, take segments of 1000 packages a time +BZ_PKG_SEGMENT = 1000 + + +TMPL_EMAIL_ADMIN = ''' +The following errors were encountered while updating bugzilla with information +from the Package Database. Please have the problems taken care of: + +%s +''' + +# PkgDB sync bugzilla email +PKGDB_SYNC_BUGZILLA_EMAIL = """Greetings. + +You are receiving this email because there's a problem with your +bugzilla.redhat.com account. + +If you recently changed the email address associated with your +Fedora account in the Fedora Account System, it is now out of sync +with your bugzilla.redhat.com account. This leads to problems +with Fedora packages you own or are CC'ed on bug reports for. + +Please take one of the following actions: + +a) login to your old bugzilla.redhat.com account and change the email +address to match your current email in the Fedora account system. 
+https://bugzilla.redhat.com login, click preferences, account +information and enter new email address. + +b) Create a new account in bugzilla.redhat.com to match your +email listed in your Fedora account system account. +https://bugzilla.redhat.com/ click 'new account' and enter email +address. + +c) Change your Fedora Account System email to match your existing +bugzilla.redhat.com account. +https://admin.fedoraproject.org/accounts login, click on 'my account', +then 'edit' and change your email address. + +If you have questions or concerns, please let us know. + +Your prompt attention in this matter is appreciated. + +The Fedora admins. +""" + + +def resilient_partial(fn, *initial, **kwargs): + """ A decorator that partially applies arguments. + + It additionally catches all raised exceptions, prints them, but then returns + None instead of propagating the failures. + + This is used to protect functions used in a threadpool. If one fails, we + want to know about it, but we don't want it to kill the whole program. So + catch its error, log it, but proceed. + """ + def wrapper(*additional): + try: + full = initial + additional + return fn(*full, **kwargs) + except Exception: + traceback.print_exc() + return None + wrapper.__name__ = fn.__name__ + wrapper.__doc__ = fn.__doc__ + return wrapper + + +class DataChangedError(Exception): + '''Raised when data we are manipulating changes while we're modifying it.''' + pass + + +def segment(iterable, chunk, fill=None): + '''Collect data into `chunk` sized block''' + args = [iter(iterable)] * chunk + return itertools.zip_longest(*args, fillvalue=fill) + + +class ProductCache(dict): + def __init__(self, bz, acls): + self.bz = bz + self.acls = acls + + # Ask bugzilla for a section of the pkglist. + # Save the information from the section that we want. 
+ def __getitem__(self, key): + try: + return super(ProductCache, self).__getitem__(key) + except KeyError: + # We can only cache products we have pagure information for + if key not in self.acls: + raise + + if BZCOMPAPI == 'getcomponentsdetails': + # Old API -- in python-bugzilla. But with current server, this + # gives ProxyError + products = self.bz.getcomponentsdetails(key) + elif BZCOMPAPI == 'component.get': + # Way that's undocumented in the partner-bugzilla api but works + # currently + pkglist = list(projects_dict[key]) + products = {} + for pkg_segment in segment(pkglist, BZ_PKG_SEGMENT): + # Format that bugzilla will understand. Strip None's that + # segment() pads out the final data segment() with + query = [ + dict(product=PRODUCTS[key], component=p) + for p in pkg_segment if p is not None + ] + raw_data = self.bz._proxy.Component.get(dict(names=query)) + for package in raw_data['components']: + # Reformat data to be the same as what's returned from + # getcomponentsdetails + product = dict( + initialowner=package['default_assignee'], + description=package['description'], + initialqacontact=package['default_qa_contact'], + initialcclist=package['default_cc']) + products[package['name'].lower()] = product + self[key] = products + + return super(ProductCache, self).__getitem__(key) + + +class BugzillaProxy: + + def __init__(self, bzServer, username, password, acls): + self.bzXmlRpcServer = bzServer + self.username = username + self.password = password + + self.server = Bugzilla( + url=self.bzXmlRpcServer, + user=self.username, + password=self.password) + self.productCache = ProductCache(self.server, acls) + + # Connect to the fedora account system + self.fas = AccountSystem( + base_url=FASURL, + username=FASUSER, + password=FASPASS) + + try: + self.userCache = self.fas.people_by_key( + key='username', + fields=['bugzilla_email']) + except fedora.client.ServerError: + # Sometimes, building the userCache up front fails with a timeout. 
+ # It's ok, we build the cache as-needed later in the script. + self.userCache = {} + + def _get_bugzilla_email(self, username): + '''Return the bugzilla email address for a user. + + First looks in a cache for a username => bugzilla email. If not found, + reloads the cache from fas and tries again. + ''' + try: + return self.userCache[username]['bugzilla_email'].lower() + except KeyError: + if username.startswith('@'): + group = self.fas.group_by_name(username[1:]) + self.userCache[username] = { + 'bugzilla_email': group.mailing_list} + else: + person = self.fas.person_by_username(username) + bz_email = person.get('bugzilla_email', None) + if bz_email is None: + print('%s has no bugzilla email, valid account?' + % username) + else: + self.userCache[username] = {'bugzilla_email': bz_email} + return self.userCache[username]['bugzilla_email'].lower() + + def add_edit_component(self, package, collection, owner, description=None, + qacontact=None, cclist=None): + '''Add or update a component to have the values specified. 
+ ''' + # Turn the cclist into something usable by bugzilla + if not cclist or 'people' not in cclist: + initialCCList = list() + else: + initialCCList = [ + self._get_bugzilla_email(cc) for cc in cclist['people']] + if 'groups' in cclist: + group_cc = [ + self._get_bugzilla_email(cc) for cc in cclist['groups']] + initialCCList.extend(group_cc) + + # Add owner to the cclist so comaintainers taking over a bug don't + # have to do this manually + owner = self._get_bugzilla_email(owner) + if owner not in initialCCList: + initialCCList.append(owner) + + # Lookup product + try: + product = self.productCache[collection] + except xmlrpc.client.Fault as e: + # Output something useful in args + e.args = (e.faultCode, e.faultString) + raise + except xmlrpc.client.ProtocolError as e: + e.args = ('ProtocolError', e.errcode, e.errmsg) + raise + + pkgKey = package.lower() + if pkgKey in product: + # edit the package information + data = {} + + # Grab bugzilla email for things changeable via xmlrpc + if qacontact: + qacontact = self._get_bugzilla_email(qacontact) + else: + qacontact = 'extras-qa@fedoraproject.org' + + # Check for changes to the owner, qacontact, or description + if product[pkgKey]['initialowner'] != owner: + data['initialowner'] = owner + + if description and product[pkgKey]['description'] != description: + data['description'] = description + if product[pkgKey]['initialqacontact'] != qacontact and ( + qacontact or product[pkgKey]['initialqacontact']): + data['initialqacontact'] = qacontact + + if len(product[pkgKey]['initialcclist']) != len(initialCCList): + data['initialcclist'] = initialCCList + else: + for ccMember in product[pkgKey]['initialcclist']: + if ccMember not in initialCCList: + data['initialcclist'] = initialCCList + break + + if data: + # FIXME: initialowner has been made mandatory for some + # reason. Asking dkl why. + data['initialowner'] = owner + + # Changes occurred. 
Submit a request to change via xmlrpc + data['product'] = PRODUCTS[collection] + data['component'] = package + if VERBOSE: + print('[EDITCOMP] Changing via editComponent(' + '%s, %s, "xxxxx")' % (data, self.username)) + print('[EDITCOMP] Former values: %s|%s|%s|%s' % ( + product[pkgKey]['initialowner'], + product[pkgKey]['description'], + product[pkgKey]['initialqacontact'], + product[pkgKey]['initialcclist'])) + if not DRYRUN: + try: + self.server.editcomponent(data) + except xmlrpc.client.Fault as e: + # Output something useful in args + e.args = (data, e.faultCode, e.faultString) + raise + except xmlrpc.client.ProtocolError as e: + e.args = ('ProtocolError', e.errcode, e.errmsg) + raise + else: + # Add component + if qacontact: + qacontact = self._get_bugzilla_email(qacontact) + else: + qacontact = 'extras-qa@fedoraproject.org' + + data = { + 'product': PRODUCTS[collection], + 'component': package, + 'description': description or 'NA', + 'initialowner': owner, + 'initialqacontact': qacontact + } + if initialCCList: + data['initialcclist'] = initialCCList + + if VERBOSE: + print('[ADDCOMP] Adding new component AddComponent:(' + '%s, %s, "xxxxx")' % (data, self.username)) + if not DRYRUN: + try: + self.server.addcomponent(data) + except xmlrpc.client.Fault as e: + # Output something useful in args + e.args = (data, e.faultCode, e.faultString) + raise + + +def send_email(fromAddress, toAddress, subject, message, ccAddress=None): + '''Send an email if there's an error. + + This will be replaced by sending messages to a log later. + ''' + if env == 'staging': + # Send no email in staging... 
+ pass + else: + msg = EmailMessage() + msg.add_header('To', ','.join(toAddress)) + msg.add_header('From', fromAddress) + msg.add_header('Subject', subject) + if ccAddress is not None: + msg.add_header('Cc', ','.join(ccAddress)) + toAddress = toAddress + ccAddress + msg.set_payload(message) + smtp = smtplib.SMTP('bastion') + smtp.sendmail(fromAddress, toAddress, msg.as_string()) + smtp.quit() + + +def notify_users(errors): + ''' Browse the list of errors and when we can retrieve the email + address, use it to notify the user about the issue. + ''' + data = {} + if os.path.exists(DATA_CACHE): + try: + with open(DATA_CACHE) as stream: + data = json.load(stream) + except Exception as err: + print('Could not read the json file at %s: \nError: %s' % ( + DATA_CACHE, err)) + + new_data = {} + seen = [] + for error in errors: + notify_user = False + if 'The name ' in error and ' is not a valid username' in error: + user_email = error.split(' is not a valid username')[0].split( + 'The name ')[1].strip() + now = datetime.datetime.utcnow() + + # See if we already know about this user + if user_email in data and data[user_email]['last_update']: + last_update = datetime.datetime.fromtimestamp( + int(data[user_email]['last_update'])) + # Only notify users once per hour + if (now - last_update).seconds >= 3600: + notify_user = True + else: + new_data[user_email] = data[user_email] + elif not data or user_email not in data: + notify_user = True + + # Ensure we notify the user only once, no matter how many errors we + # got concerning them. 
+ if user_email not in seen: + seen.append(user_email) + else: + notify_user = False + + if notify_user: + send_email( + EMAIL_FROM, + [user_email], + subject='Please fix your bugzilla.redhat.com account', + message=PKGDB_SYNC_BUGZILLA_EMAIL, + ccAddress=NOTIFYEMAIL, + ) + + new_data[user_email] = { + 'last_update': time.mktime(now.timetuple()) + } + + with open(DATA_CACHE, 'w') as stream: + json.dump(new_data, stream) + + +@cache.cache_on_arguments() +def _get_override_yaml(project, session): + pagure_override_url = '{0}/{1}/raw/master/f/{2}/{3}'.format( + PAGUREURL.rstrip('/'), BUGZILLA_OVERRIDE_REPO, project['namespace'], + project['name']) + + if VERBOSE: + print('Querying {0}'.format(pagure_override_url)) + override_rv = session.get(pagure_override_url, timeout=30) + if override_rv.status_code == 200: + override_yaml = yaml.load(override_rv.text) + return override_yaml.get('bugzilla_contact', {}) + return {} + + +@cache.cache_on_arguments() +def _get_package_summary_from_mdapi(namespace, repo, session=None): + summary = None + if namespace != 'rpms': + return summary + + if session is None: + session = retry_session() + + url = '{0}/rawhide/srcpkg/{1}'.format(MDAPIURL.rstrip('/'), repo) + if VERBOSE: + print('Querying {0}'.format(url)) + + rv = session.get(url, timeout=60) + if rv.ok: + rv_json = rv.json() + summary = rv_json['summary'] + elif not rv.ok and rv.status_code != 404: + error_msg = ('The connection to "{0}" failed with the status code {1} ' + 'and output "{2}"').format(url, rv.status_code, rv.text) + raise RuntimeError(error_msg) + + return summary + + +def _get_pdc_branches(session, repo): + """ + Gets the branches on a project. This function is used for mapping. 
+ :param repo: the project dict + :return: a list of the repo's branches + """ + branches_url = '{0}component-branches/'.format(PDCURL) + params = dict( + global_component=repo['name'], + type=PDC_TYPES[repo['namespace']] + ) + if VERBOSE: + print('Querying {0} {1}'.format(branches_url, params)) + rv = session.get(branches_url, params=params, timeout=60) + + # If the project's branches can't be reported, just return no branches and + # it will be skipped later on + if not rv.ok: + print(('The connection to "{0}" failed with the status code {1} and ' + 'output "{2}"'.format(branches_url, rv.status_code, rv.text)), + file=sys.stderr) + return [] + + data = rv.json() + return [branch['name'] for branch in data['results']] + + +def _is_retired(product, project): + branches = project['branches'] + if product == 'Fedora EPEL': + for branch, active in branches: + if re.match(r'^epel\d+$', branch): + if active: + return False + # No active branches means it is retired. + return True + else: + for branch, active in branches: + if active: + return False + return True + + +def _to_legacy_schema(product_and_project, session=None): + """ + This function translates the JSON of a Pagure project to what PkgDB used to + output in the Bugzilla API. This function is used for mapping. + :param project_and_product: a tuple containing the dictionary of the JSON + of a Pagure project and a string of the product (e.g. "Fedora", + "Fedora EPEL") + :param session: a requests session object or None + :return: a dictionary of the content that the PkgDB Bugzilla API would + return + """ + product, project = product_and_project + + if session is None: + session = retry_session() + + owner = project['poc'] + watchers = project['watchers'] + + summary = _get_package_summary_from_mdapi( + project['namespace'], project['name'], session) + + # Check if the project is retired in PDC, and if so set assignee to orphan. 
+ if _is_retired(product, project): + owner = 'orphan' + + # Check if the Bugzilla ticket assignee has been manually overridden + override_yaml = _get_override_yaml(project, session) + if override_yaml.get(product) \ + and isinstance(override_yaml[product], str): + owner = override_yaml[product] + + return { + 'cclist': { + # Groups is empty because you can't have groups watch projects. + # This is done only at the user level. + 'groups': [], + 'people': watchers, + }, + 'owner': owner, + # No package has this set in PkgDB's API, so it can be safely turned + # off and set to the defaults later on in the code + 'qacontact': None, + 'summary': summary, + # These two values are not part of original PkgDB RV, but they are + # useful + 'product': product, + 'project': project['name'] + } + + +def main(): + """The entrypoint to the script.""" + global VERBOSE, DRYRUN, projects_dict + + parser = argparse.ArgumentParser( + description='Script syncing information between Pagure and bugzilla' + ) + parser.add_argument( + '--dry-run', dest='dryrun', action='store_true', default=False, + help='Do not actually make the changes') + parser.add_argument( + '--verbose', dest='verbose', action='store_true', default=False, + help='Print actions verbosely') + parser.add_argument( + '--debug', dest='debug', action='store_true', default=False, + help='Combination of --verbose and --dry-run') + + args = parser.parse_args() + + if args.debug: + VERBOSE = True + DRYRUN = True + + if args.verbose: + VERBOSE = True + + if args.dryrun: + DRYRUN = True + + # Non-fatal errors to alert people about + errors = [] + + projects_dict = { + 'Fedora': {}, + 'Fedora Container': {}, + 'Fedora Modules': {}, + 'Fedora EPEL': {}, + } + + session = retry_session() + pool = multiprocessing.pool.ThreadPool(8) + + # Get the initial ownership and CC data from pagure + # This part is easy. + poc_url = PAGURE_DIST_GIT_URL + '/extras/pagure_poc.json' + if VERBOSE: + print("Querying %r for points of contact." 
% poc_url) + pagure_namespace_to_poc = session.get(poc_url, timeout=120).json() + cc_url = PAGURE_DIST_GIT_URL + '/extras/pagure_bz.json' + if VERBOSE: + print("Querying %r for initial cc list." % cc_url) + pagure_namespace_to_cc = session.get(cc_url, timeout=120).json() + + # Combine and collapse those two into a single list: + pagure_projects = [] + for namespace, entries in pagure_namespace_to_poc.items(): + for name, poc in entries.items(): + pagure_projects.append(dict( + namespace=namespace, + name=name, + poc=poc, + watchers=pagure_namespace_to_cc[namespace][name], + )) + if env == 'staging': + # Filter out any modules, since we don't have rights to create new + # components in the "Fedora Modules" project yet + pagure_projects = [ + p for p in pagure_projects if p['namespace'] != 'modules' + ] + + branches_url = PDCURL.split('rest_api')[0] + 'extras/active_branches.json' + if VERBOSE: + print("Querying %r for EOL information." % branches_url) + pdc_branches = session.get(branches_url, timeout=120).json() + for proj in pagure_projects: + if proj['namespace'] not in PDC_TYPES: + proj['branches'] = [] + if VERBOSE: + print('! Namespace {namespace} unknown to PDC, project ' + '{namespace}/{name} ignored'.format(**proj)) + continue + pdc_type = PDC_TYPES[proj['namespace']] + proj['branches'] = pdc_branches.get(pdc_type, {}).get(proj['name'], []) + if not proj['branches'] and VERBOSE: + print("! No PDC branch found for {namespace}/{name}".format(**proj)) + + # Determine what products each project maps to based on its branches. + # pagure_rpms_project_products will be in the format of + # [('python-requests': 'Fedora')...] 
which will be used by a mapping + # function below + for project in pagure_projects: + products = set() + for branch, active in project['branches']: + if re.match(r'^epel\d+$', branch): + products.add('Fedora EPEL') + else: + products.add(NAMESPACE_TO_PRODUCT[project['namespace']]) + project['products'] = list(products) + + # Now, we must transform the data we collected into something that PkgDB + # would have returned + p_to_legacy_schema = resilient_partial(_to_legacy_schema, session=session) + items = [ + (product, project) + for project in pagure_projects + for product in project['products'] + ] + legacy_responses = pool.map(p_to_legacy_schema, items) + for response in legacy_responses: + if not response: + continue + projects_dict[response['product']][response['project']] = response + + # Initialize the connection to bugzilla + bugzilla = BugzillaProxy(BZSERVER, BZUSER, BZPASS, projects_dict) + + for product, pkgs in projects_dict.items(): + if product not in PRODUCTS: + continue + for pkgname, pkginfo in sorted(projects_dict[product].items(), + key=lambda x: x[0]): + if VERBOSE: + print("Assessing bugzilla status for %r" % pkgname) + try: + bugzilla.add_edit_component( + pkgname, + product, + pkginfo['owner'], + pkginfo['summary'], + pkginfo['qacontact'], + pkginfo['cclist'] + ) + except ValueError as e: + # A username didn't have a bugzilla address + errors.append(str(e.args)) + except DataChangedError as e: + # A Package or Collection was returned via xmlrpc but wasn't + # present when we tried to change it + errors.append(str(e.args)) + except xmlrpc.client.ProtocolError as e: + # Unrecoverable and likely means that nothing is going to + # succeed. + errors.append(str(e.args)) + break + except xmlrpc.client.Error as e: + # An error occurred in the xmlrpc call. 
Shouldn't happen but + # we better see what it is + errors.append('%s -- %s' % (pkgname, e.args[-1])) + + # Send notification of errors + if errors: + if VERBOSE: + print('[DEBUG]', '\n'.join(errors)) + else: + notify_users(errors) + send_email( + EMAIL_FROM, + NOTIFYEMAIL, + 'Errors while syncing bugzilla with the PackageDB', + TMPL_EMAIL_ADMIN % ('\n'.join(errors),)) + else: + with open(DATA_CACHE, 'w') as stream: + json.dump({}, stream) + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..671e32b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +bugzilla +dogpile.cache +python-fedora +PyYAML +requests +urllib3 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..546e4b7 --- /dev/null +++ b/setup.py @@ -0,0 +1,46 @@ +import os.path + +from setuptools import setup + + +HERE = os.path.dirname(__file__) +with open(os.path.join(HERE, 'requirements.txt'), 'r') as f: + INSTALL_REQUIRES = [x.strip() for x in f.readlines()] +with open(os.path.join(HERE, 'test_requirements.txt'), 'r') as f: + TESTS_REQUIRE = [x.strip() for x in f.readlines()] + + +setup( + name='distgit-bugzilla-sync', + version='0.1', + description='script to set default assignee, CC list from component owners', + # Possible options are at https://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'Intended Audience :: System Administrators', + 'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Topic :: Software Development :: Bug Tracking', + ], + license='GPLv2+', + maintainer='Fedora Infrastructure Team', + maintainer_email='infrastructure@lists.fedoraproject.org', + 
platforms=['Fedora', 'GNU/Linux'], + url='https://pagure.io/Fedora-Infra/distgit-bugzilla-sync', + keywords='fedora', + packages=[], + include_package_data=True, + zip_safe=False, + install_requires=INSTALL_REQUIRES, + tests_require=TESTS_REQUIRE, + entry_points={ + 'console_scripts': [ + 'distgit-bugzilla-sync = pagure_sync_bugzilla:main', + ], + }, +) diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 0000000..28ecaca --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,2 @@ +flake8 +pytest