#1957 Remove pdc calls from owner-sync-pagure.j2 script
Merged 5 months ago by zlopez. Opened 6 months ago by lenkaseg.
fedora-infra/ lenkaseg/ansible osp  into  main

@@ -22,6 +22,7 @@ 

  import multiprocessing.pool

  from math import ceil

  from functools import partial

+ import re

  

  import requests

  import koji
@@ -50,6 +51,7 @@ 

  HOSTNAME = 'bodhi{{ env_suffix }}.fedoraproject.org'

  IPA_REALM = '{{ ipa_realm }}'

  ENV_SUFFIX = '{{ env_suffix }}'

+ BODHI_URL = 'https://bodhi.fedoraproject.org/'

  if STAGING:

      PAGURE_URL = 'https://src.stg.fedoraproject.org/'

      PDC_URL = 'https://pdc.stg.fedoraproject.org/rest_api/v1/'
@@ -186,8 +188,50 @@ 

      return branch, arches

  

  

- def get_pdc_project_name_and_branch(session, namespace, project_name,

-                                     verbose=False):

def get_active_releases_from_bodhi():
    """
    Ask Bodhi for its non-archived releases and collect their branch names.

    Only branches whose name is ``f`` or ``epel`` followed by one or two
    digits are kept; every other branch reported by Bodhi is ignored.

    :return: a list of unique branch names in no particular order, or an
    empty list when Bodhi does not answer with a successful status
    """
    bodhi_url = '{0}releases/?exclude_archived=True'.format(BODHI_URL)
    response = requests.get(bodhi_url, timeout=60)

    # Anything but a successful answer means "no active releases known".
    if not response.ok:
        return []

    # A missing 'releases' key is still an error (KeyError); an empty or
    # null value simply yields no branches.
    releases = response.json()['releases'] or ()
    matching_branches = [
        entry['branch']
        for entry in releases
        if re.match(r'^(f|epel)\d{1,2}$', entry['branch'])
    ]

    # Several releases can share one branch, so drop the duplicates.
    return list(set(matching_branches))

+ 

def get_project_branches(session, namespace, project_name):
    """
    Returns list of branches for the repo from Pagure dist-git.

    :param session: a requests session object; currently unused, the query
    is sent with a plain ``requests.get`` call
    :param namespace: a string determines a type of the repository
    :param project_name: a string of the repository name
    :return: a list of branch names; an empty list when Pagure does not
    answer with a successful status
    """
    get_branches_url = '{0}api/0/{1}/{2}/git/branches'.format(
        PAGURE_URL, namespace, project_name)

    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json'
    }
    rv = requests.get(get_branches_url, headers=headers, timeout=60)

    # When specific namespace has no branches, API returns error
    # "Project not found". Do not fail. Return "no branches found" instead.
    # The status is checked before the body is parsed so that an error page
    # that is not JSON cannot raise here.
    if not rv.ok:
        # Always a plain list: the caller feeds the result to set(), which
        # cannot hash a (project_name, []) tuple.
        return []

    return list(rv.json().get("branches", ()))

+ 

+ 

+ def get_project_name_and_its_active_branches(session, namespace, active_releases,

+                                              lookaside, project_name, verbose=False):

      """

      Gets the branches on a project. This function is used for mapping.

      :param namespace: string of the namespace the project is in
@@ -196,24 +240,18 @@ 

      :return: a tuple containing the string of the project and a list of

      branches

      """

-     project_branches_url = '{0}component-branches/'.format(PDC_URL)

-     params = dict(

-         global_component=project_name,

-         type=PDC_TYPES[namespace],

-         active=True,

-     )

      if verbose:

-         print('- Querying {0} {1}'.format(project_branches_url, params))

-     project_branches_rv = session.get(

-         project_branches_url, params=params, verify=VERIFY, timeout=60)

+         print('- Querying pagure distgit for package branches')

+     project_branches = get_project_branches(session, namespace, project_name)

  

-     # If the project's branches can't be reported, just return no branches and

-     # it will be skipped later on

-     if not project_branches_rv.ok:

-         return project_name, []

+     active_package_branches = list(set(active_releases) & set(project_branches)) + ['rawhide']

  

-     data = project_branches_rv.json()

-     return project_name, [branch['name'] for branch in data['results']]

+     # Check if a package is not retired on any of the branches

+     for branch in active_package_branches:

+         if project_name in lookaside[branch]:

+             active_package_branches.remove(branch)

+ 

+     return project_name, active_package_branches

  

  

  def get_pagure_project_names_from_page(session, namespace, page,
@@ -233,16 +271,18 @@ 

      if verbose:

          print('- Querying {0}'.format(url))

      response = session.get(url, verify=VERIFY, timeout=120)

-     if not bool(response):

-         print("Failed to talk to %r %r." % (

-             response.request.url, response), file=sys.stderr)

-         return set()

  

-     names = set()

-     for project in response.json()['projects']:

-         names.add(project['name'])

+     if bool(response):

+         names = set()

+         for project in response.json()['projects']:

+             names.add(project['name'])

  

-     return names

+         return names

+ 

+     else:

+         print("Failed to talk to %r %r." % (

+         response.request.url, response), file=sys.stderr)

+         return set()

  

  

  def get_pagure_project_branches(namespace, package=None, verbose=False):
@@ -288,7 +328,7 @@ 

      # Since we are going to multi-thread, we need to make a partial function

      # call so that all the function needs is an iterable to run

      partial_get_pdc_project_name_and_branch = partial(

-         get_pdc_project_name_and_branch, session, namespace,

+         get_project_name_and_its_active_branches, session, namespace, active_releases, lookaside,

          verbose=verbose)

      # Get a list of tuples in the form of (project, [branch...]), then convert

      # that to a dictionary
@@ -433,6 +473,22 @@ 

          }

          unique_namespaces.update(namespaces)

  

+     # Let's start with getting the active releases from bodhi

+     active_releases = get_active_releases_from_bodhi()

+ 

+     # Let's fetch the json files with retired packages per release from lookaside cache

+     # This is a bit ugly, but the idea is to have the latest release removed in favor of rawhide

+     rawhide_active_releases = active_releases[:]

+     rawhide_active_releases.remove(max(rawhide_active_releases))

+     rawhide_active_releases + ['rawhide']

+     # Let's store the json files with retired packages in lookaside

+     lookaside = {}

+     for branch in rawhide_active_releases:

+         url = "https://src.fedoraproject.org/lookaside/retired_in_{0}.json".format(branch)

+         rv = requests.get(url)  # change to session

+         lookaside[branch] = rv.json()

+ 

+ 

      # Get all the project to branch mappings for every namespace

      namespace_to_projects = {}

      for namespace in unique_namespaces:
@@ -440,7 +496,7 @@ 

              print('Querying for all the projects with the namespace "{0}"'

                    .format(namespace))

          namespace_to_projects[namespace] = \

-             get_pagure_project_branches(namespace, package=package, verbose=verbose)

+             get_pagure_project_branches(namespace, active_releases, lookaside, package=package, verbose=verbose)

  

      for tag, info in list(tag_info.items()):

          if verbose:

As part of the PDC decommissioning initiative, PDC calls need to be removed from this script.

Signed-off-by: Lenka Segura lsegura@redhat.com

Example output from the original function get_pdc_project_name_and_branch https://pagure.io/fedora-infra/ansible/blob/main/f/roles/bodhi2/backend/templates/owner-sync-pagure.j2#_189

For package 0ad:

('0ad', ['epel7', 'f38', 'f39', 'f40', 'rawhide'])

For package 0install:

('0install', ['epel7'])

Example output of the proposed function get_project_name_and_its_active_branches:
For package 0ad:

('0ad', ['f39', 'f38', 'f40', 'epel7', 'rawhide'])

For package 0install:

('0install', ['epel7'])

What it does now to avoid PDC:
- gets the active releases from Bodhi
- gets the project's branches
- creates the intersection of these two lists, i.e. the project branches that are on active releases
- checks that the package is not retired on any of these branches (retirement is marked by the package's presence in the JSON file retired_in_<release>.json)
- outputs the package name and a list of its active branches

Since the function runs in multiple threads, I moved the fetching of the active releases from Bodhi and of the JSON files with retired packages from lookaside out of the function.

rebased onto a58c9f2e1bc23153a447799bedaa534cce31774e

6 months ago

This looks reasonable to me. Is it ready to go?

Any other folks want to review? @zlopez or @abompard ? ;)

I wrote it in quite a raw way, review greatly appreciated!

OK. Let's push this out... but perhaps you could do it tomorrow when you are around, in case any issues come up?

Or if you want I can...

Yeah, it is not tested on the machine, I tested only the function locally, so anything could happen.

Indeed. Let's merge early next week... Possibly @abompard or @zlopez could merge/push it, since their timezones are nearer yours; otherwise I can do so early in my morning sometime?

rebased onto 7fde661

5 months ago

rebased onto 7fde661

5 months ago

Pull-Request has been merged by zlopez

5 months ago
Metadata