#161 ABRT: Support processing aggregated data from the retrace server
Merged 2 years ago by frantisekz. Opened 2 years ago by frantisekz.

file modified
+4
@@ -10,6 +10,10 @@ 

        command: "pip3 install --user -r requirements.txt"

        args:

          chdir: '{{ zuul.project.src_dir }}'

+     - name: Install oraculum test dependencies

+       command: "pip3 install --user -r requirements-test.txt"

+       args:

+         chdir: '{{ zuul.project.src_dir }}'

      - name: run-pytest

        command: "pytest -vvv"

        args:

@@ -47,6 +47,7 @@ 

  SCHEDULE_URL = "https://fedorapeople.org/groups/schedule/f-%d/f-%d-key.ics"

  PKG_CALENDARS_PATH = "https://pagure.io/package-calendars/raw/main/f/calendars/%s"

  PKG_CALENDARS_REPO_API = "https://pagure.io/api/0/package-calendars/tree/main/f/calendars"

+ RETRACE_SERVER_URL = 'https://retrace.fedoraproject.org/faf/problems/'

  

  EPEL_RELEASES = [7, 8]

  
@@ -83,6 +84,7 @@ 

  #    packager_dashboard_static_user_data

  #    packager_dashboard_package_bugs

  #    packager_dashboard_package_prs

+ #    packager-dashboard_abrt_issues

  #    packages_owners_json

  #    pagure_groups

  #    package_calendars

file modified
+2 -1
@@ -75,6 +75,7 @@ 

      SCHEDULE_URL = 'https://fedorapeople.org/groups/schedule/f-%d/f-%d-key.ics'

      PKG_CALENDARS_PATH = 'https://pagure.io/package-calendars/raw/main/f/calendars/%s'

      PKG_CALENDARS_REPO_API = 'https://pagure.io/api/0/package-calendars/tree/main/f/calendars'

+     RETRACE_SERVER_URL = 'https://retrace.fedoraproject.org/faf/problems/'

  

      EPEL_RELEASES = [7, 8]

  
@@ -110,7 +111,7 @@ 

      #    packager_dashboard_static_user_data

      #    packager_dashboard_package_bugs

      #    packager_dashboard_package_prs

-     #    packager_dashboard_package_versions

+     #    packager-dashboard_abrt_issues

      #    packages_owners_json

      #    pagure_groups

      #    package_versions_generic

file modified
+2 -1
@@ -22,7 +22,7 @@ 

  from flask import url_for, jsonify

  

  from oraculum import app, CACHE

- from oraculum.utils import dashboard_helpers, pagure, koschei, health_check, bodhi, bugzilla, calendars, versions

+ from oraculum.utils import dashboard_helpers, pagure, koschei, health_check, bodhi, bugzilla, calendars, versions, retrace_server

  

  from oraculum.action_providers import ACTION_PROVIDERS

  
@@ -72,6 +72,7 @@ 

      CACHE.register('packager-dashboard_bugs', bugzilla.get_package_bugs)

      CACHE.register('packager-dashboard_bugs_private', bugzilla.get_package_bugs_private)

      CACHE.register('packager-dashboard_pull_requests', pagure.get_package_prs)

+     CACHE.register('packager-dashboard_abrt_issues', retrace_server.fetch_retrace_data)

      CACHE.register('packager-dashboard_package_versions', versions.get_package_versions)

      CACHE.register('packager-dashboard_orphan_page', handle_orphan_user)

  

@@ -23,7 +23,7 @@ 

  from flask_login import current_user

  

  from oraculum import app, CACHE

- from oraculum.utils import bodhi, cache_utils, celery_utils, calendars, dashboard_helpers, health_check, koschei, orphans, pagure, versions

+ from oraculum.utils import bodhi, cache_utils, celery_utils, calendars, dashboard_helpers, health_check, koschei, orphans, pagure, versions, retrace_server

  

  def handle_orphan_user():

      orphans_data = list(CACHE.get('orphans_json')['affected_packages'].keys())
@@ -48,7 +48,7 @@ 

          'static_info': {"data": static_info, 'status': 200, 'last_synced': CACHE.get_refreshed_time('orphans_json').isoformat()},

          'prs': empty,

          'bzs': empty,

-         'package_versions': empty

+         'abrt_reports': empty

          }

  

  @app.route('/api/v1/packager_dashboard_caching')
@@ -78,8 +78,9 @@ 

  

      prs = dashboard_data_prs(package, kind="package")

      bzs = dashboard_data_bzs(package, kind="package", authenticated=is_packager())

+     abrt = dashboard_data_abrt(package, kind="package")

  

-     return jsonify({'static_info': static_info, 'prs': prs, 'bzs': bzs})

+     return jsonify({'static_info': static_info, 'prs': prs, 'bzs': bzs, 'abrt_reports': abrt})

  

  @app.route('/api/v1/packager_dashboard/<user>', methods=['GET'])

  def route_dashboard_user_data(user):
@@ -104,8 +105,9 @@ 

  

      prs = dashboard_data_prs(user, kind="user")

      bzs = dashboard_data_bzs(user, kind="user", authenticated=is_packager())

+     abrt = dashboard_data_abrt(user, kind="user")

  

-     return jsonify({'static_info': static_info, 'prs': prs, 'bzs': bzs})

+     return jsonify({'static_info': static_info, 'prs': prs, 'bzs': bzs, 'abrt_reports': abrt})

  

  

  def dashboard_data_static(item, kind="user"):
@@ -140,6 +142,21 @@ 

          'package_versions': versions.get_packages_versions(packages["combined"])

      }

  

+ def dashboard_data_abrt(item, kind="user"):

+     if kind == "user":

+         data = CACHE.async_get('packager-dashboard_data_static', 'low', item)

+         if data in [cache_utils.RefresherNotRegistered, cache_utils.AsyncRefreshInProgress]:

+             return {"status": 204, "data": None}

+         packages = data["packages"]

+     else:

+         packages = [item]

+ 

+     status = 200

+     data, load_status = CACHE.async_get_batch('packager-dashboard_abrt_issues', packages, 'low')

+     if load_status in [cache_utils.RefresherNotRegistered, cache_utils.AsyncRefreshInProgress]:

+         status = 202

+     return {"status": status, "data": data}

+ 

  def dashboard_data_prs(item, kind="user"):

      if kind == "user":

          data = CACHE.async_get('packager-dashboard_data_static', 'low', item)

@@ -126,6 +126,9 @@ 

      app.logger.debug("Adding periodic task to sync package prs every %s seconds" %

                       SYNC_INTERVALS["packager_dashboard_package_prs"])

      sender.add_periodic_task(SYNC_INTERVALS["packager_dashboard_package_prs"], celery_sync_package_prs.s(), priority=2)

+     app.logger.debug("Adding periodic task to sync package abrt issues every %s seconds" %

+                      SYNC_INTERVALS["packager-dashboard_abrt_issues"])

+     sender.add_periodic_task(SYNC_INTERVALS["packager-dashboard_abrt_issues"], celery_sync_package_abrt_issues.s(), priority=2)

  

  

  @celery_app.task
@@ -210,3 +213,15 @@ 

  

      for package in packages:

          plan_celery_refresh("low", 'packager-dashboard_pull_requests', package)

+ 

+ @celery_app.task

+ def celery_sync_package_abrt_issues():

+     users = get_users_for_sync()

+     packages = set(get_packages_for_sync())

+ 

+     for user in users:

+         data = oraculum.controllers.packager_dashboard.dashboard_data_static(user)

+         packages.update(data["packages"])

+ 

+     for package in packages:

+         plan_celery_refresh("low", 'packager-dashboard_abrt_issues', package)

@@ -0,0 +1,88 @@ 

+ #

+ # retrace_server.py - Retrace server data parsers and utilities for packager dashboard

+ #

+ # Copyright 2021, Red Hat, Inc

+ #

+ # This program is free software; you can redistribute it and/or modify

+ # it under the terms of the GNU General Public License as published by

+ # the Free Software Foundation; either version 2 of the License, or

+ # (at your option) any later version.

+ #

+ # This program is distributed in the hope that it will be useful,

+ # but WITHOUT ANY WARRANTY; without even the implied warranty of

+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

+ # GNU General Public License for more details.

+ #

+ # You should have received a copy of the GNU General Public License along

+ # with this program; if not, write to the Free Software Foundation, Inc.,

+ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

+ #

+ # Authors:

+ #   Frantisek Zatloukal <fzatlouk@redhat.com>

+ 

+ from collections import defaultdict

+ 

+ import datetime

+ import statistics

+ 

+ from oraculum import app

+ from oraculum.utils.dashboard_helpers import get_json

+ 

+ def fetch_retrace_data(package):

+     """

+     Returns {

+         "retrace_link": "https://retrace.fedoraproject.org/faf/problems/?component_names=my_package&daterange=today-30:today"

+         "problems_present": True/False, # Based on existence of any problems

+         "outstanding_problems": [{ # List of problems with high (outlying) counts of occurences

+             "id": 1111,

+             "crash_function": "my_crashing_function",

+             "count": 999,

+             "url": "https://retrace.fedoraproject.org/faf/problems/1111"

+         }, ...]

+     }

+     """

+     now = datetime.date.today().isoformat()

+     past = (datetime.date.today() - datetime.timedelta(days=30)).isoformat()

+ 

+     data = {

+         "retrace_link": app.config["RETRACE_SERVER_URL"] + "?component_names=%s&daterange=%s:%s" % (package, past, now),

+         "problems_present": False,

+         "outstanding_problems": []

+     }

+     retrace_server_data = get_json(data["retrace_link"],

+                                    headers={"Accept": "application/json", "Content-type": "application/json"})

+     if len(retrace_server_data["problems"]) == 0:

+         return data

+ 

+     # We have at least 1 problem, there may or may not be any outstanding issues

+     data["problems_present"] = True

+ 

+     # To count occurences of a problem (by counting all the sub-issues together)

+     occurences = defaultdict(lambda: 0)

+ 

+     for problem in retrace_server_data["problems"]:

+         for report in problem["reports"]:

+             occurences[problem["id"]] += report["count"]

+ 

+     # Count the threshold for occurences outliers (same algo as a box-plot uses)

+     # We don't want to display/output every problem from the retrace server, just the extreme ones to filter out the noise

+     if len(occurences) >= 2:

+         occurences_quartiles = statistics.quantiles(occurences.values(), n=4)

+         occurences_iqr = occurences_quartiles[2] - occurences_quartiles[0]

+         occurences_outliers_threshold = occurences_quartiles[2] + 1.5 * occurences_iqr

+     else:

+         # If we have 1 problem we can't count quantiles, so let's output that problem by setting the threshold to 0

+         occurences_outliers_threshold = 0

+ 

+     for problem in retrace_server_data["problems"]:

+         cid = problem["id"]

+         if occurences[cid] > occurences_outliers_threshold:

+             data["outstanding_problems"].append({

+                                                  "id": cid,

+                                                  "crash_function": problem["crash_function"],

+                                                  "count": occurences[cid],

+                                                  "url": app.config["RETRACE_SERVER_URL"] + str(cid)

+                                                  })

+ 

+     return data

+ 

@@ -0,0 +1,2 @@ 

+ pytest

+ pytest-freezegun

file modified
+1
@@ -67,5 +67,6 @@ 

        ],

        test_require=[

            'pytest',

+           'pytest-freezegun',

        ],

        )

@@ -0,0 +1,118 @@ 

+ import pytest

+ 

+ from unittest import mock

+ 

+ from oraculum.utils import retrace_server

+ 

+ 

+ class TestDashboardRetrace(object):

+ 

+ 

+     stub_basic_retrace_data = {

+         "problems": [

+             {

+                 "crash_function": "increment",

+                 "id": 1111,

+                 "reports": [{"count": 2}]

+             },

+             {

+                 "crash_function": "increment",

+                 "id": 2222,

+                 "reports": [{"count": 2}, {"count": 1}]

+             },

+             {

+                 "crash_function": "increment",

+                 "id": 3333,

+                 "reports": [{"count": 1}, {"count": 3}]

+             },

+             {

+                 "crash_function": "increment",

+                 "id": 4444,

+                 "reports": [{"count": 3}, {"count": 1}]

+             },

+             {

+                 "crash_function": "increment",

+                 "id": 5555,

+                 "reports": [{"count": 5}]

+             },

+             {

+                 "crash_function": "increment",

+                 "id": 6666,

+                 "reports": [{"count": 6}]

+             },

+             {

+                 "crash_function": "increment",

+                 "id": 7777,

+                 "reports": [{"count": 5}, {"count": 5}, {"count": 5}]

+             }

+         ]

+     }

+ 

+     stub_zero_problems = {"problems": []}

+ 

+     stub_one_problem = {

+         "problems": [

+             {

+                 "crash_function": "increment",

+                 "id": 7777,

+                 "reports": [{"count": 15}]

+             }

+         ]

+     }

+ 

+     stub_no_outstanding = {

+         "problems": [

+             {

+                 "crash_function": "increment",

+                 "id": 1111,

+                 "reports": [{"count": 14}, {"count": 4}]

+             },

+             {

+                 "crash_function": "_decorator",

+                 "id": 2222,

+                 "reports": [{"count": 4}, {"count": 5}]

+             },

+             {

+                 "crash_function": "__init__",

+                 "id": 3333,

+                 "reports": [{"count": 2}]

+             }

+         ]

+     }

+ 

+     @pytest.mark.freeze_time('2100-02-01')

+     def test_fetch_retrace_data(self, monkeypatch):

+ 

+         retrace_link = 'https://retrace.fedoraproject.org/faf/problems/?component_names=my_package&daterange=2100-01-02:2100-02-01'

+         expected_result_basic = {

+             'outstanding_problems': [

+                 {

+                     'count': 15,

+                     'crash_function': 'increment',

+                                     'id': 7777,

+                                     'url': 'https://retrace.fedoraproject.org/faf/problems/7777'

+                 }

+             ],

+             'problems_present': True,

+             'retrace_link': retrace_link

+         }

+ 

+         expected_result_zero = {'outstanding_problems': [], 'problems_present': False, 'retrace_link': retrace_link}

+         expected_result_zero_outstanding = {'outstanding_problems': [], 'problems_present': True, 'retrace_link': retrace_link}

+ 

+ 

+         # Basic test

+         monkeypatch.setattr(retrace_server, 'get_json', mock.MagicMock(return_value=self.stub_basic_retrace_data))

+         assert retrace_server.fetch_retrace_data("my_package") == expected_result_basic

+ 

+         # Only one problem on retrace server

+         monkeypatch.setattr(retrace_server, 'get_json', mock.MagicMock(return_value=self.stub_one_problem))

+         assert retrace_server.fetch_retrace_data("my_package") == expected_result_basic

+ 

+         # Zero problems

+         monkeypatch.setattr(retrace_server, 'get_json', mock.MagicMock(return_value=self.stub_zero_problems))

+         assert retrace_server.fetch_retrace_data("my_package") == expected_result_zero

+ 

+         # Some problems on the retrace server, zero outstanding

+         monkeypatch.setattr(retrace_server, 'get_json', mock.MagicMock(return_value=self.stub_no_outstanding))

+         assert retrace_server.fetch_retrace_data("my_package") == expected_result_zero_outstanding