#23 Open a ticket when a test run fails
Merged 3 years ago by nphilipp. Opened 3 years ago by pingou.
fedora-ci/ pingou/monitor-gating report_issue  into  master

file modified
+21 -2
@@ -1,11 +1,11 @@ 

  # Time between two runs in seconds

+ # 3600 = 1h

  delay = 3600

  

  # Time between two blocked runs in seconds

+ # 43200 = 12h

  delay_when_failing = 43200

  

- # blocker issue tags, issue has to have all of them

- blocker_tags = ['packager_workflow_blocker', 'staging']

  

  # CLI arguments to give to the script testing the single build gating workflow

  workflow_single_gating_args = "--conf monitor_gating_stg.cfg --auto-update --no-pr"
@@ -20,3 +20,22 @@ 

  # kb_keytab_file = "/etc/keytabs/monitor-gating-keytab"

  

  fedpkg = "fedpkg"

+ 

+ # The configuration keys below are used when interacting with pagure projects

+ # There are two ways monitor-gating interacts with them.

+ # a) it monitors a specific project to slow down its runs in case a known issue

+ #    prevents the workflow from working (so as to not increase the load on a

+ #    known broken system).

+ # b) it reports to a specific project (but not necessarily the same) when a

+ #    run failed to run properly end to end.

+ 

+ # Project whose issues will slow down the subsequent runs (delay defined

+ # above).

+ pagure_blocking_project = "fedora-infrastructure"

+ # blocker issue tags, issue has to have all of them.

+ blocker_tags = ['packager_workflow_blocker', 'staging']

+ 

+ # Project against which failed runs report their failure.

+ pagure_report_project = "fedora-infra/packaging_workflow_health"

+ pagure_api_token = "<to edit>"

+ env = "prod"

file modified
+25 -3
@@ -68,8 +68,10 @@ 

          run_command(cmd)

  

      delay = conf["delay"]

-     delay_when_failing = conf["delay_when_failing"]

-     blocker_tags = conf["blocker_tags"]

+     report_project = conf["pagure_report_project"]

+     report_api_token = conf["pagure_api_token"]

+     report_env = conf["env"]

+ 

      print("Tests started:", datetime.datetime.utcnow(), flush=True)

      runid = f"{datetime.datetime.utcnow().year}-{uuid.uuid4()}"

      try:
@@ -85,6 +87,14 @@ 

              result = "succeeded"

          else:

              result = "failed"

+             report_failure(

+                 report_project,

+                 report_api_token,

+                 report_env,

+                 "single-package",

+                 monit_utils,

+             )

+ 

          notify(

              topic=f"single-build.end.{result}",

              message={
@@ -115,6 +125,14 @@ 

              result = "succeeded"

          else:

              result = "failed"

+             report_failure(

+                 report_project,

+                 report_api_token,

+                 report_env,

+                 "multi-package",

+                 monit_utils,

+             )

+ 

          notify(

              topic=f"multi-build.end.{result}",

              message={
@@ -135,7 +153,11 @@ 

              topic=f"multi-build.end.error", message={"runid": runid, "exception": err},

          )

  

-     blocking_issues_list = blocking_issues(blocker_tags)

+     delay_when_failing = conf["delay_when_failing"]

+     blocker_tags = conf["blocker_tags"]

+     blocking_project = conf["pagure_blocking_project"]

+ 

+     blocking_issues_list = blocking_issues(blocking_project, blocker_tags)

      now = datetime.datetime.utcnow().strftime("%H:%M:%S")

      if blocking_issues_list:

          print(

file modified
+37 -7
@@ -17,25 +17,55 @@ 

  _log = logging.getLogger(__name__)

  

  

- def blocking_issues(tags):

+ def report_failure(project, token, env, workflow, monit_utils):

+     """ Open a pagure ticket against the instance specified in the

+     configuration file when something does not work.

+     """

+     url = f"https://pagure.io/api/0/{project}/new_issue"

+     title = f"Failure in {env} of the {workflow} packager workflow"

+     logs = "\n".join(monit_utils.logs)

+     content = f"""A run of monitor-gating has just failed in {env} for the {workflow} workflow.

+ 

+ The suspects are '{", ".join(monit_utils.failed)}'.

+ 

+ Full log:

+ ````

+ {logs}

+ ````

+ """

+     tag = env

+ 

+     data = {

+         "title": title,

+         "content": content,

+         "tag": tag,

+     }

+     headers = {

+         "Authorization": f"token {token}",

+     }

+ 

+     req = requests.post(url, data=data, headers=headers)

+     if not req.ok:

+         print(f"Error when trying to open a ticket at: {url} to report the failure")

+ 

+ 

+ def blocking_issues(project, tags):

      """Lists blocking issues we track in the given pagure project.

      """

      if not tags:

          print(f"No tags to filter blocking issues by, returning empty.")

          return []

-     api = f"https://pagure.io/api/0/fedora-infrastructure/issues"

-     q = f"?status=Open&tags={tags[0]}"

+ 

+     api = f"https://pagure.io/api/0/{project}/issues?status=Open&tags={tags[0]}"

      issues = []

      try:

-         r = requests.get(api + q)

+         r = requests.get(api)

          issues = r.json()["issues"]

          if tags:

              t = set(tags[1:])

              issues = [i for i in issues if t & set(i["tags"])]

          for i in issues:

-             print(

-                 f"Found blocking issue https://pagure.io/fedora-infrastructure/issue/{i['id']}"

-             )

+             print(f"Found blocking issue https://pagure.io/{project}/issue/{i['id']}")

      except Exception as e:

          print(f"Error when querying pagure for blocking issues: {e}")

      return issues

no initial comment

This PR sits on top of https://pagure.io/fedora-ci/monitor-gating/pull-request/22 which should be reviewed and merged first to make the diff here smaller.

rebased onto 4a797f5

3 years ago

Minor nit: needs spaces around the equal sign.

Metadata Update from @nphilipp:
- Request assigned

3 years ago

Except the formatting thing, looks good to me (I'm assuming you've tested this, as we don't have tests :wink:).

3 new commits added

  • Expand the documentation for those of us that don't measure time in seconds
  • Add support for opening a ticket on a specific project when a test fails
  • Make configurable the project which slows down the subsequent runs
3 years ago

Pull-Request has been merged by nphilipp

3 years ago