From 0b2b8eab51d76404cffb1ffb3448638d53609e96 Mon Sep 17 00:00:00 2001 From: Ralph Bean Date: May 05 2017 20:27:23 +0000 Subject: Add filtering to the config for scraping. This allows us to sync only special kinds of tickets from the bodhi queue to the FACTORY project. Additionally, I added pagination to our github scraping here which previously only grabbed the latest 100 tickets. Fixes #16. --- diff --git a/ansible/files/sync2jira.py b/ansible/files/sync2jira.py index 3c72f68..45f32df 100644 --- a/ansible/files/sync2jira.py +++ b/ansible/files/sync2jira.py @@ -26,6 +26,12 @@ config = { 'legacy_matching': True, #'jira': { # See /etc/fedmsg.d/sync2jira-credentials.py }, + 'filters': { + 'github': { + # Only sync multi-type tickets from bodhi. + 'fedora-infra/bodhi': { 'state': 'open', 'milestone': 4, }, + }, + }, 'map': { 'pagure': { 'pungi': { 'project': 'COMPOSE', 'component': 'Pungi', }, @@ -52,6 +58,8 @@ config = { 'fedora-modularity/product-definition-center': { 'project': 'FACTORY', 'component': None, }, 'fedora-modularity/BPO': { 'project': 'FACTORY', 'component': None, }, + 'fedora-infra/bodhi': { 'project': 'FACTORY', 'component': None, }, + # Handy utils #'ralphbean/finishline': { 'project': 'FACTORY', 'component': None, }, diff --git a/fedmsg.d/sync2jira.py b/fedmsg.d/sync2jira.py index 53feba4..e9128f9 100644 --- a/fedmsg.d/sync2jira.py +++ b/fedmsg.d/sync2jira.py @@ -35,12 +35,19 @@ config = { #'koji': { 'project': 'BREW', 'component': None, }, }, 'github': { + 'fedora-infra/bodhi': { 'project': 'FACTORY', 'component': None, }, #'product-definition-center/product-definition-center': { # 'project': 'PDC', 'component': 'General', }, #'product-definition-center/pdc-client': { # 'project': 'PDC', 'component': 'General', }, }, }, + 'filters': { + 'github': { + # Only sync multi-type tickets from bodhi. + 'fedora-infra/bodhi': { 'state': 'open', 'milestone': 4, }, + }, + }, }, 'logging': dict( version=1, diff --git a/sync2jira/upstream.py b/sync2jira/upstream.py index 9c29e0a..559f8e5 100644 --- a/sync2jira/upstream.py +++ b/sync2jira/upstream.py @@ -17,9 +17,13 @@ # # Authors: Ralph Bean - import logging +try: + from urllib.parse import urlencode # py3 +except ImportError: + from urllib import urlencode # py2 + import requests import sync2jira.intermediary as i @@ -33,18 +37,44 @@ def handle_github_message(msg, config): repo = msg['msg']['repository']['name'] upstream = '{owner}/{repo}'.format(owner=owner, repo=repo) mapped_repos = config['sync2jira']['map']['github'] + if upstream not in mapped_repos: log.info("%r not in github map: %r" % (upstream, mapped_repos.keys())) return None + + filter = config['sync2jira']\ + .get('filters', {})\ + .get('github', {})\ + .get(upstream, {'state': 'open'}) + + for key, expected in filter.items(): + actual = msg['msg']['issue'].get(key) + if actual != expected: + log.info("Actual %r %r != expected %r" % (key, actual, expected)) + return None + return i.Issue.from_github(upstream, msg['msg']['issue'], config) def handle_pagure_message(msg, config): upstream = msg['msg']['project']['name'] mapped_repos = config['sync2jira']['map']['pagure'] + if upstream not in mapped_repos: log.info("%r not in pagure map: %r" % (upstream, mapped_repos.keys())) return None + + filter = config['sync2jira']\ + .get('filters', {})\ + .get('pagure', {})\ + .get(upstream, {'status': 'Open'}) + + for key, expected in filter.items(): + actual = msg['msg']['issue'].get(key) + if actual != expected: + log.info("Actual %r %r != expected %r" % (key, actual, expected)) + return None + return i.Issue.from_pagure(upstream, msg['msg']['issue'], config) @@ -52,7 +82,10 @@ def pagure_issues(upstream, config): base = config['sync2jira'].get('pagure_url', 'https://pagure.io') url = base + '/api/0/' + upstream + '/issues' - params = dict(status='Open') + params = config['sync2jira']\ + .get('filters', {})\ + .get('pagure', {})\ + .get(upstream, {'status': 'Open'}) response = requests.get(url, params=params) data = response.json()['issues'] @@ -62,22 +95,49 @@ def pagure_issues(upstream, config): def github_issues(upstream, config): - url = 'https://api.github.com/repos/%s/issues' % upstream - - headers = {} token = config['sync2jira'].get('github_token') if not token: + headers = {} log.warning('No github_token found. We will be rate-limited...') else: - headers['Authorization'] = 'token ' + token + headers = {'Authorization': 'token ' + token} - params = dict(per_page=100, state='open') + filter = config['sync2jira']\ + .get('filters', {})\ + .get('github', {})\ + .get(upstream, {'state': 'open'}) + url = 'https://api.github.com/repos/%s/issues' % upstream + url += '?' + urlencode(filter) - response = requests.get(url, params=params, headers=headers) - data = response.json() - issues = ( - i.Issue.from_github(upstream, issue, config) for issue in data + issues = _get_all_github_issues(url, headers) + issues = list(( + i.Issue.from_github(upstream, issue, config) for issue in issues if not 'pull_request' in issue # We don't want to copy these around - ) + )) for issue in issues: yield issue + + +def _get_all_github_issues(url, headers): + """ Pagination utility. Obnoxious. """ + link = dict(next=url) + while 'next' in link: + response = requests.get(link['next'], headers=headers) + for issue in response.json(): + yield issue + link = _github_link_field_to_dict(response.headers.get('link', None)) + + +def _github_link_field_to_dict(field): + """ Utility for ripping apart github's Link header field. + It's kind of ugly. + """ + + if not field: + return dict() + return dict([ + ( + part.split('; ')[1][5:-1], + part.split('; ')[0][1:-1], + ) for part in field.split(', ') + ]) diff --git a/tests/test_upstream.py b/tests/test_upstream.py index b6ac497..2be4811 100644 --- a/tests/test_upstream.py +++ b/tests/test_upstream.py @@ -5,24 +5,26 @@ import sync2jira.upstream as u class TestUpstream(unittest.TestCase): - config = { - 'sync2jira': { - 'map': { - 'github': { - 'org/repo': {}, + def setUp(self): + self.config = { + 'sync2jira': { + 'map': { + 'github': { + 'org/repo': {}, + }, + 'pagure': { + 'some_repo': {}, + }, }, - 'pagure': { - 'some_repo': {}, + 'jira': { + # Nothing, really.. }, }, - 'jira': { - # Nothing, really.. - }, - }, - } + } @mock.patch('sync2jira.upstream.i.Issue.from_github') def test_handle_github_message(self, from_github): + issue_dict = {'state': 'open'} message = { 'msg': { 'repository': { @@ -31,15 +33,16 @@ class TestUpstream(unittest.TestCase): 'login': 'org', }, }, - 'issue': 'some_issue_dict', + 'issue': issue_dict, }, } u.handle_github_message(message, self.config) from_github.assert_called_once_with( - 'org/repo', 'some_issue_dict', self.config) + 'org/repo', issue_dict, self.config) @mock.patch('sync2jira.upstream.i.Issue.from_pagure') def test_handle_pagure_message(self, from_pagure): + issue_dict = {'status': 'Open'} message = { 'msg': { 'project': { @@ -48,12 +51,122 @@ class TestUpstream(unittest.TestCase): 'login': 'org', }, }, - 'issue': 'some_issue_dict', + 'issue': issue_dict, }, } u.handle_pagure_message(message, self.config) from_pagure.assert_called_once_with( - 'some_repo', 'some_issue_dict', self.config) + 'some_repo', issue_dict, self.config) + + @mock.patch('sync2jira.upstream.i.Issue.from_github') + def test_handle_github_filter_positive(self, from_github): + self.config['sync2jira']['filters'] = { + 'github': { + 'org/repo': { + 'some_value': 'present', + }, + }, + } + issue_dict = { + 'state': 'open', + 'some_value': 'present', + } + message = { + 'msg': { + 'repository': { + 'name': 'repo', + 'owner': { + 'login': 'org', + }, + }, + 'issue': issue_dict, + }, + } + u.handle_github_message(message, self.config) + from_github.assert_called_once_with( + 'org/repo', issue_dict, self.config) + + @mock.patch('sync2jira.upstream.i.Issue.from_pagure') + def test_handle_pagure_filter_positive(self, from_pagure): + self.config['sync2jira']['filters'] = { + 'pagure': { + 'some_repo': { + 'some_value': 'present', + }, + }, + } + issue_dict = { + 'status': 'Open', + 'some_value': 'present', + } + message = { + 'msg': { + 'project': { + 'name': 'some_repo', + 'owner': { + 'login': 'org', + }, + }, + 'issue': issue_dict, + }, + } + u.handle_pagure_message(message, self.config) + from_pagure.assert_called_once_with( + 'some_repo', issue_dict, self.config) + + @mock.patch('sync2jira.upstream.i.Issue.from_github') + def test_handle_github_filter_negative(self, from_github): + self.config['sync2jira']['filters'] = { + 'github': { + 'org/repo': { + 'some_value': 'present', + }, + }, + } + issue_dict = { + 'state': 'open', + 'some_value': 'absent', + } + message = { + 'msg': { + 'repository': { + 'name': 'repo', + 'owner': { + 'login': 'org', + }, + }, + 'issue': issue_dict, + }, + } + u.handle_github_message(message, self.config) + assert not from_github.called, 'should not have been called' + + @mock.patch('sync2jira.upstream.i.Issue.from_pagure') + def test_handle_pagure_filter_negative(self, from_pagure): + self.config['sync2jira']['filters'] = { + 'pagure': { + 'some_repo': { + 'some_value': 'present', + }, + }, + } + issue_dict = { + 'status': 'Open', + 'some_value': 'absent', + } + message = { + 'msg': { + 'project': { + 'name': 'some_repo', + 'owner': { + 'login': 'org', + }, + }, + 'issue': issue_dict, + }, + } + u.handle_pagure_message(message, self.config) + assert not from_pagure.called, 'should not have been called' @mock.patch('sync2jira.upstream.requests') @mock.patch('sync2jira.upstream.i.Issue.from_pagure') @@ -89,9 +202,62 @@ class TestUpstream(unittest.TestCase): list(generator) requests.get.assert_called_once_with( - 'https://api.github.com/repos/some_repo/issues', + 'https://api.github.com/repos/some_repo/issues?state=open', + headers={}, + ) + from_github.assert_called_once_with( + 'some_repo', 'some_issue_dict', self.config) + + @mock.patch('sync2jira.upstream.requests') + @mock.patch('sync2jira.upstream.i.Issue.from_pagure') + def test_get_filtered_pagure_issues(self, from_pagure, requests): + self.config['sync2jira']['filters'] = { + 'pagure': { + 'some_repo': { + 'some_value': 'present', + }, + }, + } + response = mock.MagicMock() + response.json.return_value = { + 'issues': ['some_issue_dict'], + } + requests.get.return_value = response + + generator = u.pagure_issues('some_repo', self.config) + # Step through that... + list(generator) + + requests.get.assert_called_once_with( + 'https://pagure.io/api/0/some_repo/issues', + params={'some_value': 'present'}, + ) + from_pagure.assert_called_once_with( + 'some_repo', 'some_issue_dict', self.config) + + @mock.patch('sync2jira.upstream.requests') + @mock.patch('sync2jira.upstream.i.Issue.from_github') + def test_get_filtered_github_issues(self, from_github, requests): + self.config['sync2jira']['filters'] = { + 'github': { + 'some_repo': { + 'some_value': 'present', + }, + }, + } + response = mock.MagicMock() + response.json.return_value = [ + 'some_issue_dict', + ] + requests.get.return_value = response + + generator = u.github_issues('some_repo', self.config) + # Step through that... + list(generator) + + requests.get.assert_called_once_with( + 'https://api.github.com/repos/some_repo/issues?some_value=present', headers={}, - params={'per_page': 100, 'state': 'open'}, ) from_github.assert_called_once_with( 'some_repo', 'some_issue_dict', self.config)