#29 Allow limiting handlers to handle some build targets on some events
Merged 8 years ago by qwan. Opened 8 years ago by qwan.

file modified
+43
@@ -77,6 +77,49 @@ 

  

      SSL_ENABLED = False

  

+     # Whitelist and blacklist used by handlers to decide whether an artifact

+     # can be built when handling an event.

+     #

+     # The format is:

+     #

+     # { <handler_name> :

+     #     { <event_name> :

+     #         { <artifact_type>: <list_of_name_branch_dict> }

+     #     }

+     # }

+     #

+     # Here is an example that allows the MBS handler to build, on the

+     # "RPMSpecUpdated" event, any module whose name matches 'base-.*',

+     # except when:

+     #   1. the module name matches 'base-test-module', or

+     #   2. the module is from the 'rawhide' branch

+     #

+     # HANDLER_BUILD_WHITELIST = {

+     #     "MBS": {

+     #         "RPMSpecUpdated": {

+     #             "module": [

+     #                 {

+     #                     'name': 'base-.*',

+     #                 },

+     #             ],

+     #         },

+     #     },

+     # }

+     # HANDLER_BUILD_BLACKLIST = {

+     #     "MBS": {

+     #         "RPMSpecUpdated": {

+     #             "module": [

+     #                 {

+     #                     'name': 'base-test-module',

+     #                 },

+     #                 {

+     #                     'branch': 'rawhide',

+     #                 },

+     #             ],

+     #         },

+     #     },

+     # }

+ 

  

  class DevConfiguration(BaseConfiguration):

      DEBUG = True

file modified
+11 -1
@@ -176,6 +176,16 @@ 

              'type': str,

              'default': '',

              'desc': 'Build owner.'},

+         'handler_build_whitelist': {

+             'type': dict,

+             'default': {},

+             'desc': 'Whitelist for build targets of handlers',

+         },

+         'handler_build_blacklist': {

+             'type': dict,

+             'default': {},

+             'desc': 'Blacklist for build targets of handlers',

+         },

      }

  

      def __init__(self, conf_section_obj):
@@ -222,7 +232,7 @@ 

          if key in self._defaults:

              # type conversion for configuration item

              convert = self._defaults[key]['type']

-             if convert in [bool, int, list, str, set]:

+             if convert in [bool, int, list, str, set, dict]:

                  try:

                      # Do no try to convert None...

                      if value is not None:

@@ -22,9 +22,10 @@ 

  # Written by Jan Kaluza <jkaluza@redhat.com>

  

  import abc

+ import re

  import fedmsg.utils

  

- from freshmaker import conf, db, models

+ from freshmaker import conf, log, db, models

  

  

  def load_handlers():
@@ -73,3 +74,55 @@ 

          ev = models.Event.get_or_create(db.session, event.msg_id)

          models.ArtifactBuild.create(db.session, ev, name, type, build_id, dep_of)

          db.session.commit()

+ 

+     def allow_build(self, event, artifact_type, name, branch):
cqi commented 8 years ago

artifact_type is repeatedly passed as strings, e.g. 'module', 'image'; the same happens in the record_build method. I suggest giving these values a name so that the types can be referenced, e.g. ArtifactType.Image or ArtifactType.Module.

I think we can do that in another PR. I've created a bug to track this: https://pagure.io/freshmaker/issue/30
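
For illustration only, the suggestion tracked in issue #30 would amount to something like the hypothetical sketch below; the ArtifactType name comes from the comment above and is not part of this PR.

from enum import Enum

# Hypothetical sketch of the ArtifactType suggestion (issue #30); not part of this PR.
class ArtifactType(Enum):
    Module = 'module'
    Image = 'image'

# Callers could then pass ArtifactType.Module instead of the bare string 'module',
# and use ArtifactType.Module.value where the string form is needed.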

+         """

+         Check whether the artifact is allowed to be built by checking

+         HANDLER_BUILD_WHITELIST and HANDLER_BUILD_BLACKLIST in config.

+ 

+         :param event: event instance.

+         :param artifact_type: 'module' or 'image'.

+         :param name: name of the artifact.

+         :param branch: branch name of the artifact.

+         :return: True or False.

+         """

+         # If there is a whitelist specified for the (handler, event, artifact_type),

+         # the build target of (name, branch) needs to be in that whitelist first.

+         # After that (if the build target is in the whitelist), check that the build

+         # target is not in the specified blacklist.

+ 

+         # By default, assume the artifact is in the whitelist and not in the blacklist.

+         in_whitelist = True

+         in_blacklist = False

+ 

+         handler_name = self.name

+         event_name = type(event).__name__

+         whitelist_rules = conf.handler_build_whitelist.get(handler_name, {}).get(event_name, {})

+         blacklist_rules = conf.handler_build_blacklist.get(handler_name, {}).get(event_name, {})

+ 

+         def match_rule(name, branch, rule):

+             name_rule = rule.get('name', None)

+             branch_rule = rule.get('branch', None)

+             if name_rule and not re.compile(name_rule).match(name):

+                 return False

+             if branch_rule and not re.compile(branch_rule).match(branch):

+                 return False

+             return True

+ 

+         try:

+             whitelist = whitelist_rules.get(artifact_type, [])

+             if whitelist and not any([match_rule(name, branch, rule) for rule in whitelist]):

+                 in_whitelist = False

+ 

+             # only need to check blacklist when it is in whitelist first

+             if in_whitelist:

+                 blacklist = blacklist_rules.get(artifact_type, [])

+                 if blacklist and any([match_rule(name, branch, rule) for rule in blacklist]):

+                     in_blacklist = True

+ 

+         except re.error as exc:

+             log.error("Error while compiling blacklist/whitelist rule for <handler(%s) event(%s) artifact(%s)>:\n"

+                       "Incorrect regular expression: %s\nBlacklist and Whitelist will not take effect",

+                       handler_name, event_name, artifact_type, str(exc))

+             return True

+         return in_whitelist and not in_blacklist

@@ -38,6 +38,7 @@ 

  

  

  class DockerImageRebuildHandler(BaseHandler):

+     name = 'DockerImageRebuildHandler'

  

      def can_handle(self, event):

          return isinstance(event, DockerfileChanged)
@@ -48,11 +49,16 @@ 

  

          log.info('Start to rebuild docker image %s', event.repo)

  

+         if not self.allow_build(event, 'image', event.repo, event.branch):

+             log.info("Skip rebuild of %s:%s as it's not allowed by configured whitelist/blacklist",

+                      event.repo, event.branch)

+             return []

+ 

          try:

              task_id = self.build_image(repo_url=event.repo_url,

-                                        rev=event.rev,

-                                        branch=event.branch,

-                                        namespace=event.namespace)

+                                         rev=event.rev,

+                                         branch=event.branch,

+                                         namespace=event.namespace)

  

              self.record_build(event, event.repo, 'image', task_id)

  
@@ -61,6 +67,8 @@ 

          except:

              log.exception('Could not create task to build docker image %s', event.repo)

  

+         return []

+ 

      def build_image(self, repo_url, rev, branch, namespace=None):

          with koji_service(profile=conf.koji_profile, logger=log) as service:

              log.debug('Logging into {0} with Kerberos authentication.'.format(service.server))
@@ -83,6 +91,7 @@ 

  

  class DockerImageRebuildHandlerForBodhi(DockerImageRebuildHandler):

      """Rebuild docker images when RPMs are synced by Bodhi"""

+     name = 'DockerImageRebuildForBodhiHandler'

  

      def __init__(self):

          self.pdc_session = pdc.get_client_session(conf)
@@ -100,6 +109,10 @@ 

          log.info('Found docker images to rebuild: %s', containers)

  

          for container in containers:

+             if not self.allow_build(event, 'image', container['name'], container['branch']):
cqi commented 8 years ago

I lost my comment after rebase.

Defining a separate filter method would be helpful for doing more with one "container" besides calling allow_build, e.g. logging something.

To avoid iterating containers twice, we would just need to convert get_containers_including_rpms to a generator instead of returning a list object.

qwan commented 8 years ago

Sorry, I'm not very clear on the solution of returning a generator here, could you explain more?

cqi commented 8 years ago

Sorry, a generator in this case cannot help to reduce a loop.

+                 log.info("Skip rebuild of image %s:%s as it's not allowed by configured whitelist/blacklist",

+                          container['name'], container['branch'])

+                 continue

              try:

                  task_id = self.handle_image_build(container)

                  self.record_build(event, container['name'], 'image', task_id)
@@ -107,23 +120,17 @@ 

                  log.exception('Error when rebuild %s', container)

  

      def handle_image_build(self, container_info):

-         container_detail = pdc.get_release_component(self.pdc_session,

-                                                      container_info['id'])

- 

-         branch = container_detail['dist_git_branch']

-         image_name = container_detail['name']

-         repo_url = '{}/{}/{}'.format(conf.git_base_url,

-                                      'container',

-                                      image_name)

+         name = container_info['name']

+         branch = container_info['branch']

+         repo_url = '{}/{}/{}'.format(conf.git_base_url, 'container', name)

  

-         log.info('Start to rebuild docker image %s from branch %s',

-                  image_name, branch)

+         log.info('Start to rebuild docker image %s from branch %s', name, branch)

  

          with temp_dir(suffix='-rebuild-docker-image') as working_dir:

              self.clone_repository(repo_url, branch, working_dir)

  

              last_commit_hash = get_commit_hash(

-                 os.path.join(working_dir, image_name))

+                 os.path.join(working_dir, name))

  

              return self.build_image(repo_url=repo_url,

                                      branch=branch,
@@ -146,6 +153,8 @@ 

              for container in found:

                  id = container['id']

                  if id not in containers:

+                     container_detail = pdc.get_release_component(self.pdc_session, id)

+                     container['branch'] = container_detail['dist_git_branch']

                      containers[id] = container

  

          return containers.values()

@@ -94,6 +94,10 @@ 

  

      def handle_metadata_update(self, event):

          log.info("Triggering rebuild of %s, metadata updated", event.scm_url)

+         if not self.allow_build(event, 'module', event.name, event.branch):

+             log.info("Skip rebuild of %s:%s as it's not allowed by configured whitelist/blacklist",

+                      event.name, event.branch)

+             return []

          build_id = self.rebuild_module(event.scm_url, event.branch)

          if build_id is not None:

              self.record_build(event, event.name, 'module', build_id)
@@ -139,11 +143,17 @@ 

              for mod in modules:

                  name = mod['variant_name']

                  version = mod['variant_version']

+                 if not self.allow_build(event, 'module', name, version):

+                     log.info("Skip rebuild of %s:%s as it's not allowed by configured whitelist/blacklist",

+                              name, version)

+                     continue

                  commit_msg = "Bump to rebuild because of %s update" % module_name

                  build_id = self.bump_and_rebuild_module(name, version, commit_msg=commit_msg)

                  if build_id is not None:

                      self.record_build(event, name, 'module', build_id)

  

+         return []

+ 

      def handle_rpm_spec_updated(self, event):

          """

          Rebuild module when spec file of rpm in module is updated.
@@ -163,6 +173,10 @@ 

          for mod in modules:

              module_name = mod['variant_name']

              module_branch = mod['variant_version']

+             if not self.allow_build(event, 'module', module_name, module_branch):

+                 log.info("Skip rebuild of %s:%s as it's not allowed by configured whitelist/blacklist",

+                          module_name, module_branch)

+                 continue

              log.info("Going to rebuild module '%s:%s'.", module_name, module_branch)

              commit_msg = "Bump to rebuild because of %s rpm spec update (%s)." % (rpm, rev)

              build_id = self.bump_and_rebuild_module(module_name, module_branch, commit_msg=commit_msg)

@@ -133,12 +133,14 @@ 

      {

          'release': 'fedora-25-updates',

          'id': 5430,

-         'name': 'testimage1'

+         'name': 'testimage1',

+         'branch': 'f25',

      },

      {

          'release': 'fedora-25-updates',

          'id': 5431,

-         'name': 'testimage2'

+         'name': 'testimage2',

+         'branch': 'f25',

      },

  ]

  
@@ -271,6 +273,7 @@ 

  

  class TestContainersIncludingRPMs(unittest.TestCase):

  

+     @patch('freshmaker.pdc.get_release_component', new=mock_get_release_component)

      @patch('freshmaker.handlers.image_builder.pdc.find_containers_by_rpm_name')

      def test_get_containers(self, find_containers_by_rpm_name):

          expected_found_containers = [
@@ -278,11 +281,13 @@ 

                  'release': 'fedora-24-updates',

                  'id': 5430,

                  'name': 'testimage1',

+                 'branch': 'f25',

              },

              {

                  'release': 'fedora-24-updates',

-                 'id': 5432,

+                 'id': 5431,

                  'name': 'testimage2',

+                 'branch': 'f25',

              },

          ]

          find_containers_by_rpm_name.return_value = expected_found_containers
@@ -305,6 +310,7 @@ 

               'release': '2.fc25',

               'version': '5.7.18'},

          ]

+ 

          containers = handler.get_containers_including_rpms(rpms)

  

          self.assertEqual(3, find_containers_by_rpm_name.call_count)

file modified
+66
@@ -307,5 +307,71 @@ 

          # build state updated to 'done'

          self.assertEquals(builds[0].state, models.BUILD_STATES['done'])

  

+     @mock.patch('freshmaker.handlers.mbs.utils')

+     @mock.patch('freshmaker.handlers.mbs.pdc')

+     @mock.patch('freshmaker.handlers.conf')

+     def test_module_is_not_allowed_to_be_built_in_whitelist(self, conf, pdc, utils):

+         conf.handler_build_whitelist = {

+             "MBS": {

+                 "RPMSpecUpdated": {

+                     "module": [

+                         {

+                             'name': 'test-.*',

+                         },

+                     ],

+                 },

+             },

+         }

+         conf.handler_build_blacklist = {}

+         m = helpers.DistGitMessage('rpms', 'bash', 'master', '123')

+         m.add_changed_file('bash.spec', 1, 1)

+         msg = m.produce()

+ 

+         event = self.get_event_from_msg(msg)

+ 

+         mod_info = helpers.PDCModuleInfo('testmodule', 'master', '20170412010101')

+         mod_info.add_rpm("bash-1.2.3-4.f26.rpm")

+         mod = mod_info.produce()

+         pdc.get_latest_modules.return_value = [mod]

+         event = self.get_event_from_msg(msg)

+         handler = MBS()

+         handler.rebuild_module = mock.Mock()

+         handler.rebuild_module.return_value = None

+         handler.handle(event)

+         handler.rebuild_module.assert_not_called()

+ 

+     @mock.patch('freshmaker.handlers.mbs.utils')

+     @mock.patch('freshmaker.handlers.mbs.pdc')

+     @mock.patch('freshmaker.handlers.conf')

+     def test_module_is_not_allowed_to_be_built_in_blacklist(self, conf, pdc, utils):

+         conf.handler_build_whitelist = {}

+         conf.handler_build_blacklist = {

+             "MBS": {

+                 "RPMSpecUpdated": {

+                     "module": [

+                         {

+                             'name': 'testmodule',

+                         },

+                     ],

+                 },

+             },

+         }

+         m = helpers.DistGitMessage('rpms', 'bash', 'master', '123')

+         m.add_changed_file('bash.spec', 1, 1)

+         msg = m.produce()

+ 

+         event = self.get_event_from_msg(msg)

+ 

+         mod_info = helpers.PDCModuleInfo('testmodule', 'master', '20170412010101')

+         mod_info.add_rpm("bash-1.2.3-4.f26.rpm")

+         mod = mod_info.produce()

+         pdc.get_latest_modules.return_value = [mod]

+         event = self.get_event_from_msg(msg)

+         handler = MBS()

+         handler.rebuild_module = mock.Mock()

+         handler.rebuild_module.return_value = None

+         handler.handle(event)

+         handler.rebuild_module.assert_not_called()

+ 

  if __name__ == '__main__':

      unittest.main()

Add HANDLER_BUILD_WHITELIST and HANDLER_BUILD_BLACKLIST options to
support whitelisting and blacklisting build targets by checking their
name and branch.

For example:

HANDLER_BUILD_WHITELIST = {
    "MBS": {
        "RPMSpecUpdated": {
            "module": [
                {
                    'name': 'base-.*',
                },
            ],
        },
    },
}
HANDLER_BUILD_BLACKLIST = {
    "MBS": {
        "RPMSpecUpdated": {
            "module": [
                {
                    'name': 'base-test-module',
                },
                {
                    'branch': 'rawhide',
                },
            ],
        },
    },
}

This will allow the MBS handler to build, on the 'RPMSpecUpdated'
event, any module whose name matches 'base-.*', except when:

1. the name matches 'base-test-module', or
2. the branch is 'rawhide'.

So in this example:

1. 'base-mymodule' from any branch can be built
2. 'base-test-module' from any branch cannot be built
3. any module from the 'rawhide' branch cannot be built

The two options are empty dicts by default.
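
For illustration, here is a minimal, self-contained sketch of how rules like the example above can be evaluated. The WHITELIST/BLACKLIST constants and the allowed() helper are hypothetical names used only for this sketch; in the PR the equivalent logic lives in the handler's allow_build method.

import re

# The example rules from the commit message, reduced to the per-artifact level.
WHITELIST = {'module': [{'name': 'base-.*'}]}
BLACKLIST = {'module': [{'name': 'base-test-module'}, {'branch': 'rawhide'}]}

def match_rule(name, branch, rule):
    # A rule matches when every key it defines matches the build target.
    name_rule = rule.get('name')
    branch_rule = rule.get('branch')
    if name_rule and not re.match(name_rule, name):
        return False
    if branch_rule and not re.match(branch_rule, branch):
        return False
    return True

def allowed(artifact_type, name, branch):
    whitelist = WHITELIST.get(artifact_type, [])
    blacklist = BLACKLIST.get(artifact_type, [])
    in_whitelist = not whitelist or any(match_rule(name, branch, r) for r in whitelist)
    in_blacklist = any(match_rule(name, branch, r) for r in blacklist)
    return in_whitelist and not in_blacklist

assert allowed('module', 'base-mymodule', 'f26')           # 1. can be built
assert not allowed('module', 'base-test-module', 'f26')    # 2. blocked by the name rule
assert not allowed('module', 'base-mymodule', 'rawhide')   # 3. blocked by the branch rule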


I don't see any problem here, however I have another thought to make this piece of code simpler, just FYI.

Calling allow_build aims to filter out which containers should be rebuilt. From this point of view, that call can happen outside of the for loop. For example:

...
containers = self.get_containers_including_rpms(rpms)
containers = [container for container in containers
              if self.allow_build(event, 'image', container['name'], container['branch'])]

for container in containers:
    ...

Why not change the class name as well?

I think it is a cosmetic issue, but I like to handle situations like this one in the following way:

if not self.allow_build(event, 'image', event.repo, event.branch):
    log.info("Skip rebuild of %s:%s as it's not allowed by configured whitelist/blacklist",
        event.repo, event.branch)
    return

try:
    task_id = self.build_image(repo_url=event.repo_url,
    ....

It makes the code easier to read for me, keeps the indentation lower, and makes it easier to add another condition on which the module should not be rebuilt.

What do you think?

I added names for these two classes to prevent errors (because allow_build in this commit needs to inspect the handler's name), and I don't want to change the existing class names in this commit, as I have a proposal for re-organizing the handlers in another way ('each handler will handle the events from one source, like dist-git, buildsys, mbs'), and then some of the classes can be renamed to follow a consistent style. I will discuss this with the team later.

The same cosmetic issue I have mentioned above applies here for the Docker images.

And here too, something like:

if not self.allow_build(...):
    continue

@cqi: Hm, when thinking about it, we won't get the log.info() here with your code. Not sure how important it is...

@jkaluza, you're right, that's why I didn't filter the modules/images first; skipping the build targets silently is not good. Filtering first would need two passes: one loop for printing the skip info, and a list operation to get the modules/images which are (or are not) filtered out.

I think changing the code to the following code-flow is the best approach in this situation. I think we should log.info in this case; you are right here.

if not self.allow_build(....):
    log.info(...)
    continue

....

rebased 8 years ago


Seems like all comments are addressed, so +1.

Pull-Request has been merged by qwan 8 years ago