From 5a3d3d99d3210b44d4b739a565754342087bdf38 Mon Sep 17 00:00:00 2001
From: Martin Curlej
Date: Aug 30 2018 14:02:24 +0000
Subject: ISSUE-991: Added a method to the producer that will move stuck builds into 'failed' state

Signed-off-by: Martin Curlej
---

diff --git a/module_build_service/config.py b/module_build_service/config.py
index 0224175..de3c6cf 100644
--- a/module_build_service/config.py
+++ b/module_build_service/config.py
@@ -439,6 +439,18 @@ class Config(object):
             'default': 180,
             'desc': ('Time in days when to cleanup failed module builds and transition them to '
                      'the "garbage" state.')},
+        'cleanup_stuck_builds_time': {
+            'type': int,
+            'default': 7,
+            'desc': ('Time in days when to cleanup stuck module builds and transition them to '
+                     'the "failed" state. The module has to be in a state defined by the '
+                     '"cleanup_stuck_builds_states" option.')},
+        'cleanup_stuck_builds_states': {
+            'type': list,
+            'default': ["init", "build"],
+            'desc': ('States of builds which will be considered to move to failed state when a'
+                     ' build is in one of those states longer than the value configured in the '
+                     '"cleanup_stuck_builds_time"')},
         'resolver': {
             'type': str,
             'default': 'db',
diff --git a/module_build_service/scheduler/producer.py b/module_build_service/scheduler/producer.py
index 2b48835..65da5ea 100644
--- a/module_build_service/scheduler/producer.py
+++ b/module_build_service/scheduler/producer.py
@@ -328,3 +328,42 @@ class MBSProducer(PollingProducer):
             if delta.total_seconds() > config.koji_target_delete_time:
                 log.info("Removing target of module %r", module)
                 koji_session.deleteBuildTarget(target['id'])
+
+    def cancel_stuck_module_builds(self, config, session):
+        """
+        Method transitions builds which are stuck in one state too long to the "failed" state.
+        The states are defined with the "cleanup_stuck_builds_states" config option and the
+        time is defined by the "cleanup_stuck_builds_time" config option.
+
+        :param config: configuration object providing the two options above.
+        :param session: database session used to query and commit the module builds.
+        """
+        log.info(('Looking for module builds stuck in the states "{states}" '
+                  'more than {days} days').format(
+                      states=' and '.join(config.cleanup_stuck_builds_states),
+                      days=config.cleanup_stuck_builds_time
+        ))
+
+        threshold = datetime.utcnow() - timedelta(days=config.cleanup_stuck_builds_time)
+        # Map the numeric state stored on a build back to its configured name so that the
+        # state reason reads e.g. "build" rather than a bare integer.
+        state_names = {models.BUILD_STATES[state]: state
+                       for state in config.cleanup_stuck_builds_states}
+
+        module_builds = session.query(models.ModuleBuild).filter(
+            models.ModuleBuild.state.in_(list(state_names)),
+            models.ModuleBuild.time_modified < threshold).all()
+
+        log.info(' {0!r} module builds are stuck...'.format(len(module_builds)))
+
+        for build in module_builds:
+            nsvc = ":".join([build.name, build.stream, build.version, build.context])
+            log.info('Transitioning build "{nsvc}" to "Failed" state.'.format(nsvc=nsvc))
+
+            state_reason = "The module was in {state} for more than {days} days".format(
+                state=state_names[build.state],
+                days=config.cleanup_stuck_builds_time
+            )
+            build.transition(config, state=models.BUILD_STATES["failed"],
+                             state_reason=state_reason)
+            session.commit()
diff --git a/tests/test_scheduler/test_poller.py b/tests/test_scheduler/test_poller.py
index 2f2c8f5..2477d85 100644
--- a/tests/test_scheduler/test_poller.py
+++ b/tests/test_scheduler/test_poller.py
@@ -442,3 +442,42 @@ class TestPoller:
         assert module_build_one.state == models.BUILD_STATES['failed']
         # Make sure that the builder was never instantiated
         create_builder.assert_not_called()
+
+    @pytest.mark.parametrize('test_state', [models.BUILD_STATES[state]
+                                            for state in conf.cleanup_stuck_builds_states])
+    def test_cancel_stuck_module_builds(self, create_builder, koji_get_session, global_consumer,
+                                        dbg, test_state):
+        """Only builds stuck longer than the configured time are moved to "failed"."""
+        # One minute short of the configured number of days - must be left untouched.
+        module_build1 = models.ModuleBuild.query.get(1)
+        module_build1.state = test_state
+        under_thresh = conf.cleanup_stuck_builds_time - 1
+        module_build1.time_modified = datetime.utcnow() - timedelta(
+            days=under_thresh, hours=23, minutes=59)
+
+        # Exactly the configured number of days old - must be moved to "failed".
+        module_build2 = models.ModuleBuild.query.get(2)
+        module_build2.state = test_state
+        module_build2.time_modified = datetime.utcnow() - timedelta(
+            days=conf.cleanup_stuck_builds_time)
+
+        # Modified just now - must be left untouched.
+        module_build3 = models.ModuleBuild.query.get(3)
+        module_build3.state = test_state
+        module_build3.time_modified = datetime.utcnow()
+
+        db.session.commit()
+
+        consumer = mock.MagicMock()
+        consumer.incoming = queue.Queue()
+        global_consumer.return_value = consumer
+        hub = mock.MagicMock()
+        poller = MBSProducer(hub)
+
+        assert consumer.incoming.qsize() == 0
+
+        poller.cancel_stuck_module_builds(conf, db.session)
+
+        failed = models.ModuleBuild.query.filter_by(state=models.BUILD_STATES['failed']).all()
+        assert len(failed) == 1
+        assert failed[0].id == 2