#224 Remove limit of number of Pulp composes picked up in the pickup_waiting_composes().
Merged 5 years ago by jkaluza. Opened 5 years ago by jkaluza.
jkaluza/odcs poller-improv  into  master

file modified
+22 -8
@@ -711,23 +711,37 @@ 

          now = datetime.utcnow()

          from_time = now - timedelta(days=3)

          to_time = now - timedelta(minutes=3)

-         # We don't want to be to greedy here, because there are other backends

-         # which can handle the lost composes too later, so just take few of

-         # them in each run in each backend to balance the load.

+ 

+         # Get composes which are in 'wait' state for too long.

          composes = Compose.query.filter(

              Compose.state == COMPOSE_STATES["wait"],

              Compose.time_submitted < to_time).order_by(

-                 Compose.id).limit(4).all()

+                 Compose.id).all()

+ 

+         # We don't want to be too greedy here, because there are other backends

+         # which can also handle the lost composes later, so just take a few non-Pulp

+         # composes each time.

+         # Pulp composes are much cheaper to generate - usually just a single call to

+         # Pulp, so get all of them.

+         non_pulp_composes_count = 0

  

          for compose in composes:

              if compose.time_submitted < from_time:

                  compose.state = COMPOSE_STATES["failed"]

                  compose.state_reason = "Compose stuck in 'wait' state for longer than 3 days."

                  db.session.add(compose)

-             else:

-                 log.info("%r: Going to regenerate compose stuck in 'wait' "

-                          "state.", compose)

-                 self.generate_new_compose(compose)

+                 continue

+ 

+             # Take only num_concurrent_pungi * 2 non-Pulp composes to keep some queue

+             # but leave something for other backends.

+             if compose.source_type != PungiSourceType.PULP:

+                 non_pulp_composes_count += 1

+                 if non_pulp_composes_count > conf.num_concurrent_pungi * 2:

+                     continue

+ 

+             log.info("%r: Going to regenerate compose stuck in 'wait' "

+                      "state.", compose)

+             self.generate_new_compose(compose)

          db.session.commit()

  

      def generate_lost_composes(self):

@@ -373,9 +373,10 @@ 

          super(TestComposerThreadStuckWaitComposes, self).tearDown()

          self.patch_generate_new_compose.stop()

  

-     def _add_test_compose(self, state, time_submitted=None):

+     def _add_test_compose(self, state, time_submitted=None,

+                           source_type=PungiSourceType.KOJI_TAG):

          compose = Compose.create(

-             db.session, "unknown", PungiSourceType.KOJI_TAG, "f26",

+             db.session, "unknown", source_type, "f26",

              COMPOSE_RESULTS["repository"], 60, "", 0)

          compose.state = state

          if time_submitted:
@@ -422,3 +423,22 @@ 

          composes = sorted(composes, key=lambda c: c.id)

          self.composer.pickup_waiting_composes()

          self.generate_new_compose.assert_not_called()

+ 

+     def test_pickup_waiting_composes_no_limit_for_pulp(self):

+         time_submitted = datetime.utcnow() - timedelta(minutes=5)

+         composes = []

+         for i in range(10):

+             composes.append(self._add_test_compose(

+                 COMPOSE_STATES["wait"], time_submitted=time_submitted))

+         for i in range(10):

+             composes.append(self._add_test_compose(

+                 COMPOSE_STATES["wait"], time_submitted=time_submitted,

+                 source_type=PungiSourceType.PULP))

+         composes = sorted(composes, key=lambda c: c.id)

+         self.composer.pickup_waiting_composes()

+         self.generate_new_compose.assert_has_calls([

+             call(composes[0]), call(composes[1]), call(composes[2]),

+             call(composes[3]), call(composes[10]), call(composes[11]),

+             call(composes[12]), call(composes[13]), call(composes[14]),

+             call(composes[15]), call(composes[16]), call(composes[17]),

+             call(composes[18]), call(composes[19])])

The code to handle the lost composes took only 4 lost composes per backend. Most of the composes ODCS does internally are Pulp composes and they are actually quite cheap to generate. This PR changes the code to handle any number of lost Pulp composes.

It also removes the hardcoded "4" limit for other source_types and instead uses conf.num_concurrent_pungi * 2. This means each time the code to handle lost composes is called, it fetches enough work to keep the backend busy for some time, depending on configuration.

Pull-Request has been merged by jkaluza

5 years ago