#224 Remove limit of number of Pulp composes picked up in the pickup_waiting_composes().
Merged 5 years ago by jkaluza. Opened 5 years ago by jkaluza.
jkaluza/odcs poller-improv  into  master

file modified
+22 -8
@@ -711,23 +711,37 @@ 

          now = datetime.utcnow()

          from_time = now - timedelta(days=3)

          to_time = now - timedelta(minutes=3)

-         # We don't want to be to greedy here, because there are other backends

-         # which can handle the lost composes too later, so just take few of

-         # them in each run in each backend to balance the load.

+ 

+         # Get composes which are in 'wait' state for too long.

          composes = Compose.query.filter(

              Compose.state == COMPOSE_STATES["wait"],

              Compose.time_submitted < to_time).order_by(

-                 Compose.id).limit(4).all()

+                 Compose.id).all()

+ 

+         # We don't want to be too greedy here, because there are other backends

+         # which can also handle the lost composes later, so just take a few non-Pulp

+         # composes each time.

+         # Pulp composes are much cheaper to generate - usually just a single call to

+         # Pulp, so get all of them.

+         non_pulp_composes_count = 0

  

          for compose in composes:

              if compose.time_submitted < from_time:

                  compose.state = COMPOSE_STATES["failed"]

                  compose.state_reason = "Compose stuck in 'wait' state for longer than 3 days."

                  db.session.add(compose)

-             else:

-                 log.info("%r: Going to regenerate compose stuck in 'wait' "

-                          "state.", compose)

-                 self.generate_new_compose(compose)

+                 continue

+ 

+             # Take only num_concurrent_pungi * 2 non-Pulp composes to keep some queue

+             # but leave something for other backends.

+             if compose.source_type != PungiSourceType.PULP:

+                 non_pulp_composes_count += 1

+                 if non_pulp_composes_count > conf.num_concurrent_pungi * 2:

+                     continue

+ 

+             log.info("%r: Going to regenerate compose stuck in 'wait' "

+                      "state.", compose)

+             self.generate_new_compose(compose)

          db.session.commit()

  

      def generate_lost_composes(self):

@@ -373,9 +373,10 @@ 

          super(TestComposerThreadStuckWaitComposes, self).tearDown()

          self.patch_generate_new_compose.stop()

  

-     def _add_test_compose(self, state, time_submitted=None):

+     def _add_test_compose(self, state, time_submitted=None,

+                           source_type=PungiSourceType.KOJI_TAG):

          compose = Compose.create(

-             db.session, "unknown", PungiSourceType.KOJI_TAG, "f26",

+             db.session, "unknown", source_type, "f26",

              COMPOSE_RESULTS["repository"], 60, "", 0)

          compose.state = state

          if time_submitted:
@@ -422,3 +423,22 @@ 

          composes = sorted(composes, key=lambda c: c.id)

          self.composer.pickup_waiting_composes()

          self.generate_new_compose.assert_not_called()

+ 

+     def test_pickup_waiting_composes_no_limit_for_pulp(self):

+         time_submitted = datetime.utcnow() - timedelta(minutes=5)

+         composes = []

+         for i in range(10):

+             composes.append(self._add_test_compose(

+                 COMPOSE_STATES["wait"], time_submitted=time_submitted))

+         for i in range(10):

+             composes.append(self._add_test_compose(

+                 COMPOSE_STATES["wait"], time_submitted=time_submitted,

+                 source_type=PungiSourceType.PULP))

+         composes = sorted(composes, key=lambda c: c.id)

+         self.composer.pickup_waiting_composes()

+         self.generate_new_compose.assert_has_calls([

+             call(composes[0]), call(composes[1]), call(composes[2]),

+             call(composes[3]), call(composes[10]), call(composes[11]),

+             call(composes[12]), call(composes[13]), call(composes[14]),

+             call(composes[15]), call(composes[16]), call(composes[17]),

+             call(composes[18]), call(composes[19])])

The code to handle the lost composes took only 4 lost composes per backend. Most of the composes ODCS does internally are Pulp composes and they are actually quite cheap to generate. This PR changes the code to handle any number of lost Pulp composes.

It also removes the hardcoded "4" limit for other source_types and instead uses conf.num_concurrent_pungi * 2. This means each time the code to handle lost composes is called, it fetches enough work to keep the backend busy for some time, depending on configuration.

Pull-Request has been merged by jkaluza

5 years ago