#342 Do not wait for the result of Pulp compose in busy loop.
Merged 6 months ago by jkaluza. Opened 6 months ago by jkaluza.
jkaluza/freshmaker pulp-compose-no-wait  into  master

@@ -48,13 +48,22 @@ 

              self.force_dry_run()

  

          query = db.session.query(ArtifactBuild).join('composes')

-         first_batch_builds = query.filter(

-             ArtifactBuild.dep_on == None,  # noqa

+         # Get all the builds waiting for this compose in PLANNED state ...

+         builds_ready_to_rebuild = query.filter(

              ArtifactBuild.state == ArtifactBuildState.PLANNED.value,

              Compose.odcs_compose_id == event.compose['id'])

-         if self.dry_run:

-             builds_ready_to_rebuild = first_batch_builds

-         else:

+         # ... and depending on DONE parent image or parent image which is

+         # not planned to be built in this Event (dep_on == None).

+         builds_ready_to_rebuild = [

+             b for b in builds_ready_to_rebuild if

+             b.dep_on is None or b.dep_on.state == ArtifactBuildState.DONE.value

+         ]

+ 

+         if not self.dry_run:

+             # In non-dry-run mode, check that all the composes are ready.

+             # In dry-run mode, the composes are fake, so they are always ready.

              builds_ready_to_rebuild = six.moves.filter(

-                 lambda build: build.composes_ready, first_batch_builds)

+                 lambda build: build.composes_ready, builds_ready_to_rebuild)

+ 

+         # Start the rebuild.

          self.start_to_build_images(builds_ready_to_rebuild)

file modified
+1 -27

@@ -44,7 +44,7 @@ 

  from freshmaker.kojiservice import koji_service

  from freshmaker.consumer import work_queue_put

  from freshmaker.types import ArtifactBuildState

- from freshmaker.utils import krb_context, retry

+ from freshmaker.utils import krb_context

  from freshmaker.events import ODCSComposeStateChangeEvent

  

  

@@ -298,32 +298,6 @@ 

              with krb_context():

                  new_compose = odcs.new_compose(

                      ' '.join(content_sets), 'pulp')

- 

-                 # Pulp composes in ODCS takes just few seconds, because ODCS

-                 # only generates the .repo file after single query to Pulp.

-                 # TODO: Freshmaker is currently not designed to handle

-                 # multiple ODCS composes per rebuild Event and since these

-                 # composes are done in no-time normally, it is OK here to

-                 # block. It would still be nice to redesign that part of

-                 # Freshmaker to do things "right".

-                 # This is tracked here: https://pagure.io/freshmaker/issue/114

-                 @retry(timeout=60 * 30, interval=2)

-                 def wait_for_compose(compose_id):

-                     ret = odcs.get_compose(compose_id)

-                     if ret["state_name"] == "done":

-                         return True

-                     elif ret["state_name"] == "failed":

-                         return False

-                     self.handler.log_info(

-                         "Waiting for Pulp compose to finish: %r", ret)

-                     raise Exception("ODCS compose not finished.")

- 

-                 done = wait_for_compose(new_compose["id"])

-                 if not done:

-                     build.transition(

-                         ArtifactBuildState.FAILED.value, "Cannot generate "

-                         "ODCS PULP compose %s for content_sets %r"

-                         % (str(new_compose["id"]), content_sets))

          else:

              new_compose = self._fake_odcs_new_compose(

                  content_sets, 'pulp')

@@ -119,3 +119,21 @@ 

          args, kwargs = start_to_build_images.call_args

          passed_builds = sorted(args[0], key=lambda build: build.id)

          self.assertEqual([self.build_1, self.build_3], passed_builds)

+ 

+     @patch('freshmaker.models.ArtifactBuild.composes_ready',

+            new_callable=PropertyMock)

+     @patch('freshmaker.handlers.ContainerBuildHandler.start_to_build_images')

+     def test_start_to_build_parent_image_done(self, start_to_build_images, composes_ready):

+         composes_ready.return_value = True

+         self.build_1.state = ArtifactBuildState.DONE.value

+ 

+         event = ODCSComposeStateChangeEvent(

+             'msg-id', {'id': self.compose_1.id, 'state': 'done'}

+         )

+ 

+         handler = RebuildImagesOnODCSComposeDone()

+         handler.handle(event)

+ 

+         args, kwargs = start_to_build_images.call_args

+         passed_builds = sorted(args[0], key=lambda build: build.id)

+         self.assertEqual([self.build_3, self.build_2], passed_builds)

The original code existed because we were not able to store multiple Compose IDs in Freshmaker's DB, so the Freshmaker waited for the result of Pulp compose to workaround this. This is no longer true for some time and therefore the code for waiting for a compose can be removed.

This also fixes the possible race condition between "parent image build" and "child image ODCS compose being done" issue in RebuildImageOnODCSComposeDone handler.

See the commit messages for more info.

Commit 1ad738e fixes this pull-request

Pull-Request has been merged by jkaluza

6 months ago

Pull-Request has been merged by jkaluza

6 months ago