#243 Fail composes in 'generating' state older than 2 * conf.pungi_timeout.
Merged 5 months ago by jkaluza. Opened 5 months ago by jkaluza.
jkaluza/odcs generating-timeout  into  master

@@ -814,6 +814,29 @@ 

                       "state.", compose)

              self.generate_new_compose(compose)

  

+     def fail_lost_generating_composes(self):

+         """

+         Fails the composes in `generating` state in case they are in this

+         state for longer than `2 * conf.pungi_timeout`. Because composes

+         can be generating only for `conf.pungi_timeout` seconds, this is enough

+         time to generate any compose.

+         """

+         max_generating_time = 2 * conf.pungi_timeout

+         now = datetime.utcnow()

+         too_old_datetime = now - timedelta(seconds=max_generating_time)

+ 

+         # Get composes which are in 'generating' state for too long.

+         composes = Compose.query.filter(

+             Compose.state == COMPOSE_STATES["generating"],

+             Compose.time_submitted < too_old_datetime).order_by(

+                 Compose.id).all()

+ 

+         for compose in composes:

+             compose.transition(

+                 COMPOSE_STATES["failed"],

+                 "Compose stuck in 'generating' state for longer than %d "

+                 "seconds." % max_generating_time)

+ 

      def generate_lost_composes(self):

          """

          Gets all the composes in "generating" state and continues with

@@ -119,3 +119,4 @@ 

          elif topic.endswith(conf.internal_messaging_topic):

              self.remove_expired_compose_thread.do_work()

              self.composer.pickup_waiting_composes()

+             self.composer.fail_lost_generating_composes()

@@ -27,7 +27,7 @@ 

  from mock import patch, MagicMock, call

  

  import odcs.server

- from odcs.server import db, app

+ from odcs.server import db, app, conf

  from odcs.server.models import Compose

  from odcs.common.types import COMPOSE_STATES, COMPOSE_RESULTS, COMPOSE_FLAGS

  from odcs.server.backend import ComposerThread, resolve_compose

@@ -442,3 +442,21 @@ 

              call(composes[12]), call(composes[13]), call(composes[14]),

              call(composes[15]), call(composes[16]), call(composes[17]),

              call(composes[18]), call(composes[19])])

+ 

+     def test_fail_lost_generating_composes(self):

+         t = datetime.utcnow() - timedelta(seconds=2 * conf.pungi_timeout)

+ 

+         time_submitted = t - timedelta(minutes=5)

+         compose_to_fail = self._add_test_compose(

+             COMPOSE_STATES["generating"], time_submitted=time_submitted)

+ 

+         time_submitted = t + timedelta(minutes=5)

+         compose_to_keep = self._add_test_compose(

+             COMPOSE_STATES["generating"], time_submitted=time_submitted)

+ 

+         self.composer.fail_lost_generating_composes()

+         db.session.commit()

+         db.session.expire_all()

+ 

+         self.assertEqual(compose_to_fail.state, COMPOSE_STATES["failed"])

+         self.assertEqual(compose_to_keep.state, COMPOSE_STATES["generating"])

@@ -107,9 +107,12 @@ 

  

      @mock.patch("odcs.server.backend.RemoveExpiredComposesThread.do_work")

      @mock.patch("odcs.server.backend.ComposerThread.pickup_waiting_composes")

+     @mock.patch("odcs.server.backend.ComposerThread.fail_lost_generating_composes")

      def test_consumer_processing_internal_cleaup(

-             self, pickup_waiting_composes, remove_expired):

+             self, fail_generating_lost_composes, pickup_waiting_composes,

+             remove_expired):

          msg = self._internal_clean_composes_msg()

          self.consumer.consume(msg)

          remove_expired.assert_called_once()

          pickup_waiting_composes.assert_called_once()

+         fail_generating_lost_composes.assert_called_once()

no initial comment

Consider storing 2 * conf.pungi_timeout in a variable so it can be reused in the message below as well.

I don't follow why doubling is necessary.

rebased onto ad42167

5 months ago

Storing 2 * conf.pungi_timeout in a variable now.

The reason for using a bigger time value than just conf.pungi_timeout is that pungi_timeout limits the pungi command execution, but before this command is actually executed, there are some queries to the database and koji/mbs, the configuration files are generated and so on.

It probably does not take more than a few seconds, but I decided to choose a bigger value just to give it more time. In the end, the goal of this commit is to fix the current situation when composes are stuck in the 'generating' state for months :).

:thumbsup: thanks for the explanation :smiley:

Pull-Request has been merged by jkaluza

5 months ago