#217 Do not try to pick up lost composes older than 3 days.
Merged 11 months ago by jkaluza. Opened 11 months ago by jkaluza.
jkaluza/odcs remove-hardlink into master

file modified +16 -5

@@ -696,19 +696,30 @@
          # to the bus and once some backend receives it, it moves it to
          # 'generating'. This should not take more than 3 minutes, so that's
          # the limit we will use to find out the stuck composes.
-         limit = datetime.utcnow() - timedelta(minutes=3)
+         # On the other hand, we don't want to regenerate composes older than
+         # 3 days, because nobody is probably waiting for them. Just mark
+         # them as "failed".
+         now = datetime.utcnow()
+         from_time = now - timedelta(days=3)
+         to_time = now - timedelta(minutes=3)
          # We don't want to be too greedy here, because there are other backends
          # which can handle the lost composes too later, so just take a few of
          # them in each run in each backend to balance the load.
          composes = Compose.query.filter(
              Compose.state == COMPOSE_STATES["wait"],
-             Compose.time_submitted < limit).order_by(
+             Compose.time_submitted < to_time).order_by(
                  Compose.id).limit(4).all()

          for compose in composes:
-             log.info("%r: Going to regenerate compose stuck in 'wait' "
-                      "state.", compose)
-             self.generate_new_compose(compose)
+             if compose.time_submitted < from_time:
+                 compose.state = COMPOSE_STATES["failed"]
+                 compose.state_reason = "Compose stuck in 'wait' state for longer than 3 days."
+                 db.session.add(compose)
+             else:
+                 log.info("%r: Going to regenerate compose stuck in 'wait' "
+                          "state.", compose)
+                 self.generate_new_compose(compose)
+         db.session.commit()

      def generate_lost_composes(self):
          """

@@ -27,11 +27,12 @@
  from concurrent.futures import ThreadPoolExecutor
  from datetime import timedelta
  from six.moves.urllib.parse import urlparse
+ from distutils.spawn import find_executable

  from flufl.lock import Lock

  from odcs.server import log, conf
- from odcs.server.utils import makedirs, find_executable, execute_cmd
+ from odcs.server.utils import makedirs, execute_cmd


  class MergeRepo(object):

@@ -27,7 +27,6 @@
  import time
  import subprocess
  import shutil
- from distutils.spawn import find_executable
  from threading import Timer

  from odcs.server import conf, log

@@ -405,6 +405,16 @@
          self.composer.pickup_waiting_composes()
          self.generate_new_compose.assert_not_called()

+     def test_pickup_waiting_composes_generating_state_old(self):
+         time_submitted = datetime.utcnow() - timedelta(days=5)
+         composes = []
+         for i in range(10):
+             composes.append(self._add_test_compose(
+                 COMPOSE_STATES["wait"], time_submitted=time_submitted))
+         composes = sorted(composes, key=lambda c: c.id)
+         self.composer.pickup_waiting_composes()
+         self.generate_new_compose.assert_not_called()
+
      def test_generate_lost_composes_generating_state(self):
          composes = []
          for i in range(10):

We currently have more than 200 composes in the "wait" state in prod, and some of them are really old (around 3 months). We don't want to upgrade to the new ODCS and start generating those ancient compose requests; nobody is waiting for them anymore.
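For reference, here is a minimal standalone sketch of the time window this change implements, assuming only the two cutoffs visible in the diff above (retry after 3 minutes stuck in 'wait', give up after 3 days). The helper name and constants are illustrative and are not part of the ODCS code:

from datetime import datetime, timedelta

# Illustrative thresholds mirroring the patch.
RETRY_AFTER = timedelta(minutes=3)  # stuck in 'wait' longer than this -> regenerate
GIVE_UP_AFTER = timedelta(days=3)   # stuck in 'wait' longer than this -> mark as failed

def classify_waiting_compose(time_submitted, now=None):
    """Decide what to do with a compose sitting in the 'wait' state."""
    now = now or datetime.utcnow()
    age = now - time_submitted
    if age > GIVE_UP_AFTER:
        return "fail"        # nobody is waiting for it anymore
    if age > RETRY_AFTER:
        return "regenerate"  # stuck, but recent enough to retry
    return "keep"            # a backend may still pick it up normally

# A compose submitted 5 days ago gets failed instead of regenerated,
# which matches the new test that checks such composes are not retried.
assert classify_waiting_compose(datetime.utcnow() - timedelta(days=5)) == "fail"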

This is to fix flake8 issues introduced when removing 'hardlink' in previous commits.
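The import change works because distutils.spawn.find_executable does the same job the removed odcs.server.utils helper appears to have done: it searches PATH for a binary and returns its absolute path, or None when it is not installed. A small illustrative example (the binary name is only an example, and distutils is deprecated since Python 3.10, where shutil.which is the usual replacement):

from distutils.spawn import find_executable

# Look up a binary on PATH; returns an absolute path string or None.
mergerepo_path = find_executable("mergerepo_c")
if mergerepo_path is None:
    raise RuntimeError("mergerepo_c was not found on PATH")
print(mergerepo_path)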

Pull-Request has been merged by jkaluza 11 months ago.