#494 Log and ignore exceptions in cleanup
Merged 3 years ago by lsedlar. Opened 3 years ago by lsedlar.
lsedlar/odcs cleanup-errors  into  master

@@ -21,6 +21,7 @@ 

  #

  # Written by Jan Kaluza <jkaluza@redhat.com>

  

+ import contextlib

  import ssl

  import os

  import re
@@ -307,6 +308,18 @@ 

      """

      Runs the cleanup.

      """

-     remove_expired_compose_thread.do_work()

-     composer_thread.fail_lost_generating_composes()

-     reschedule_waiting_composes()

+ 

+     @contextlib.contextmanager

+     def log_errors(msg):

+         """Log any exception raised from the block and then ignore it."""

+         try:

+             yield

+         except Exception:

+             log.exception(msg)

+ 

+     with log_errors("Error while removing expired composes"):

+         remove_expired_compose_thread.do_work()

+     with log_errors("Error while marking lost generating composes as failed"):

+         composer_thread.fail_lost_generating_composes()

+     with log_errors("Error while rescheduling waiting composes"):

+         reschedule_waiting_composes()

@@ -5,7 +5,11 @@ 

  from .utils import ModelsBaseTest

  

  from odcs.server import conf, db

- from odcs.server.celery_tasks import TaskRouter, reschedule_waiting_composes

+ from odcs.server.celery_tasks import (

+     TaskRouter,

+     reschedule_waiting_composes,

+     run_cleanup,

+ )

  from odcs.common.types import COMPOSE_STATES, COMPOSE_RESULTS

  from odcs.server.pungi import PungiSourceType

  from odcs.server.models import Compose
@@ -317,3 +321,37 @@ 

          composes = sorted(composes, key=lambda c: c.id)

          reschedule_waiting_composes()

          schedule_compose.assert_not_called()

+ 

+ 

+ class TestRunCleanup:

+     def do_nothing(self):

+         pass

+ 

+     def raise_error(self):

+         raise RuntimeError("It failed")

+ 

+     @patch("odcs.server.celery_tasks.reschedule_waiting_composes")

+     @patch("odcs.server.celery_tasks.composer_thread")

+     @patch("odcs.server.celery_tasks.remove_expired_compose_thread")

+     def test_all_fine(

+         self,

+         remove_expired_compose_thread,

+         composer_thread,

+         reschedule_waiting_composes,

+     ):

+         funcs = [

+             remove_expired_compose_thread.do_work,

+             composer_thread.fail_lost_generating_composes,

+             reschedule_waiting_composes,

+         ]

+         num_funcs = len(funcs)

+ 

+         for i in range(num_funcs):

+             # Run cleanup repeatedly, each time setting a different function to fail

+             for x, func in enumerate(funcs):

+                 func.side_effect = self.raise_error if x == i else self.do_nothing

+ 

+             run_cleanup()

+ 

+         for func in funcs:

+             func.assert_has_calls([call()] * num_funcs)

The cleanup has three separate phases. If there is an exception raised from one of them, anything running afterwards will not be triggered.

This patch will log any unhandled exception, but it will continue with executing remaining cleanup steps.

I didn't run tests, but I made a small example and realized something. Doesn't log_errors as ContextMangager require __enter__ method or at least @contextmanager decorator?

Yes, it does. I'll update it.

rebased onto cdcb69b20c7e1a0d421971cf3b8ee07322b0ad6c

3 years ago

Jenkins is green: https://jenkins-fedora-infra.apps.ocp.ci.centos.org/job/odcs/1484/ (which doesn't mean much, this part of code doesn't have any tests).
I'm not sure why it's not reported here.

My modified example shows, that It might work as expected. It will log the message, but will it also log the Exception itself? Is it needed?

rebased onto e7c6419

3 years ago

Pull-Request has been merged by lsedlar

3 years ago