#269 Improve logging of failable deliverables
Merged 8 years ago by ausil. Opened 8 years ago by lsedlar.
lsedlar/pungi failable-logging  into  master

file modified
+35 -13
@@ -25,6 +25,7 @@ 

  import time

  import tempfile

  import shutil

+ import json

  

  import kobo.log

  from productmd.composeinfo import ComposeInfo
@@ -135,8 +136,10 @@ 

  

          # Stores list of deliverables that failed, but did not abort the

          # compose.

-         # {Variant.uid: {Arch: [deliverable]}}

+         # {deliverable: [(Variant.uid, arch, subvariant)]}

          self.failed_deliverables = {}

+         self.attempted_deliverables = {}

+         self.required_deliverables = {}

  

      get_compose_dir = staticmethod(get_compose_dir)

  
@@ -238,11 +241,16 @@ 

          return self._status_file

  

      def _log_failed_deliverables(self):

-         for variant, variant_data in self.failed_deliverables.iteritems():

-             for arch, deliverables in variant_data.iteritems():

-                 for deliverable in deliverables:

-                     self.log_info('Failed %s on variant <%s>, arch <%s>.'

-                                   % (deliverable, variant, arch))

+         for kind, data in self.failed_deliverables.iteritems():

+             for variant, arch, subvariant in data:

+                 self.log_info('Failed %s on variant <%s>, arch <%s>, subvariant <%s>.'

+                               % (kind, variant, arch, subvariant))

+         log = os.path.join(self.paths.log.topdir('global'), 'deliverables.json')

+         with open(log, 'w') as f:

+             json.dump({'required': self.required_deliverables,

+                        'failed': self.failed_deliverables,

+                        'attempted': self.attempted_deliverables},

+                       f, indent=4)

  

      def write_status(self, stat_msg):

          if stat_msg not in ("STARTED", "FINISHED", "DOOMED"):
@@ -257,7 +265,8 @@ 

  

          if stat_msg == 'FINISHED' and self.failed_deliverables:

              stat_msg = 'FINISHED_INCOMPLETE'

-             self._log_failed_deliverables()

+ 

+         self._log_failed_deliverables()

  

          with open(self.status_file, "w") as f:

              f.write(stat_msg + "\n")
@@ -308,12 +317,25 @@ 

          Variant can be None.

          """

          failable = get_arch_variant_data(self.conf, 'failable_deliverables', arch, variant)

-         if deliverable in failable:

-             # Store failed deliverable for later logging.

-             variant_uid = variant.uid if variant else ''

-             self.failed_deliverables.setdefault(variant_uid, {}).setdefault(arch, []).append(deliverable)

-             return True

-         return False

+         return deliverable in failable

+ 

+     def attempt_deliverable(self, variant, arch, kind, subvariant=None):

+         """Log information about attempted deliverable."""

+         variant_uid = variant.uid if variant else ''

+         self.attempted_deliverables.setdefault(kind, []).append(

+             (variant_uid, arch, subvariant))

+ 

+     def require_deliverable(self, variant, arch, kind, subvariant=None):

+         """Log information about attempted deliverable."""

+         variant_uid = variant.uid if variant else ''

+         self.required_deliverables.setdefault(kind, []).append(

+             (variant_uid, arch, subvariant))

+ 

+     def fail_deliverable(self, variant, arch, kind, subvariant=None):

+         """Log information about failed deliverable."""

+         variant_uid = variant.uid if variant else ''

+         self.failed_deliverables.setdefault(kind, []).append(

+             (variant_uid, arch, subvariant))

  

      @property

      def image_release(self):

file modified
+1 -1
@@ -183,7 +183,7 @@ 

  

      def process(self, item, num):

          compose, cmd, variant, arch = item

-         with failable(compose, variant, arch, 'iso', 'Creating ISO'):

+         with failable(compose, variant, arch, 'iso'):

              self.worker(compose, cmd, variant, arch, num)

  

      def worker(self, compose, cmd, variant, arch, num):

file modified
+1 -1
@@ -144,7 +144,7 @@ 

          compose, cmd = item

          variant = cmd["image_conf"]["image-build"]["variant"]

          subvariant = cmd["image_conf"]["image-build"].get("subvariant", variant.uid)

-         with failable(compose, variant, '*', 'image-build'):

+         with failable(compose, variant, '*', 'image-build', subvariant):

              self.worker(num, compose, variant, subvariant, cmd)

  

      def worker(self, num, compose, variant, subvariant, cmd):

file modified
+1 -1
@@ -205,7 +205,7 @@ 

  

      def process(self, item, num):

          compose, cmd, variant, arch = item

-         with failable(compose, variant, arch, 'live', 'Creating live images'):

+         with failable(compose, variant, arch, 'live', cmd.get('subvariant')):

              self.worker(compose, cmd, variant, arch, num)

  

      def worker(self, compose, cmd, variant, arch, num):

@@ -158,7 +158,7 @@ 

          compose, variant, config = item

          subvariant = config.pop('subvariant')

          self.num = num

-         with failable(compose, variant, '*', 'live-media'):

+         with failable(compose, variant, '*', 'live-media', subvariant):

              self.worker(compose, variant, subvariant, config)

  

      def _get_log_file(self, compose, variant, subvariant, config):

@@ -42,7 +42,7 @@ 

      def process(self, item, num):

          compose, variant, arch, config = item

          self.num = num

-         with util.failable(compose, variant, arch, 'ostree-installer', 'Ostree installer'):

+         with util.failable(compose, variant, arch, 'ostree-installer'):

              self.worker(compose, variant, arch, config)

  

      def worker(self, compose, variant, arch, config):

file modified
+14 -5
@@ -459,17 +459,26 @@ 

  

  

  @contextlib.contextmanager

- def failable(compose, variant, arch, deliverable, msg=None):

+ def failable(compose, variant, arch, deliverable, subvariant=None):

      """If a deliverable can fail, log a message and go on as if it succeeded."""

-     msg = msg or deliverable.capitalize()

+     msg = deliverable.replace('-', ' ').capitalize()

+     can_fail = compose.can_fail(variant, arch, deliverable)

+     if can_fail:

+         compose.attempt_deliverable(variant, arch, deliverable, subvariant)

+     else:

+         compose.require_deliverable(variant, arch, deliverable, subvariant)

      try:

          yield

      except Exception as exc:

-         if not compose.can_fail(variant, arch, deliverable):

+         if not can_fail:

              raise

          else:

-             compose.log_info('[FAIL] %s (variant %s, arch %s) failed, but going on anyway.'

-                              % (msg, variant.uid if variant else 'None', arch))

+             compose.fail_deliverable(variant, arch, deliverable, subvariant)

+             ident = 'variant %s, arch %s' % (variant.uid if variant else 'None', arch)

+             if subvariant:

+                 ident += ', subvariant %s' % subvariant

+             compose.log_info('[FAIL] %s (%s) failed, but going on anyway.'

+                              % (msg, ident))

              compose.log_info(str(exc))

              tb = traceback.format_exc()

              compose.log_debug(tb)

file modified
+3
@@ -64,6 +64,9 @@ 

          self.old_composes = []

          self.config_dir = '/home/releng/config'

          self.notifier = None

+         self.attempt_deliverable = mock.Mock()

+         self.fail_deliverable = mock.Mock()

+         self.require_deliverable = mock.Mock()

  

      def get_variants(self, arch=None, types=None, recursive=None):

          return [v for v in self.variants.values() if not arch or arch in v.arches]

file modified
+4 -4
@@ -336,14 +336,14 @@ 

          }

  

          variant = mock.Mock(uid='Server')

-         self.compose.can_fail(variant, 'x86_64', 'live')

-         self.compose.can_fail(None, '*', 'build-image')

+         self.compose.fail_deliverable(variant, 'x86_64', 'live')

+         self.compose.fail_deliverable(None, '*', 'build-image')

  

          self.compose.write_status('FINISHED')

  

          self.logger.log.assert_has_calls(

-             [mock.call(20, 'Failed build-image on variant <>, arch <*>.'),

-              mock.call(20, 'Failed live on variant <Server>, arch <x86_64>.')],

+             [mock.call(20, 'Failed build-image on variant <>, arch <*>, subvariant <None>.'),

+              mock.call(20, 'Failed live on variant <Server>, arch <x86_64>, subvariant <None>.')],

              any_order=True)

  

          with open(os.path.join(self.tmp_dir, 'STATUS'), 'r') as f:

@@ -357,7 +357,7 @@ 

              t.process((compose, cmd, compose.variants['Server'], 'x86_64'), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Creating ISO (variant Server, arch x86_64) failed, but going on anyway.'),

+             mock.call('[FAIL] Iso (variant Server, arch x86_64) failed, but going on anyway.'),

              mock.call('BOOM')

          ])

  
@@ -399,7 +399,7 @@ 

              t.process((compose, cmd, compose.variants['Server'], 'x86_64'), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Creating ISO (variant Server, arch x86_64) failed, but going on anyway.'),

+             mock.call('[FAIL] Iso (variant Server, arch x86_64) failed, but going on anyway.'),

              mock.call('Runroot task failed: 1234. See {} for more details.'.format(

                  self.topdir + '/logs/x86_64/createiso-image-name.x86_64.log'))

          ])
@@ -479,7 +479,7 @@ 

              t.process((compose, cmd, compose.variants['Server'], 'x86_64'), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Creating ISO (variant Server, arch x86_64) failed, but going on anyway.'),

+             mock.call('[FAIL] Iso (variant Server, arch x86_64) failed, but going on anyway.'),

              mock.call('BOOM')

          ])

  

@@ -540,7 +540,7 @@ 

              t.process((compose, cmd), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Image-build (variant Client, arch *) failed, but going on anyway.'),

+             mock.call('[FAIL] Image build (variant Client, arch *, subvariant Client) failed, but going on anyway.'),

              mock.call('ImageBuild task failed: 1234. See %s for more details.'

                        % (os.path.join(self.topdir,

                                        'logs/amd64-x86_64/imagebuild-Client-Client-docker.amd64-x86_64.log'))),
@@ -591,7 +591,7 @@ 

              t.process((compose, cmd), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Image-build (variant Client, arch *) failed, but going on anyway.'),

+             mock.call('[FAIL] Image build (variant Client, arch *, subvariant Client) failed, but going on anyway.'),

              mock.call('BOOM'),

          ])

  

@@ -598,7 +598,7 @@ 

              t.process((compose, cmd, compose.variants['Client'], 'amd64'), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Creating live images (variant Client, arch amd64) failed, but going on anyway.'),

+             mock.call('[FAIL] Live (variant Client, arch amd64, subvariant Client) failed, but going on anyway.'),

              mock.call('LiveImage task failed: 123. See %s/logs/amd64/liveimage-None-None-xyz.amd64.log for more details.'

                        % self.topdir)

          ])
@@ -639,7 +639,7 @@ 

              t.process((compose, cmd, compose.variants['Client'], 'amd64'), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Creating live images (variant Client, arch amd64) failed, but going on anyway.'),

+             mock.call('[FAIL] Live (variant Client, arch amd64, subvariant Client) failed, but going on anyway.'),

              mock.call('BOOM')

          ])

  

file modified
+2 -2
@@ -418,7 +418,7 @@ 

              t.process((compose, compose.variants['Server'], config), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Live-media (variant Server, arch *) failed, but going on anyway.'),

+             mock.call('[FAIL] Live media (variant Server, arch *, subvariant KDE) failed, but going on anyway.'),

              mock.call('Live media task failed: 1234. See %s for more details.'

                        % (os.path.join(self.topdir, 'logs/amd64-x86_64/livemedia-Server-KDE.amd64-x86_64.log')))

          ])
@@ -460,7 +460,7 @@ 

              t.process((compose, compose.variants['Server'], config), 1)

  

          compose.log_info.assert_has_calls([

-             mock.call('[FAIL] Live-media (variant Server, arch *) failed, but going on anyway.'),

+             mock.call('[FAIL] Live media (variant Server, arch *, subvariant KDE) failed, but going on anyway.'),

              mock.call('BOOM')

          ])

  

no initial comment

The pungi.global.log should show subvariant for each failed deliverable (if available). When the compose finishes, there is a new log file in logs/global/deliverables.json containing details about all deliverables as triples of (variant, arch, subvariant).

  • required lists all deliverables that can not fail
  • attempted lists all failable deliverables that were started
  • failed is a subset of attempted and only contains deliverables that failed

If the compose fails, the lists may be incomplete.

Example output

Pull-Request has been merged by ausil

8 years ago

FWIW, my standard identifier for a single image within a compose is (subvariant, type, arch) - I'm fairly sure that combination is never duplicated for any image in a Fedora compose. Thinking about it variant+subvariant+arch probably works too, but it doesn't match any of the other stuff I've been doing. Ah, well. I'll take a closer look at this tomorrow and see if I see any major problems. Thanks!

I could change it to that format (I did not realize there are differences).

The main problem I see with the current logging and general setup of failable stuff is that it needs to work not just for images, but other deliverables: there is at least ostree repo where neither subvariant nor type make much sense.

One final issue is that in some parts we don't have the needed granularity: live media are a single task in koji for all architectures. If any of them fail, all of them fail. That is why the logs show arch as *. There is an open issue about this (see #128), but no clear solution exists yet.

I use that form kinda because it tends to be what a human would say - "the x86_64 Workstation live', say. Still, it's not perfect and I might have to change it some time.

How about simply providing all the available metadata for the failed deliverable? Then it's up to consumers to decide what they want to do with it, what identifier suits their purpose.