From 7e6bed9713b8a011dccb5bc164084b3547d796cd Mon Sep 17 00:00:00 2001 From: Haibo Lin Date: Jul 21 2020 09:15:25 +0000 Subject: Retry buildinstall tasks on losetup error JIRA: RHELCMP-1394 Signed-off-by: Haibo Lin --- diff --git a/pungi/phases/buildinstall.py b/pungi/phases/buildinstall.py index 63dd7e3..dfaac45 100644 --- a/pungi/phases/buildinstall.py +++ b/pungi/phases/buildinstall.py @@ -801,6 +801,10 @@ class BuildinstallThread(WorkerThread): weight=compose.conf["runroot_weights"].get("buildinstall"), ) else: + try: + lorax_log_dir = _get_log_dir(compose, variant, arch) + except Exception: + lorax_log_dir = None runroot.run( cmd, log_file=log_file, @@ -809,6 +813,7 @@ class BuildinstallThread(WorkerThread): mounts=[compose.topdir], weight=compose.conf["runroot_weights"].get("buildinstall"), chown_paths=chown_paths, + log_dir=lorax_log_dir, ) if final_output_dir != output_dir: diff --git a/pungi/runroot.py b/pungi/runroot.py index fb1a6df..322a1b8 100644 --- a/pungi/runroot.py +++ b/pungi/runroot.py @@ -74,12 +74,38 @@ class Runroot(kobo.log.LoggingBase): run(command, show_cmd=True, logfile=log_file) self._result = True + def _has_losetup_error(self, log_dir): + """ + Check if there's losetup error in log. + + This error happens if the Koji builder runs out of loopback devices. + This can happen if too many tasks that require them are scheduled on + the same builder. A retried task might end up on a different builder, + or maybe some other task will have finished already. + + :param str log_dir: path to buildinstall log dir, + e.g. logs/s390x/buildinstall-BaseOS-logs/ + """ + if not log_dir: + return False + + log_file = os.path.join(log_dir, "program.log") + try: + with open(log_file) as f: + for line in f: + if "losetup: cannot find an unused loop device" in line: + return True + except Exception: + pass + return False + def _run_koji(self, command, log_file=None, packages=None, arch=None, **kwargs): """ Runs the runroot command in Koji. """ runroot_channel = self.compose.conf.get("runroot_channel") runroot_tag = self.compose.conf["runroot_tag"] + log_dir = kwargs.pop("log_dir", None) koji_wrapper = kojiwrapper.KojiWrapper(self.compose.conf["koji_profile"]) koji_cmd = koji_wrapper.get_runroot_cmd( @@ -92,13 +118,19 @@ class Runroot(kobo.log.LoggingBase): **kwargs ) - output = koji_wrapper.run_runroot_cmd(koji_cmd, log_file=log_file) - if output["retcode"] != 0: - raise RuntimeError( - "Runroot task failed: %s. See %s for more details." - % (output["task_id"], log_file) - ) - self._result = output + attempt = 0 + max_retries = 3 + while True: + output = koji_wrapper.run_runroot_cmd(koji_cmd, log_file=log_file) + if output["retcode"] == 0: + self._result = output + return + elif attempt >= max_retries or not self._has_losetup_error(log_dir): + raise RuntimeError( + "Runroot task failed: %s. See %s for more details." + % (output["task_id"], log_file) + ) + attempt += 1 def _ssh_run(self, hostname, user, command, fmt_dict=None, log_file=None): """ diff --git a/tests/test_runroot.py b/tests/test_runroot.py index 2328904..95f4ca1 100644 --- a/tests/test_runroot.py +++ b/tests/test_runroot.py @@ -198,3 +198,37 @@ class TestRunrootOpenSSH(helpers.PungiTestCase): ), ] ) + + +class TestRunrootKoji(helpers.PungiTestCase): + def setUp(self): + super(TestRunrootKoji, self).setUp() + self.compose = helpers.DummyCompose( + self.topdir, {"runroot": True, "runroot_tag": "f28-build"}, + ) + + self.runroot = Runroot(self.compose) + + def test_has_losetup_error(self): + self.assertFalse(self.runroot._has_losetup_error(None)) + + with mock.patch("pungi.runroot.open", mock.mock_open(read_data="")): + self.assertFalse(self.runroot._has_losetup_error("/foo_log_dir")) + + with mock.patch( + "pungi.runroot.open", + mock.mock_open(read_data="losetup: cannot find an unused loop device"), + ): + self.assertTrue(self.runroot._has_losetup_error("/bar_log_dir")) + + @mock.patch("pungi.runroot.kojiwrapper.KojiWrapper") + def test_run_koji_retry(self, mock_kojiwrapper): + self.compose.conf["koji_profile"] = "test" + mock_kojiwrapper.return_value.get_runroot_cmd.return_value = ["df -h"] + mock_kojiwrapper.return_value.run_runroot_cmd.side_effect = [ + {"retcode": 1, "task_id": 1}, + {"retcode": 0, "task_id": 2}, + ] + self.runroot._has_losetup_error = mock.Mock(side_effect=[True, False]) + self.runroot._run_koji("") + self.assertEqual(mock_kojiwrapper.return_value.run_runroot_cmd.call_count, 2)