#1661 Fixing copr_prune_repo.py
Merged 3 years ago by msuchy. Opened 3 years ago by praiskup.
Unknown source prunerepo-fixes into master

@@ -28,7 +28,7 @@


          self.components = ["spawner", "terminator", "vmm", "build_dispatcher",

                             "action_dispatcher", "backend", "actions", "worker",

-                            "modifyrepo"]

+                            "modifyrepo", "pruner"]


      def setup_logging(self):


@@ -8,7 +8,6 @@

  import pwd

  import time

  import argparse

- import signal


  import json

  import multiprocessing
@@ -16,12 +15,13 @@

  from copr.exceptions import CoprException

  from copr.exceptions import CoprRequestException


- from copr_backend.helpers import BackendConfigReader

+ from copr_backend.helpers import BackendConfigReader, get_redis_logger

  from copr_backend.helpers import uses_devel_repo, get_persistent_status, get_auto_prune_status

  from copr_backend.frontend import FrontendClient

  from copr_backend.createrepo import createrepo


- log = multiprocessing.get_logger()

+ LOG = multiprocessing.log_to_stderr()

+ LOG.setLevel(logging.INFO)


  DEF_DAYS = 14

@@ -39,118 +39,106 @@

      dir_names = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]

      return dir_names, map(lambda x: os.path.join(path, x), dir_names)


- def logdebug(msg):

-     print(msg)

-     log.debug(msg)


- def loginfo(msg):

-     print(msg)

-     log.info(msg)


- def logerror(msg):

-     print(msg, file=sys.stderr)

-     log.error(msg)


- def logexception(msg):

-     print(msg, file=sys.stderr)

-     log.exception(msg)


  def runcmd(cmd):


      Run given command in a subprocess


-     loginfo('Executing: '+' '.join(cmd))

+     LOG.info('Executing: %s', cmd)

      process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8")

      (stdout, stderr) = process.communicate()

      if process.returncode != 0:

-         logerror(stderr)

+         LOG.error(stderr)

          raise Exception("Got non-zero return code ({0}) from prunerepo with stderr: {1}".format(process.returncode, stderr))

      return stdout


- def run_prunerepo(cmd, chroot_path, username, projectname, projectdir, sub_dir_name, prune_days):

-     # ignore the SIGINT otherwise prunerepo will print to broken pipe when Ctrl+C

-     signal.signal(signal.SIGINT, signal.SIG_IGN)

+ def run_prunerepo(chroot_path, username, projectname, projectdir, sub_dir_name, prune_days):

+     """

+     Running prunerepo in background worker.  We don't check the return value, so

+     the best we can do is that we return useful success/error message that will

+     be logged by parent process.

+     """


-         result = runcmd(cmd)

+         LOG.info("Pruning of %s/%s/%s started", username, projectdir, sub_dir_name)

+         cmd = ['prunerepo', '--verbose', '--days', str(prune_days), '--nocreaterepo', chroot_path]

+         stdout = runcmd(cmd)

+         LOG.info("Prunerepo stdout:\n%s", stdout)

          createrepo(path=chroot_path, username=username,


          clean_copr(chroot_path, prune_days, verbose=True)

-         return result

-     except Exception as err:

-         logexception(err)

-         logerror("Error pruning chroot {}/{}:{}".format(username, projectdir, sub_dir_name))

+     except Exception as err:  # pylint: disable=broad-except

+         LOG.exception(err)

+         LOG.error("Error pruning chroot %s/%s/%s", username, projectdir,

+                   sub_dir_name)


+     LOG.info("Pruning finished for projectdir %s/%s/%s",

+              username, projectdir, sub_dir_name)


  class Pruner(object):

      def __init__(self, opts, cmdline_opts=None):

          self.opts = opts

          self.prune_days = getattr(self.opts, "prune_days", DEF_DAYS)

          self.chroots = {}

-         self.frontend_client = FrontendClient(self.opts)

+         self.frontend_client = FrontendClient(self.opts, try_indefinitely=True,

+                                               logger=LOG)

          self.mtime_optimization = True

          self.max_processes = getattr(self.opts, "max_prune_processes", MAX_PROCESS)

          self.pool = multiprocessing.Pool(processes=self.max_processes)

          if cmdline_opts:

              self.mtime_optimization = not cmdline_opts.no_mtime_optimization


-     def __del__(self):

-         # see the warning at

-         # https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool

-         self.pool.close()

-         self.pool.join()


      def run(self):

          response = self.frontend_client.get("chroots-prunerepo-status")

          self.chroots = json.loads(response.content)


          results_dir = self.opts.destdir

-         loginfo("Pruning results dir: {} ".format(results_dir))

+         LOG.info("Pruning results dir: %s", results_dir)

          user_dir_names, user_dirs = list_subdir(results_dir)


-         loginfo("Going to process total number: {} of user's directories".format(len(user_dir_names)))

-         loginfo("Going to process user's directories: {}".format(user_dir_names))

+         LOG.info("Going to process total number: %s of user's directories", len(user_dir_names))

+         LOG.info("Going to process user's directories: %s", user_dir_names)


-         loginfo("--------------------------------------------")

+         LOG.info("--------------------------------------------")

          for username, subpath in zip(user_dir_names, user_dirs):

-             loginfo("For user `{}` exploring path: {}".format(username, subpath))

+             LOG.info("For user '%s' exploring path: %s", username, subpath)

              for projectdir, project_path in zip(*list_subdir(subpath)):

-                 loginfo("Exploring projectdir `{}` with path: {}".format(projectdir, project_path))

+                 LOG.info("Exploring projectdir '%s' with path: %s", projectdir, project_path)

                  self.prune_project(project_path, username, projectdir)

-                 loginfo("--------------------------------------------")

+                 LOG.info("--------------------------------------------")


-         loginfo("Setting final_prunerepo_done for deactivated chroots")

+         LOG.info("Setting final_prunerepo_done for deactivated chroots")

          chroots_to_prune = []

          for chroot, active in self.chroots.items():

              if not active:


-         self.frontend_client.post(chroots_to_prune, "final-prunerepo-done")

+         self.frontend_client.post("final-prunerepo-done", chroots_to_prune)




-         loginfo("--------------------------------------------")

-         loginfo("Pruning finished")

+         LOG.info("--------------------------------------------")

+         LOG.info("Pruning finished")


      def prune_project(self, project_path, username, projectdir):

-         loginfo("Going to prune {}/{}".format(username, projectdir))

+         LOG.info("Going to prune %s/%s", username, projectdir)


          projectname = projectdir.split(':', 1)[0]

-         loginfo("projectname = {}".format(projectname))

+         LOG.info("projectname = %s", projectname)



              if uses_devel_repo(self.opts.frontend_base_url, username, projectname):

-                 loginfo("Skipped {}/{} since auto createrepo option is disabled"

-                           .format(username, projectdir))

+                 LOG.info("Skipped %s/%s since auto createrepo option is disabled",

+                          username, projectdir)


              if get_persistent_status(self.opts.frontend_base_url, username, projectname):

-                 loginfo("Skipped {}/{} since the project is persistent"

-                           .format(username, projectdir))

+                 LOG.info("Skipped %s/%s since the project is persistent",

+                          username, projectdir)


              if not get_auto_prune_status(self.opts.frontend_base_url, username, projectname):

-                 loginfo("Skipped {}/{} since auto-prunning is disabled for the project"

-                           .format(username, projectdir))

+                 LOG.info("Skipped %s/%s since auto-prunning is disabled for the project",

+                          username, projectdir)


          except (CoprException, CoprRequestException) as exception:

-             logerror("Failed to get project details for {}/{} with error: {}".format(

-                 username, projectdir, exception))

+             LOG.error("Failed to get project details for %s/%s with error: %s",

+                       username, projectdir, exception)



          for sub_dir_name in os.listdir(project_path):
@@ -163,7 +151,8 @@



              if sub_dir_name not in self.chroots:

-                 loginfo("Final pruning already done for chroot {}/{}:{}".format(username, projectdir, sub_dir_name))

+                 LOG.info("Final pruning already done for chroot %s/%s:%s",

+                          username, projectdir, sub_dir_name)



              if self.mtime_optimization:
@@ -179,25 +168,20 @@

                  # error, I/O problems...) we rather wait 10 more days till we

                  # really start to ignore the directory.

                  if touched_before > int(self.prune_days) + 10:

-                     loginfo("Skipping {} - not changed for {} days".format(

-                         sub_dir_name, touched_before))

+                     LOG.info("Skipping %s - not changed for %s days",

+                              sub_dir_name, touched_before)


-             cmd = ['prunerepo', '--verbose', '--days', str(self.prune_days), '--nocreaterepo', chroot_path]

-             self.pool.apply_async(run_prunerepo,

-                                   (cmd, chroot_path, username, projectname,

-                                    projectdir, sub_dir_name, self.prune_days),

-                                   callback=loginfo, error_callback=logerror)


-             # this does not make sense unless max_prune_processes is set to 1

-             #loginfo("Pruning done for chroot {}/{}:{}".format(username, projectdir, sub_dir_name))

-         #loginfo("Pruning finished for projectdir {}/{}".format(username, projectdir))

+             self.pool.apply_async(run_prunerepo,

+                                   (chroot_path, username, projectname,

+                                    projectdir, sub_dir_name, self.prune_days))



  def clean_copr(path, days=DEF_DAYS, verbose=True):


      Remove whole copr build dirs if they no longer contain a RPM file


-     loginfo("Cleaning COPR repository...")

+     LOG.info("Cleaning COPR repository...")

      for dir_name in os.listdir(path):

          dir_path = os.path.abspath(os.path.join(path, dir_name))

@@ -211,7 +195,7 @@



          if verbose:

-             loginfo('Removing: ' + dir_path)

+             LOG.info('Removing: %s', dir_path)



          # also remove the associated log in the main dir
@@ -226,7 +210,7 @@

      Remove file given its absolute path


      if verbose:

-         loginfo("Removing: "+path)

+         LOG.info("Removing: %s", path)

      if os.path.exists(path) and os.path.isfile(path):


@@ -237,22 +221,28 @@

      return any([f for f in files if f.endswith(".rpm") and not f.endswith(srpm_ex)])



+ def redirect_logging(opts):

+     """

+     Redirect all logging to RedisLogHandler using BackendConfigReader options

+     """

+     global LOG  # pylint: disable=global-statement

+     LOG = get_redis_logger(opts, "copr_prune_results", "pruner")



  def main():

      args = parser.parse_args()

      config_file = os.environ.get("BACKEND_CONFIG", "/etc/copr/copr-be.conf")

-     pruner = Pruner(BackendConfigReader(config_file).read(), args)

+     opts = BackendConfigReader(config_file).read()

+     redirect_logging(opts)

+     pruner = Pruner(opts, args)



      except Exception as e:

-         logexception(e)

+         LOG.exception(e)


  if __name__ == "__main__":

      if pwd.getpwuid(os.getuid())[0] != "copr":

          print("This script should be executed under the `copr` user")



-         logging.basicConfig(

-             filename="/var/log/copr-backend/copr_prune_results.log",

-             format='[%(asctime)s][%(levelname)6s]: %(message)s',

-             level=logging.INFO)


No initial comment.

6 new commits added

  • backend: pruner: correctly deliver final prunerepo stamp
  • backend: pruner: logging through RedisLogHandler
  • backend: pruner: fix-up logging
  • backend: pruner: simplify the pool logic
  • backend: pruner: enable logging in FrontendClient
  • backend: pruner: re-try Frontend communication
3 years ago

rebased onto de8487d

3 years ago

Pull-Request has been merged by msuchy

3 years ago