#545 make copr_prune_results.py skip outdated chroots
Merged 8 months ago by msuchy. Opened 9 months ago by dturecek.
copr/ dturecek/copr prune-repo  into  master

@@ -7,6 +7,8 @@ 

  import subprocess

  import pwd

  

+ import json

+ 

  log = logging.getLogger(__name__)

  

  from copr.exceptions import CoprException

@@ -16,6 +18,7 @@ 

  

  from backend.helpers import BackendConfigReader

  from backend.helpers import get_auto_createrepo_status,get_persistent_status,get_auto_prune_status

+ from backend.frontend import FrontendClient

  

  DEF_DAYS = 14

  

@@ -56,8 +59,13 @@ 

      def __init__(self, opts):

          self.opts = opts

          self.prune_days = getattr(self.opts, "prune_days", DEF_DAYS)

+         self.chroots = {}

+         self.frontend_client = FrontendClient(self.opts)

  

      def run(self):

+         response = self.frontend_client._post_to_frontend_repeatedly("", "chroots-prunerepo-status")

+         self.chroots = json.loads(response.content)

+ 

          results_dir = self.opts.destdir

          loginfo("Pruning results dir: {} ".format(results_dir))

          user_dir_names, user_dirs = list_subdir(results_dir)

@@ -73,6 +81,14 @@ 

                  self.prune_project(project_path, username, projectdir)

                  loginfo("--------------------------------------------")

  

+         loginfo("Setting final_prunerepo_done for deactivated chroots")

+         chroots_to_prune = []

+         for chroot, active in self.chroots.items():

+             if not active:

+                 chroots_to_prune.append(chroot)

+         self.frontend_client._post_to_frontend_repeatedly(chroots_to_prune, "final-prunerepo-done")

+ 

+         loginfo("--------------------------------------------")

          loginfo("Pruning finished")

  

      def prune_project(self, project_path, username, projectdir):

@@ -108,6 +124,10 @@ 

              if not os.path.isdir(chroot_path):

                  continue

  

+             if sub_dir_name not in self.chroots:

+                 loginfo("Final pruning already done for chroot {}/{}:{}".format(username, projectdir, sub_dir_name))

+                 continue

+ 

              try:

                  cmd = ['prunerepo', '--verbose', '--days={0}'.format(self.prune_days), '--cleancopr', chroot_path]

                  stdout = runcmd(cmd)

@@ -0,0 +1,22 @@ 

+ """add column final_prunerepo_done to mock_chroot table

+ 

+ Revision ID: b64659389c54

+ Revises: ca76b7902c2f

+ Create Date: 2019-02-28 11:57:48.674072

+ 

+ """

+ 

+ # revision identifiers, used by Alembic.

+ revision = 'b64659389c54'

+ down_revision = 'ca76b7902c2f'

+ 

+ from alembic import op

+ import sqlalchemy as sa

+ 

+ 

+ def upgrade():

+     op.add_column('mock_chroot', sa.Column('final_prunerepo_done', sa.Boolean, nullable=False, server_default='0'))

+ 

+ 

+ def downgrade():

+     op.drop_column('mock_chroot', 'final_prunerepo_done')

@@ -791,3 +791,23 @@ 

              split_name.append(None)

  

          return tuple(split_name)

+ 

+     @classmethod

+     def prunerepo_finished(cls, chroots_pruned):

+         for chroot_name in chroots_pruned:

+             chroot = cls.get_from_name(chroot_name).one()

+             if not chroot.is_active:

+                 chroot.final_prunerepo_done = True

+ 

+         db.session.commit()

+         return True

+ 

+     @classmethod

+     def chroots_prunerepo_status(cls):

+         query = models.MockChroot.query

+         chroots = {}

+         for chroot in query:

+             if chroot.is_active or not chroot.final_prunerepo_done:

+                 chroots[chroot.name] = chroot.is_active

+ 

+         return chroots

@@ -978,6 +978,10 @@ 

      distgit_branch = db.relationship("DistGitBranch",

              backref=db.backref("chroots"))

  

+     # After a mock_chroot is EOLed, this is set to true so that copr_prune_results

+     # will skip all projects using this chroot

+     final_prunerepo_done = db.Column(db.Boolean, default=False, server_default="0", nullable=False)

+ 

      @classmethod

      def latest_fedora_branched_chroot(cls, arch='x86_64'):

          return (cls.query

@@ -10,6 +10,8 @@ 

  from coprs.logic.builds_logic import BuildsLogic

  from coprs.logic.complex_logic import ComplexLogic

  from coprs.logic.packages_logic import PackagesLogic

+ from coprs.logic.coprs_logic import MockChrootsLogic

+ from coprs.exceptions import MalformedArgumentException

  

  from coprs.views import misc

  from coprs.views.backend_ns import backend_ns

@@ -342,3 +344,13 @@ 

              response["msg"] = "build chroot is not in running states, ignoring"

  

      return flask.jsonify(response)

+ 

+ @backend_ns.route("/chroots-prunerepo-status/", methods=["POST", "PUT"])

+ def chroots_prunerepo_status():

+     return flask.jsonify(MockChrootsLogic.chroots_prunerepo_status())

+ 

+ @backend_ns.route("/final-prunerepo-done/", methods=["POST", "PUT"])

+ @misc.backend_authenticated

+ def final_prunerepo_done():

+     chroots_pruned = flask.request.get_json()

+     return flask.jsonify(MockChrootsLogic.prunerepo_finished(chroots_pruned)) 

\ No newline at end of file

I've added a column to the copr_chroot table to record when a chroot has been pruned so that the copr_prune_results.py script will skip it.

could we have something like final_prunerepo_done = True?

I'm afraid that this will create non-trivial amount of traffic from backend to frontend; because for each copr project (regardless of the is_pruned flag) there will be at least one api call to frontend for copr_chroot ... is this OK?

If I would do touch final_prunerepo_done for each EOLed chroot and just check if the file exists as was suggested at first, then I would only need to query frontend for mock_chroot.is_active. What do you think about this?

Right, I'd probably vote for touch here, I think. I sort of dislike the fact that it would still have to read the obsoleted directories, even though those are long time untouched (e.g. you have to still read the fedora-22-x86_64 directory, and check for final_prunerepo_done file). But this could be optimized based on modify times in future...

rebased onto c95bed11d486dab3a7731aba0be498cc9511cb18

8 months ago

I've moved the final_prunerepo_done column to the mock_chroot table and changed the script so that it sets the value to True at the very end.

I've tested the changes on dev and it seems it is working.

This is now fairly optimal (one API call per mock-chroot only, thanks!).

What happens if there are e.g. two sub-dirs named fedora-rawhide-x86_64 and foo-test-directory (the later is not really related to any chroot and not tracked by frontend)?

What would happen if we returned a list of chroots to be prunned (== chroots which don't have final-prunerepo-done flag)? In that case the code in prunerepo would be much simpler, and we could live with single query per prunerepo run.

If a sub-dir doesn't correspond to any chroot, then frontend returns final-prunerepo-done = None, and the script always runs prunerepo on the subdir and skips it when querying frontend to set it to true in the db.

What would happen if we returned a list of chroots to be prunned

It didn't occur to me to do it this way, I will change it, thanks for the suggestion.

1 new commit added

  • [frontend][backend] send prunerepo status for all chroots at once
8 months ago

So I've changed it as per your suggestion. There are only two API calls now -- one at the start to get the status for all chroots, and one at the end to change the final_prunerepo_done flag.

However, in the first call, frontend returns status for all the chroots (not only for the ones that should be pruned). Otherwise, the script would skip all sub-directories that don't correspond to any chroots.

Otherwise, the script would skip all sub-directories that don't correspond to any chroots.

Isn't this what we actually want? I see it now. good. Please ping me on irc on this, I need to be sure (I can't find you on irc now). IMO we should return only active || NOT final_prunerepo_done otherwise the list will be getting larger with time.

The empty argument looks weird, and using _post_to_frontend_repeatedly for a decent sql query too. Seems like we should have a better way to ask frontend, but it doesn't mean it must happen here ..

It would be IMO safer if we specified the list of actually processed chroots here, though.

Otherwise it looks really promising now, thanks for the work @dturecek! I'd rather ask @msuchy because we lost the "granularity" we talked about before, but except for the details +1 from me.

This should IMO be False. Running prunerepo here doesn't make much sense to me, if that's entirely random directory name.

2 new commits added

  • [frontend] fix down_revision in alembic
  • [frontend][backend] make frontend send only chroots that are to be pruned
8 months ago

I've addressed all your points (except using _post_to_frontend_repeatedly) and fixed the alembic revision to point to the latest one currently in the master branch.

nit, but chroots_pruned would be slightly better

Is this working without db.session.add()? Commit() could be theoretically called once.

Otherwise I like this PR, thank you! I'd like to test the final (preferably squashed) version.

2 new commits added

  • [frontend][backend] make copr_prune_results skip already pruned outdated chroots
  • [frontend] add column final_prunerepo_done to mock_chroot
8 months ago

Is this working without db.session.add()?

It does, as it doesn't add new rows to the table.

I moved commit() to the end, so it's called only once, renamed the method to chroots_pruned and rebased the commits.

Probably final note, what about if chroot.is_active: chroot.final_prunerepo_done = True, just in case we did some mistake, and marked some "active" chroot as pruned?

I like this, thanks a lot.

Btw., rebase against origin/master would help with testing (I do git checkout dturecek/prune-repo where alembic-3 upgrade head doesn't work, even though the upgrade hashes are correct, because in your branch is one migration missing).

rebased onto fb37d25ef1914aa85e19f95754bcea8d3e922a12

8 months ago

2 new commits added

  • [frontend][backend] make copr_prune_results skip already pruned outdated chroots
  • [frontend] add column final_prunerepo_done to mock_chroot
8 months ago

I've just added the check for chroot.is_active in the last rebase.

Thanks, is the two-commit approach desired? I'd squash them personaly. Otherwise lgtm, +1.

Well, I guess I don't have any real reason to split this into two commits, I'll squash them then.

rebased onto 58b3785d1a36bbb2d9715c6df94ae77df6a14ce0

8 months ago

rebased onto 81583c56ae195df6d15d1883da6f3e28f56eb322

8 months ago

I renamed chroots_pruned(cls, chroots_to_prune) to prunerepo_finished(cls, chroots_pruned) as it makes more sense this way.

rebased onto 09bbcd9

8 months ago

Pull-Request has been merged by msuchy

8 months ago