#474 Remove data from outdated chroots
Merged 5 years ago by msuchy. Opened 5 years ago by frostyx.
copr/ frostyx/copr delete-outdated-chroots  into  master

@@ -289,6 +289,22 @@ 

              except CreateRepoError:

                  self.log.exception("Error making local repo: %s", full_path)

  

+     def handle_delete_chroot(self):

+         self.log.info("Action delete project chroot.")

+ 

+         ext_data = json.loads(self.data["data"])

+         ownername = ext_data["ownername"]

+         projectname = ext_data["projectname"]

+         chrootname = ext_data["chrootname"]

+ 

+         chroot_path = os.path.join(self.destdir, ownername, projectname, chrootname)

+         self.log.info("Going to delete: %s", chroot_path)

+ 

+         if not os.path.isdir(chroot_path):

+             self.log.error("Directory %s not found", chroot_path)

+             return

+         shutil.rmtree(chroot_path)

+ 

      def handle_generate_gpg_key(self, result):

          ext_data = json.loads(self.data["data"])

          self.log.info("Action generate gpg key: %s", ext_data)
@@ -456,6 +472,8 @@ 

                  self.handle_delete_project(result)

              elif self.data["object_type"] == "build":

                  self.handle_delete_build()

+             elif self.data["object_type"] == "chroot":

+                 self.handle_delete_chroot()

  

              result.result = ActionResult.SUCCESS

  

@@ -0,0 +1,24 @@ 

+ """Add index for delete_after a delete_notify

+ 

+ Revision ID: 8bf844cc7135

+ Revises: 6fed8655d074

+ Create Date: 2019-01-04 10:35:26.263517

+ 

+ """

+ 

+ # revision identifiers, used by Alembic.

+ revision = '8bf844cc7135'

+ down_revision = '6fed8655d074'

In the current master, there is a broken migration path.
It isn't required, but PR#480 should be resolved first.

+ 

+ from alembic import op

+ import sqlalchemy as sa

+ 

+ 

+ def upgrade():

+     op.create_index(op.f('ix_copr_chroot_delete_after'), 'copr_chroot', ['delete_after'], unique=False)

+     op.create_index(op.f('ix_copr_chroot_delete_notify'), 'copr_chroot', ['delete_notify'], unique=False)

+ 

+ 

+ def downgrade():

+     op.drop_index(op.f('ix_copr_chroot_delete_notify'), table_name='copr_chroot')

+     op.drop_index(op.f('ix_copr_chroot_delete_after'), table_name='copr_chroot')

@@ -0,0 +1,34 @@ 

+ from flask_script import Command, Option

+ from coprs import db

+ from coprs.logic import coprs_logic, actions_logic

+ 

+ 

+ class DeleteOutdatedChrootsCommand(Command):

+     """

+     Delete data in all chroots that are considered as outdated. That means, the chroot is EOL

+     and the preservation period is over because admin of the project didn't extend its duration.

+     """

+     option_list = [

+         Option("--dry-run", action="store_true",

+                help="Do not actually remove any data, but rather print information on stdout"),

+     ]

+ 

+     def run(self, dry_run):

+         self.dry_run = dry_run

+ 

+         chroots = coprs_logic.CoprChrootsLogic \

+             .filter_outdated_to_be_deleted(coprs_logic.CoprChrootsLogic.get_multiple())

+         for i, chroot in enumerate(chroots, start=1):

+             # This command will possibly delete a lot of chroots and can be a performance issue when committing

+             # all at once. We are going to commit every x actions to avoid that.

+             if i % 1000 == 0 and not self.dry_run:

+                 db.session.commit()

+             self.delete(chroot)

+         db.session.commit()

+ 

+     def delete(self, chroot):

+         if self.dry_run:

+             print("Add delete_chroot action for {} in {}".format(chroot.name, chroot.copr.full_name))

+         else:

+             actions_logic.ActionsLogic.send_delete_chroot(chroot)

+             chroot.delete_after = None

@@ -261,3 +261,25 @@ 

              created_on=int(time.time()),

          )

          db.session.add(action)

+ 

+     @classmethod

+     def send_delete_chroot(cls, copr_chroot):

+         """

+         Schedules deletion of a chroot directory from project

+         Useful to remove outdated chroots

+         :type build: models.CoprChroot

+         """

+         data_dict = {

+             "ownername": copr_chroot.copr.owner_name,

+             "projectname": copr_chroot.copr.name,

+             "chrootname": copr_chroot.name,

+         }

+ 

+         action = models.Action(

+             action_type=ActionTypeEnum("delete"),

+             object_type="chroot",

+             object_id=None,

+             data=json.dumps(data_dict),

+             created_on=int(time.time())

+         )

+         db.session.add(action)

@@ -678,6 +678,10 @@ 

      def filter_outdated(cls, query):

          return query.filter(models.CoprChroot.delete_after >= datetime.datetime.now())

  

+     @classmethod

+     def filter_outdated_to_be_deleted(cls, query):

+         return query.filter(models.CoprChroot.delete_after < datetime.datetime.now())

Can you add an index to DB on the column delete_after?

+ 

  

  class MockChrootsLogic(object):

      @classmethod

@@ -1050,8 +1050,8 @@ 

  

      # Once mock_chroot gets EOL, copr_chroots are going to be deleted

      # if their owner doesn't extend their time span

-     delete_after = db.Column(db.DateTime)

-     delete_notify = db.Column(db.DateTime)

+     delete_after = db.Column(db.DateTime, index=True)

+     delete_notify = db.Column(db.DateTime, index=True)

  

      def update_comps(self, comps_xml):

          if isinstance(comps_xml, str):

@@ -36,6 +36,7 @@ 

      "update_graphs": "UpdateGraphsDataCommand",

      "vacuum_graphs": "RemoveGraphsDataCommand",

      "notify_outdated_chroots": "NotifyOutdatedChrootsCommand",

+     "delete_outdated_chroots": "DeleteOutdatedChrootsCommand",

  }

  

  

This is a continuation of a PR#451 which deals with deletion of outdated chroots.

It adds a manage.py command, that should be run via Cron. Try

copr-frontend delete_outdated_chroots --dry-run

to see what chroots should be deleted, and

copr-frontend delete_outdated_chroots

to actually delete them.

The backend action removes the whole

/var/lib/copr/public_html/results/<ownername>/<projectname>/<chrootname>

directory. If you prefer to rather remove everything from it and put some README into it, please let me know. This RFE wasn't strictly defined, so I implemented the most straightforward solution.

1 new commit added

  • [frontend] don't print chroots that are or should be already deleted
5 years ago

There is one issue with this PR that I am aware of. The DeleteOutdatedChrootsCommand creates delete_chroot action even for already deleted chroots. Which is not a problem regarding the functionality - backend finds out, that the directory doesn't exist and skips it. But it unneccesairly spams the action table and also makes the --dry-run a bit confusing.

There is an easy solution to this problem - do chroot.delete_after = None after creating the delete_chroot action. I am going implement it, unless someone has a better idea. I am openning it up for a discussion, because if we NULL the delete_after column, we might lose an interesting statistic of how long particular chroots lived after they were EOL.

This can be a performance issue. This can delete a lot of data and create a huge rollback journal. On the other hand commit in each loop can be a performance issue as well (it wait for sync()). I propose to do commit() every 1000th iteration.

Can you add an index to DB on the column delete_after?

Besides those two comments (see them inline in Files Changed) I see no problem.

rebased onto 828d45daa55cab4d39b4b05a1fc83fadea1dabe1

5 years ago

rebased onto 82ba17e3ef8e7c6c040d0ab55cdfecb4a2721cc8

5 years ago

rebased onto c0dbe3969eb3e397da2c7cf11589a92e1abf21d0

5 years ago

In the current master, there is a broken migration path.
It isn't required, but PR#480 should be resolved first.

This can be a performance issue ...
Can you add an index to DB on the column delete_after?

Fixed. Please take a look.

rebased onto 8dfdf99

5 years ago

4 new commits added

  • [frontend] when deleting an outdated chroot, NULL the remove_after field
  • [frontend] commit after every 1k actions to avoid performance issues
  • [frontend] add index for delete_after a delete_notify
  • [frontend][backend] remove data from outdated chroots
5 years ago

Pull-Request has been merged by msuchy

5 years ago