#1824 additional options to clean database
Merged 2 years ago by tkopecek. Opened 2 years ago by tkopecek.
tkopecek/koji issue1707  into  master

file modified
+134 -12
@@ -3,46 +3,163 @@ 

  import os

  from optparse import OptionParser

  

+ from six.moves import xmlrpc_client

  from six.moves.configparser import RawConfigParser

  

  import koji.db

  

  

- def clean_sessions(cursor, vacuum):

-     q = " FROM sessions WHERE update_time < now() - '1 day'::interval"

+ def clean_sessions(cursor, vacuum, test, age):

+     q = " FROM sessions WHERE update_time < NOW() - '%s days'::interval" % int(age)

      if options.verbose:

          cursor.execute("SELECT COUNT(*) " + q)

          rows = cursor.fetchall()[0][0]

          print("Deleting %d sessions" % rows)

  

-     cursor.execute("DELETE " + q)

-     if vacuum:

-         cursor.execute("VACUUM ANALYZE sessions")

+     if not test:

+         cursor.execute("DELETE " + q)

+         if vacuum:

+             cursor.execute("VACUUM ANALYZE sessions")

  

  

- def clean_reservations(cursor, vacuum):

-     q = " FROM build_reservations WHERE created < now() - '1 day'::interval"

+ def clean_reservations(cursor, vacuum, test, age):

+     q = " FROM build_reservations WHERE created < NOW() - '%s days'::interval" % int(age)

      if options.verbose:

          cursor.execute("SELECT COUNT(*) " + q)

          rows = cursor.fetchall()[0][0]

          print("Deleting %d build reservations" % rows)

  

-     cursor.execute("DELETE " + q)

+     if not test:

+         cursor.execute("DELETE " + q)

+         if vacuum:

+             cursor.execute("VACUUM ANALYZE build_reservations")

+ 

+ 

+ def clean_notification_tasks(cursor, vacuum, test, age):

+     q = " FROM task WHERE method = 'build' AND completion_time < NOW() - '%s days'::interval" % int(age)

+     if options.verbose:

+         cursor.execute("SELECT COUNT(*) " + q)

+         rows = cursor.fetchall()[0][0]

+         print("Deleting %d tagNotification tasks" % rows)

+ 

+     if not test:

+         # cascade

+         cursor.execute("DELETE " + q)

+         if vacuum:

+             cursor.execute("VACUUM ANALYZE task")

+ 

+ 

+ def clean_scratch_tasks(cursor, vacuum, test, age):

+     q = """ FROM task

+               WHERE method = 'build' AND

+               completion_time < NOW()  - '%s days'::interval AND

+               request LIKE '%%%%<name>scratch</name>%%%%'""" % int(age)

+     if options.verbose:

+         cursor.execute("SELECT COUNT(*) " + q)

+         rows = cursor.fetchall()[0][0]

+         print("Deleting %d scratch build tasks" % rows)

+ 

+     if test:

+         return

+ 

+     # we should check, that it is really a scratch build

+     ids = []

+     # will be dropped automatically in the end of script/connection

+     cursor.execute("CREATE TEMPORARY TABLE temp_scratch_tasks (task_id INTEGER NOT NULL)")

+     cursor.execute("SELECT id, request " + q)

+     for row in cursor.fetchall():

+         task_id, request = row

+         try:

+             params, method = xmlrpc_client.loads(request)

+             opts = params[2]

+             if opts['scratch']:

+                 cursor.execute("INSERT INTO temp_scratch_tasks VALUES (%s)", (task_id,))

+                 ids.append(task_id)

+         except:

+             continue

+ 

+     parents = ids

+     while True:

+         if not parents:

+             break

+         children = []

+         cursor.execute("SELECT id FROM task WHERE parent IN %s", (parents,))

+         for row in cursor.fetchall():

+             children.append(row[0])

+         parents = children

+         if children:

+             values = ', '.join(["(%d)" % task_id for task_id in children])

+             cursor.execute("INSERT INTO temp_scratch_tasks VALUES %s" % values)

+ 

+     if not ids:

+         return

+ 

+     # delete standard buildroots

+     cursor.execute("DELETE FROM standard_buildroot WHERE task_id IN (SELECT task_id FROM temp_scratch_tasks)")

+ 

+     # delete tasks finally

+     cursor.execute("DELETE FROM task WHERE id IN (SELECT task_id FROM temp_scratch_tasks)")

+ 

      if vacuum:

-         cursor.execute("VACUUM ANALYZE build_reservations")

+         cursor.execute("VACUUM ANALYZE task")

+ 

+ 

+ def clean_buildroots(cursor, vacuum, test):

+     q = " FROM buildroot WHERE cg_id IS NULL AND id NOT IN (SELECT buildroot_id FROM standard_buildroot)"

+ 

+     if options.verbose:

+         cursor.execute("SELECT COUNT(*) " + q)

+         rows = cursor.fetchall()[0][0]

+         print("Deleting %d buildroots" % rows)

+ 

+     if not test:

+         cursor.execute("DELETE FROM buildroot_listing WHERE buildroot_id IN (SELECT id %s)" % q)

+         cursor.execute("DELETE " + q)

+         if vacuum:

+             cursor.execute("VACUUM ANALYZE build_reservations")

  

  

  if __name__ == "__main__":

      global options

      parser = OptionParser("%prog cleans koji database")

      parser.add_option('-v', '--verbose', action="store_true", help="Be verbose")

+     parser.add_option('-t', '--test', action="store_true",

+                       help="Don't delete anything, print estimations (implies -v)")

      parser.add_option('-c', '--conf', default='/etc/koji-hub/hub.conf',

                        action='store', help="Path to koji's hub.conf")

      parser.add_option('--no-vacuum', action="store_false", dest="vacuum",

                        default=True,

                        help="Don't run vacuum on affected tables")

+     parser.add_option('--sessions-age', type=int,

+                       action="store", default=1, metavar="DAYS",

+                       help="Delete sessions older than this (default: 1 day)")

+     parser.add_option('--reservations-age', type=int,

+                       action="store", default=1, metavar="DAYS",

+                       help="Delete CG reservations older than this (default: 1 day)")

+     parser.add_option('--tag-notifications', action="store_true",

+                       help="Delete tagNotification tasks", default=False)

+     parser.add_option('--tag-notifications-age', type=int,

+                       action="store", default=730, metavar="DAYS",

+                       help="Delete tagNotification tasks older than this (default: 2 years)")

+     parser.add_option("--scratch-builds", action="store_true",

+                       dest="scratch", default=False,

+                       help="Delete scratch build tasks")

+     parser.add_option('--scratch-builds-age', type=int, dest="scratch_age",

+                       action="store", default=730, metavar="DAYS",

+                       help="Delete scratch builds' tasks older than this (default: 2 years")

+     parser.add_option('--buildroots', action="store_true",

+                       help="Delete unreferenced buildroots")

+     parser.add_option('-f', '--force', action="store_true",

+                       help="This is needs for all options affecting auditable data")

      options, args = parser.parse_args()

  

+     if options.test:

+         options.verbose = True

+         print("Running in test mode, no changes will be made")

+ 

+     if not options.force and (options.tag_notifications or options.scratch or options.buildroots):

+         parser.error("You need to pass --force to trigger these actions")

+ 

      if not os.path.exists(options.conf):

          parser.error("Config file doesn't exist")

  
@@ -85,6 +202,11 @@ 

      conn.set_session(autocommit=True)

      cursor = conn.cursor()

  

-     clean_sessions(cursor, options.vacuum)

-     clean_reservations(cursor, options.vacuum)

-     conn.commit()

+     clean_sessions(cursor, options.vacuum, options.test, options.sessions_age)

+     clean_reservations(cursor, options.vacuum, options.test, options.reservations_age)

+     if options.tag_notifications:

+         clean_notification_tasks(cursor, options.vacuum, options.test, age=options.tag_notifications_age)

+     if options.scratch:

+         clean_scratch_tasks(cursor, options.vacuum, options.test, age=options.scratch_age)

+     if options.buildroots:

+         clean_buildroots(cursor, options.vacuum, options.test)

As mentioned in the issue, would partitioning this old data to some archive tables make more sense than outright deletion?

Out of what's there, deleting old sessions data makes the most sense to me.

Yes, partitioning is ok for most usecases. I wanted to provide additional options, how to deal with old data. Especially in some test CI envs, where is no need for audit, deleting old data could prove useful. I've tried to be more verbose about potentially wrong tables with adding --force, but maybe we shouldn't provide this at all?

it looks possible to make the sql too long at the first run. maybe a temp table make more sense?

Metadata Update from @tkopecek:
- Pull-request tagged with: testing-ready

2 years ago

rebased onto 39a47d63c2dbb07d99b766d97e9a58630b2157f5

2 years ago

rebased onto a0238cca51e98fb1666d649688f1df0e6178617c

2 years ago

rebased onto 4af1c19101c0ada8cc56a7f30d8f9a547c143e2f

2 years ago

Metadata Update from @jcupova:
- Pull-request tagged with: testing-done

2 years ago

rebased onto b09fd01

2 years ago

Commit 72044c1 fixes this pull-request

Pull-Request has been merged by tkopecek

2 years ago