From 7c1aa443a0bc3ed6162b48e7805ae5d17be3cb14 Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Nov 18 2016 20:48:42 +0000 Subject: Add option to generate a short file list for fedfind See https://pagure.io/fedora-qa/fedfind/issue/2 . At present, fedfind parses rsync output to discover images for composes for which we have no reliable metadata (any compose older than Fedora 24, and any Fedora 24+ compose that's synced to the mirrors and split into two or more parts and has its metadata removed). This is bad for all the reasons quick-fedora-mirror cites - it's very heavy on both the server and the client. stickster suggested having an option for fedfind to parse the `fullfilelist` that `create-filelist` produces, for things that would use fedfind frequently and have fast connections to the server (so they wouldn't care about the size). But because all fedfind wants is images, we can actually do better, by creating a smaller file list which filters out packages (.rpm, .drpm), ARM Device Tree boot files (.dtb), HTML files (.html), and directories. This should never filter out anything fedfind cares about, but makes far smaller lists, small enough that we could just have fedfind always use these lists and remove the rsync scraping code. 
--- diff --git a/create-filelist b/create-filelist index eeba9d0..8fc3367 100755 --- a/create-filelist +++ b/create-filelist @@ -57,7 +57,9 @@ def recursedir(path='.', skip=[], alwaysskip=['.~tmp~']): def parseopts(): null = open(os.devnull, 'w') p = argparse.ArgumentParser( - description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror.') + description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, ' + 'and a much smaller list with packages, Device Tree boot files, HTML files and ' + 'directories filtered out, for consumption by fedfind.') p.add_argument('-c', '--checksum', action='store_true', help='Include checksums of all repomd.xml files in the file list.') p.add_argument('-C', '--checksum-file', action='append', dest='checksum_files', @@ -73,6 +75,8 @@ def parseopts(): help='Filename of the file list with times (default: stdout).') p.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=null, help='Filename of the file list without times (default: no plain file list is generated).') + p.add_argument('-F', '--filterlist', type=argparse.FileType('w'), default=null, + help='Filename of the filtered file list for fedfind (default: not generated).') opts = p.parse_args() @@ -107,6 +111,10 @@ def main(): for entry in recursedir(skip=opts.skip_files): # opts.filelist.write(entry.path + '\n') print(entry.path, file=opts.filelist) + # write to filtered list if appropriate + skips = ('.rpm', '.drpm', '.dtb', '.html') + if not any(entry.path.endswith(skip) for skip in skips) and not (entry.is_dir()): + print(entry.path, file=opts.filterlist) if entry.name in opts.checksum_files: checksums[entry.path[2:]] = True info = entry.stat(follow_symlinks=False)