From 5e9b556183aeb2bf809a7f5300f560fd87e143be Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Dec 17 2017 21:02:14 +0000 Subject: Parallelize This brings the wall-clock time down approximately proportionally to the number of CPUs (or possibly even more, e.g. here 16 min 44 went down to less than 1 min with 12 CPUs). I added a loop on failure. Unfortunately, sometimes the server returns an invalid answer. That also happens when running serially, but it seems to happen more often in parallelized mode. --- diff --git a/find-package-maintainers b/find-package-maintainers index 00820b4..d16393e 100755 --- a/find-package-maintainers +++ b/find-package-maintainers @@ -3,6 +3,7 @@ import argparse import requests import sys +import multiprocessing VERBOSE = False @@ -37,7 +38,6 @@ def get_maintainers(pkg): vprint('Fetched maintainers of {}: {}'.format(pkg, ', '.join(maintainers))) return maintainers - def options_parse(): p = argparse.ArgumentParser( description='Find maintainers given package names') @@ -62,13 +62,14 @@ def main(): by_package = {} opts = options_parse() - for package in opts.infile: - package = package.strip() - try: - by_package[package] = get_maintainers(package) - except PkgdbError as e: - print('ERR: {}'.format(e), file=sys.stderr) - continue + packages = [line.strip() for line in opts.infile] + with multiprocessing.Pool() as pool: + mapped = pool.map(get_maintainers, packages) + for package, mapping in zip(packages, mapped): + if isinstance(mapping, Exception): + print('ERR: {}: {}'.format(package, mapping), file=sys.stderr) + else: + by_package[package] = mapping if not by_package: print('No valid packages given.')