#!/usr/bin/python
# -*- coding: utf-8 -*-


import sys
import fnmatch
import optparse
import lxml.etree
import re
from io import StringIO


class CompsFilter(object):
    """
    Filter a comps XML document for one architecture.

    The document is parsed once in the constructor. All ``filter_*`` and
    ``remove_*`` methods modify the parsed tree in place; ``write``,
    ``pprint`` and ``xml`` serialize the current state of the tree.
    """

    def __init__(self, file_obj, reindent=False):
        """
        Parse the comps document.

        :param file_obj: source handed to ``lxml.etree.parse``
        :param reindent: if true, whitespace-only text is dropped while
                         parsing and the output is pretty-printed
        """
        self.reindent = reindent
        parser = None
        if self.reindent:
            # Existing whitespace-only text nodes get in the way of lxml's
            # pretty printer, so they are discarded at parse time.
            parser = lxml.etree.XMLParser(remove_blank_text=True)
        self.tree = lxml.etree.parse(file_obj, parser=parser)
        self.encoding = "utf-8"

    def _remove_elements(self, xpath):
        """
        Detach every element matched by the xpath expression from its parent.
        """
        # xpath() returns a list, so removing while looping is safe.
        for element in self.tree.xpath(xpath):
            element.getparent().remove(element)

    def _filter_elements_by_arch(self, xpath, arch, only_arch=False):
        """
        Keep only the elements matched by xpath that apply to the given arch.

        Elements whose 'arch' attribute lists the arch are kept and lose the
        attribute; elements listing only other arches are removed. Elements
        without the attribute are kept unless only_arch is set.
        """
        if only_arch:
            # remove all elements without the 'arch' attribute
            self._remove_elements(xpath + "[not(@arch)]")

        for element in self.tree.xpath(xpath + "[@arch]"):
            # the attribute holds arches separated by commas and/or spaces
            arches = [a for a in re.split(r"[, ]+", element.attrib.get("arch")) if a]
            if arch in arches:
                # remove the 'arch' attribute
                del element.attrib["arch"]
            else:
                # remove elements not matching the arch
                element.getparent().remove(element)

    def _filter_group_references(self, xpath):
        """
        Remove grouplist/groupid entries under the elements matched by xpath
        that refer to a group not defined in the document.
        """
        # a set gives constant-time lookups for every referenced id
        defined_groups = set(self.tree.xpath("/comps/group/id/text()"))
        for groupid in self.tree.xpath(xpath + "/grouplist/groupid"):
            if groupid.text not in defined_groups:
                groupid.getparent().remove(groupid)

    def filter_packages(self, arch, only_arch=False):
        """
        Filter packages according to arch.
        If only_arch is set, then only packages for the specified arch are preserved.
        Multiple arches separated by comma or space can be specified in the XML.
        """
        self._filter_elements_by_arch("/comps/group/packagelist/packagereq", arch, only_arch)

    def filter_groups(self, arch, only_arch=False):
        """
        Filter groups according to arch.
        If only_arch is set, then only groups for the specified arch are preserved.
        Multiple arches separated by comma or space can be specified in the XML.
        """
        self._filter_elements_by_arch("/comps/group", arch, only_arch)

    def filter_category_groups(self):
        """
        Remove undefined groups from categories.
        """
        self._filter_group_references("/comps/category")

    def remove_empty_groups(self, keep_empty=None):
        """
        Remove all groups without packages.

        :param keep_empty: optional list of fnmatch patterns; an empty group
                           whose id matches any of them is kept
        """
        keep_empty = keep_empty or []
        for group in self.tree.xpath("/comps/group[not(packagelist/packagereq)]"):
            ids = group.xpath("id/text()")
            # A group without an <id> is matched as an empty id instead of
            # raising IndexError.
            group_id = ids[0] if ids else ""
            if any(fnmatch.fnmatch(group_id, pattern) for pattern in keep_empty):
                continue
            group.getparent().remove(group)

    def remove_empty_categories(self):
        """
        Remove all categories without groups.
        """
        self._remove_elements("/comps/category[not(grouplist/groupid)]")

    def remove_categories(self):
        """
        Remove all categories.
        """
        self._remove_elements("/comps/category")

    def remove_langpacks(self):
        """
        Remove all langpacks.
        """
        self._remove_elements("/comps/langpacks")

    def remove_translations(self):
        """
        Remove all translations (every element carrying an xml:lang attribute).
        """
        self._remove_elements("//*[@xml:lang]")

    def filter_environment_groups(self):
        """
        Remove undefined groups from environments.
        """
        self._filter_group_references("/comps/environment")

    def remove_empty_environments(self):
        """
        Remove all environments without groups.
        """
        self._remove_elements("/comps/environment[not(grouplist/groupid)]")

    def remove_environments(self):
        """
        Remove all environments.
        """
        self._remove_elements("/comps/environment")

    def write(self, file_obj):
        """
        Serialize the document, followed by a newline, to file_obj.

        :param file_obj: object with a ``write`` method; both binary and
                         text streams are accepted
        """
        data = lxml.etree.tostring(
            self.tree,
            pretty_print=self.reindent,
            xml_declaration=True,
            encoding=self.encoding,
        ) + b"\n"
        try:
            file_obj.write(data)
        except TypeError:
            # Text streams (StringIO, files opened with "w") reject bytes;
            # hand them the decoded document instead.
            file_obj.write(data.decode(self.encoding))

    def pprint(self):
        """
        Write the document to standard output.
        """
        # Prefer the underlying binary stream so the emitted bytes match the
        # encoding named in the XML declaration regardless of the locale.
        sys.stdout.flush()
        self.write(getattr(sys.stdout, "buffer", sys.stdout))

    def xml(self):
        """
        Return the serialized document as a text string.
        """
        buf = StringIO()
        self.write(buf)
        return buf.getvalue()


def main():
    """
    Command-line entry point.

    Reads one comps file, filters its packages and groups for the arch given
    with --arch, applies the optional clean-up and removal steps, and writes
    the result to the --output file or to standard output.
    Exits through ``parser.error`` when the comps file or the arch is missing.
    """
    parser = optparse.OptionParser("%prog [options] <comps.xml>")
    parser.add_option("--output", help="redirect output to a file")
    parser.add_option("--arch", help="filter groups and packages according to an arch")
    parser.add_option("--arch-only-groups", default=False, action="store_true", help="keep only arch groups, remove the rest")
    parser.add_option("--arch-only-packages", default=False, action="store_true", help="keep only arch packages, remove the rest")
    parser.add_option("--remove-categories", default=False, action="store_true", help="remove all categories")
    parser.add_option("--remove-langpacks", default=False, action="store_true", help="remove the langpacks section")
    parser.add_option("--remove-translations", default=False, action="store_true", help="remove all translations")
    parser.add_option("--remove-environments", default=False, action="store_true", help="remove all environment sections")
    parser.add_option("--keep-empty-group", default=[], action="append", metavar="[GROUPID]", help="keep groups even if they are empty")
    parser.add_option("--no-cleanup", default=False, action="store_true", help="don't remove empty groups and categories")
    parser.add_option("--no-reindent", default=False, action="store_true", help="don't re-indent the output")

    opts, args = parser.parse_args()

    if len(args) != 1:
        parser.error("please specify exactly one comps file")

    comps_file = args[0]

    if opts.arch is None:
        parser.error("please specify arch")

    # Binary mode leaves decoding to the XML parser, which follows the
    # encoding declared in the document. The constructor parses the whole
    # file, so the handle can be closed right after it returns.
    with open(comps_file, "rb") as file_obj:
        f = CompsFilter(file_obj, reindent=not opts.no_reindent)

    f.filter_packages(opts.arch, opts.arch_only_packages)
    f.filter_groups(opts.arch, opts.arch_only_groups)

    if not opts.no_cleanup:
        # Order matters: empty groups go first so that references to them
        # are dropped next, which in turn may empty categories/environments.
        f.remove_empty_groups(keep_empty=opts.keep_empty_group)
        f.filter_category_groups()
        f.remove_empty_categories()
        f.filter_environment_groups()
        f.remove_empty_environments()

    if opts.remove_categories:
        f.remove_categories()

    if opts.remove_langpacks:
        f.remove_langpacks()

    if opts.remove_translations:
        f.remove_translations()

    if opts.remove_environments:
        f.remove_environments()

    if opts.output:
        # The document is serialized as encoded bytes, so the file is opened
        # in binary mode; the with-block guarantees it is flushed and closed.
        with open(opts.output, "wb") as out:
            f.write(out)
    else:
        f.pprint()


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()