#701 Add a koji_shadow_dir config option
Closed 6 years ago by lsedlar. Opened 6 years ago by otaylor.
otaylor/pungi koji-shadow-dir  into  master

file modified
+5
@@ -712,6 +712,11 @@ 

  **koji_profile**

      (*str*) -- koji profile name

  

+ **koji_shadow_dir**
ausil commented 6 years ago

@lsedlar, I would like your thoughts, but I think koji_local_dir is a better name

+     (*str*) -- directory in which to locally shadow packages from koji's topdir.

+     Setting this allows testing Pungi without a local koji instance, but can

+     result in very large amounts of network and disk usage.

+ 

  **runroot** [mandatory]

      (*bool*) -- run some tasks such as buildinstall or createiso in koji build root (True) or locally (False)

  

file modified
+1
@@ -726,6 +726,7 @@ 

              "pdc_insecure": {"type": "boolean", "default": False},

  

              "koji_profile": {"type": "string"},

+             "koji_shadow_dir": {"type": "string"},

  

              "pkgset_koji_tag": {"$ref": "#/definitions/strings"},

              "pkgset_koji_inherit": {

@@ -208,10 +208,12 @@ 

  

  

  class KojiPackageSet(PackageSetBase):

-     def __init__(self, koji_wrapper, sigkey_ordering, arches=None, logger=None):

+     def __init__(self, koji_wrapper, sigkey_ordering, arches=None, logger=None,

+                  downloader=None):

          super(KojiPackageSet, self).__init__(sigkey_ordering=sigkey_ordering,

                                               arches=arches, logger=logger)

          self.koji_wrapper = koji_wrapper

+         self.downloader = downloader

  

      def __getstate__(self):

          result = self.__dict__.copy()
@@ -236,7 +238,12 @@ 

      def get_package_path(self, queue_item):

          rpm_info, build_info = queue_item

          pathinfo = self.koji_wrapper.koji_module.pathinfo

+ 

          paths = []

+         if self.downloader:

+             rpm_path = os.path.join(pathinfo.build(build_info), pathinfo.rpm(rpm_info))

+             return self.downloader.download(rpm_path)

+ 

          for sigkey in self.sigkey_ordering:

              if sigkey is None:

                  # we're looking for *signed* copies here

@@ -19,6 +19,8 @@ 

  import json

  import re

  from kobo.shortcuts import force_list

+ import kobo.log

+ import subprocess

  

  import pungi.wrappers.kojiwrapper

  import pungi.phases.pkgset.pkgsets
@@ -123,6 +125,50 @@ 

      return module

  

  

+ class ShadowPkgDownloader(kobo.log.LoggingBase):

+     def __init__(self, compose, koji_wrapper):

+         super(ShadowPkgDownloader, self).__init__(logger=compose._logger)

+ 

+         self.koji_topdir = koji_wrapper.koji_module.config.topdir

+         self.koji_topurl = koji_wrapper.koji_module.config.topurl

+         self.shadow_dir = compose.conf["koji_shadow_dir"]

+ 

+     def download(self, rpm_path):

+         if not rpm_path.startswith(self.koji_topdir):

+             raise RuntimeError("Unexpected path returned from Koji")

+ 

+         partial_path = os.path.relpath(rpm_path, self.koji_topdir)

+ 

+         shadow_path = os.path.join(self.shadow_dir, partial_path)

+         if os.path.isfile(shadow_path):

+             return shadow_path

+ 

+         if self.koji_topurl.endswith('/'):

+             url = self.koji_topurl + partial_path

+         else:

+             url = self.koji_topurl + '/' + partial_path

+ 

+         shadow_dir = os.path.dirname(shadow_path)

+         if not os.path.isdir(shadow_dir):

+             try:

+                 os.makedirs(shadow_dir)

+             except OSError:

+                 pass # multiple threads can race, causing EEXIST

+ 

+         self.log_info("Downloading %s" % url)

+         subprocess.check_call(['curl', '-s', '-S', '-o', shadow_path, url])

+ 

+         return shadow_path

+ 

+     def __getstate__(self):

+         result = self.__dict__.copy()

+         del result["_logger"]

+         return result

+ 

+     def __setstate__(self, data):

+         self._logger = None

+         self.__dict__.update(data)

Are the __(get|set)state__ methods actually needed?

This was done because otherwise the pickling of the global packageset failed: it couldn't pickle the downloader, because it couldn't pickle the logger. Looking into the code structure some more, it probably makes sense to delete the downloader in KojiPackageSet.__setstate__ — the downloader shouldn't be needed if populating the packageset is skipped and the pickle is read again.

+ 

  class PkgsetSourceKoji(pungi.phases.pkgset.source.PkgsetSourceBase):

      enabled = True

  
@@ -130,15 +176,23 @@ 

          compose = self.compose

          koji_profile = compose.conf["koji_profile"]

          self.koji_wrapper = pungi.wrappers.kojiwrapper.KojiWrapper(koji_profile)

+         if 'koji_shadow_dir' in compose.conf:

+             path_prefix = compose.conf["koji_shadow_dir"]

+             downloader = ShadowPkgDownloader(self.compose, self.koji_wrapper)

+         else:

+             path_prefix = self.koji_wrapper.koji_module.config.topdir

+             downloader = None

          # path prefix must contain trailing '/'

-         path_prefix = self.koji_wrapper.koji_module.config.topdir.rstrip("/") + "/"

-         package_sets = get_pkgset_from_koji(self.compose, self.koji_wrapper, path_prefix)

+         path_prefix = path_prefix.rstrip("/") + "/"

+         package_sets = get_pkgset_from_koji(self.compose, self.koji_wrapper, path_prefix,

+                                             downloader=downloader)

          return (package_sets, path_prefix)

  

  

- def get_pkgset_from_koji(compose, koji_wrapper, path_prefix):

+ def get_pkgset_from_koji(compose, koji_wrapper, path_prefix, downloader=None):

      event_info = get_koji_event_info(compose, koji_wrapper)

-     pkgset_global = populate_global_pkgset(compose, koji_wrapper, path_prefix, event_info)

+     pkgset_global = populate_global_pkgset(compose, koji_wrapper, path_prefix, event_info,

+                                            downloader=downloader)

      package_sets = populate_arch_pkgsets(compose, path_prefix, pkgset_global)

      package_sets["global"] = pkgset_global

  
@@ -150,7 +204,8 @@ 

      return package_sets

  

  

- def populate_global_pkgset(compose, koji_wrapper, path_prefix, event_id):

+ def populate_global_pkgset(compose, koji_wrapper, path_prefix, event_id,

+                            downloader=None):

      all_arches = set(["src"])

      for arch in compose.get_arches():

          is_multilib = is_arch_multilib(compose.conf, arch)
@@ -167,7 +222,7 @@ 

      for variant in compose.all_variants.values():

          variant.pkgset = pungi.phases.pkgset.pkgsets.KojiPackageSet(

              koji_wrapper, compose.conf["sigkeys"], logger=compose._logger,

-             arches=all_arches)

+             arches=all_arches, downloader=downloader)

          variant_tags[variant] = []

  

          # Find out all modules in every variant and add their compose tags
@@ -209,7 +264,7 @@ 

      else:

          global_pkgset = pungi.phases.pkgset.pkgsets.KojiPackageSet(

              koji_wrapper, compose.conf["sigkeys"], logger=compose._logger,

-             arches=all_arches)

+             arches=all_arches, downloader=downloader)

          # Get package set for each compose tag and merge it to global package

          # list. Also prepare per-variant pkgset, because we do not have list

          # of binary RPMs in module definition - there is just list of SRPMs.
@@ -218,7 +273,7 @@ 

                               "'%s'" % compose_tag)

              pkgset = pungi.phases.pkgset.pkgsets.KojiPackageSet(

                  koji_wrapper, compose.conf["sigkeys"], logger=compose._logger,

-                 arches=all_arches)

+                 arches=all_arches, downloader=downloader)

              # Create a filename for log with package-to-tag mapping. The tag

              # name is included in filename, so any slashes in it are replaced

              # with underscores just to be safe.

[This PR is basically a RFC - it's very useful for what I'm doing right now -
developing and testing changes to OSBS/atomic-reactor/ODCS to create
Flatpaks out of modules in Fedora Koji, but I don't know if it's generally
useful. For a compose of the entire distribution rather than a module, the
amount of downloading would be extreme.

If there's interest, I'll a) add tests, and b) possibly do something better than
subprocess.check_call(['curl', ...]) for the actual download.]

In some cases, it can be interesting to test Pungi against an instance
of Koji without having access to mount the 'topdir' of that Koji
instance. If koji_shadow_dir is set, packages that are needed from
Koji are downloaded into the shadow directory if they don't already
exist there. This option can obviously involve large amounts of network
and disk usage.

rebased

6 years ago

@lsedlar, I would like your thoughts, but I think koji_local_dir is a better name

Does koji generate a yum repository for the tag you are interested in? If yes, I'm wondering if you could instead change the pkgset_source to repos and configure pkgset_repos like follows:

pkgset_repos = {
    "x86_64": ["https://my.koji.example.com/repos/.../"],
    "ppc64le": ["…"],
    …
}

Ideally for each architecture in the mapping you should list also repos with source packages and debuginfo (which are not generated by Koji IMO). Without that those packages will not be included in the compose, but depending on the use case it might not be an issue.

Thanks for the suggestion, @lsedlar! While I don't fully understand the logic for when Koji generates repositories and cleans them up, I think I can't count on the necessary repositories being there, even for testing purposes. I checked for the test modules that I'm using currently (built a few weeks ago) and the repositories aren't present — they were either never built or have since been cleaned up.

Using pkgset_repos would also require restructuring the code in Pungi so that the module gather source isn't tied to the koji pkgset source.

Oh, I missed that you are working with modules. In that case Koji really can not be avoided. I think this approach is a good one.

If I understand correctly, it downloads all packages from the tag, so that may be unsuitable for tags with lots of content, but in general that's not a blocker.

BTW the last comment looks like it should have gone to some other PR on ODCS.

Are the __(get|set)state__ methods actually needed?

As for the actual implementation: what is the expected usage of the local directory? Will it be reused for multiple composes?

This was done because otherwise the pickling of the global packageset failed: it couldn't pickle the downloader, because it couldn't pickle the logger. Looking into the code structure some more, it probably makes sense to delete the downloader in KojiPackageSet.__setstate__ — the downloader shouldn't be needed if populating the packageset is skipped and the pickle is read again.

As for the actual implementation: what is the expected usage of the local directory? Will it be reused for multiple composes?

My expectation is yes. This is basically a development-mode thing, so you might run the same compose over many times until you get right. But also, different modular composes will frequently include the same Koji tags - most composes of a module will pull in the packages from the platform (base-runtime) module, for example.

Cleanup of the downloaded rpms would be left up to the user. You can just remove the contents of the directory at any point and start over, as the simplest method.

Cool, that all sounds good to me.

I'll close this due to long inactivity. If you would like this feature, please reopen this PR and we can work on getting it merged.

Pull-Request has been closed by lsedlar

6 years ago