#14 Attempt to resolve module build dependencies
Merged 6 years ago by ncoghlan. Opened 6 years ago by ncoghlan.
modularity/ ncoghlan/fedmod issue-12-build-dependency-resolution  into  master

file modified
+15 -63
@@ -436,7 +436,8 @@ 

                      lnc[cl_match] = str(m)

          log.info(tb)

  

- def solve(solver, pkgnames, selfhost=False):

+ def _solve(solver, pkgnames):

+     """Given a set of package names, returns a list of solvables to install"""

      pool = solver.pool

  

      # We have to =(
@@ -455,62 +456,17 @@ 

          sys.exit(1)

  

      print_transaction(pool, solver.transaction())

-     candq = [s for s in solver.transaction().newpackages() if s.arch not in ("src", "nosrc")]

-     sources = set(get_sourcepkg(s) for s in candq)

- 

-     if not selfhost:

-         return set(candq), sources

- 

-     # We already solved runtime requires, no need to do that twice

-     selfhosting = set(candq)

-     selfhosting_srcs = set()

-     candq = list(sources)

-     # We will store text-based view of processed srcs for better performance,

-     # because selections are not free

-     srcs_done = set()

-     while candq:

Technically, now you can't get the builddeps of A, which buildrequires compat-openssl10-devel, and B, which buildrequires openssl-devel... That's why that code was there.

-         jobs = [pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE | solv.Job.SOLVER_WEAK, p.id) for p in candq]

-         solver.solve(jobs)

-         print_transaction(pool, solver.transaction())

-         # We are interested to operate only on really new packages below

-         newpkgs = set(solver.transaction().newpackages()) - selfhosting

-         for p in newpkgs.copy():

-             if p.arch in ("src", "nosrc"):

-                 srcs_done.add(str(p))

-                 selfhosting_srcs.add(p)

-                 newpkgs.remove(p)

-                 continue

-         selfhosting |= newpkgs

- 

-         # SOLVER_FAVOR packages which we already solved which will help us to get small dependency chain

-         pool.setpooljobs(pool.getpooljobs() + [pool.Job(solv.Job.SOLVER_FAVOR | solv.Job.SOLVER_SOLVABLE, p.id) for p in newpkgs])

- 

-         # In new queue only non-solvables are left

-         raw_decisions = solver.raw_decisions(1)

-         if not raw_decisions:

-             # At this point, nothing can be resolved anymore, so let's show problems

-             for p in candq:

-                 job = pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE, p.id)

-                 problems = solver.solve([job])

-                 # In some cases, even solvable jobs are disabled

-                 # https://github.com/openSUSE/libsolv/issues/204

-                 #assert not problems

-                 for problem in problems:

-                     print(problem)

-             sys.exit(1)

-         candq = [s for s in candq if s.id not in raw_decisions]

- 

-         srcs_queued = set(str(p) for p in candq if p.arch in ("src", "nosrc"))

-         for p in newpkgs:

-             s = get_sourcepkg(p, only_name=True)

-             if s in srcs_done or s in srcs_queued:

-                 continue

-             src = get_sourcepkg(p, s)

-             srcs_queued.add(str(src))

-             candq.append(src)

- 

-     return selfhosting, selfhosting_srcs

- 

+     return {s for s in solver.transaction().newpackages() if s.arch not in ("src", "nosrc")}

+ 

+ def ensure_buildable(pkgset, pool=None):

+     """Given a set of solvables, returns a set of source packages & build deps"""

+     # The given package set may not be installable on its own

+     # That's OK, since other modules will provide those packages

+     # The goal of *this* method is to report the SRPMs that need to be

+     # built, and their build dependencies

+     sources = set(get_sourcepkg(s) for s in pkgset)

+     builddeps = ensure_installable(sources, pool=pool)

+     return sources, builddeps

  

  def make_pool(arch):

      _populate_module_reverse_lookup() # TODO: Integrate this into the Pool abstraction
@@ -518,15 +474,13 @@ 

  

  _DEFAULT_HINTS = ("glibc-minimal-langpack",)

  

- def resolve(pkgnames, hints=_DEFAULT_HINTS, recommendations=False, builddeps=False, pool=None):

+ def ensure_installable(pkgnames, hints=_DEFAULT_HINTS, recommendations=False, pool=None):

      """Iterate over the resolved dependency set for the given packages

  

      *hints*:  Packages that have higher priority when more than one package

                could satisfy a dependency.

      *recommendations*: Whether or not to report recommended dependencies as well

                   as required dependencies (Default: required deps only)

-     *builddeps*: Whether or not to report build dependencies as well

-                  as runtime dependencies (Default: runtime deps only)

      """

      if pool is None:

          pool = make_pool("x86_64")
@@ -542,9 +496,7 @@ 

          # Ignore weak deps

          solver.set_flag(solv.Solver.SOLVER_FLAG_IGNORE_RECOMMENDED, 1)

  

-     binary, source = solve(solver, pkgnames, selfhost=builddeps)

-     for p in itertools.chain(binary, source or ()):

-         yield p

+     return _solve(solver, pkgnames)

  

  def print_reldeps(pool, pkg):

      sel = pool.select(pkg, solv.Selection.SELECTION_NAME | solv.Selection.SELECTION_DOTARCH)

file modified
+56 -26
@@ -8,15 +8,17 @@ 

  def _categorise_deps(pool, all_rpm_deps):

      module_deps = set()

      remaining_rpm_deps = set()

-     for pkgname in all_rpm_deps:

-         modname = _depchase.get_module_for_rpm(pkgname)

+     for pkg in all_rpm_deps:

+         modname = _depchase.get_module_for_rpm(pkg)

          if modname is not None:

              module_deps.add(modname)

          else:

-             remaining_rpm_deps.add(pkgname)

-     srpm_deps = {_depchase.get_srpm_for_rpm(pool, dep) for dep in remaining_rpm_deps}

-     return module_deps, srpm_deps

+             remaining_rpm_deps.add(pkg)

+     return module_deps, remaining_rpm_deps

  

+ def _name_only(rpm_name):

+     name, version, release = rpm_name.rsplit("-", 2)

+     return name

  

  class ModuleGenerator(object):

  
@@ -24,12 +26,43 @@ 

          self.pkgs = pkgs

          self.pkg = None

          self.mmd = modulemd.ModuleMetadata()

-         self._pool = pool = _depchase.make_pool("x86_64")

-         # Dependency issue with the test build deps, so skip that for now...

-         all_deps = {s.name for s in _depchase.resolve(pkgs, builddeps=False, pool=pool)}

-         runtime_deps = {s.name for s in _depchase.resolve(pkgs, pool=pool)}

-         self.run_deps = runtime_deps

-         self.build_deps = all_deps - runtime_deps

+         self._calculate_dependencies()

+ 

+     def _calculate_dependencies(self):

+         pkgs = self.pkgs

+         pool = _depchase.make_pool("x86_64")

+         self.api_srpms = {_name_only(_depchase.get_srpm_for_rpm(pool, dep)) for dep in pkgs}

+         run_deps = {s.name:s for s in _depchase.ensure_installable(pkgs, pool=pool)}

+         module_run_deps, rpm_run_deps = _categorise_deps(pool, run_deps)

+         self.module_run_deps = module_run_deps

+         run_srpms, build_deps = _depchase.ensure_buildable(rpm_run_deps)

+         module_build_deps = set()

+         resolved_build_deps = set()

+         build_srpms = set()

+         for i in range(10):

+             # Arbitrary bound of 10 levels of SRPM bootstrapping

+             new_module_build_deps, remaining_build_deps = _categorise_deps(pool, build_deps)

+             module_build_deps |= new_module_build_deps

+             resolved_build_deps |= (build_deps - remaining_build_deps)

+             build_deps -= resolved_build_deps

+             if build_deps:

+                 new_build_srpms, remaining_build_deps = _depchase.ensure_buildable(build_deps)

+                 build_srpms |= new_build_srpms

+                 resolved_build_deps |= (build_deps - remaining_build_deps)

+                 build_deps -= resolved_build_deps

+             if not build_deps:

+                 break

+         else:

+             logging.warn("Failed to close out build dependencies after 10 iteration")

+         self.module_run_deps = module_run_deps

+         self.module_build_deps = module_build_deps

+         run_srpm_names = {_name_only(s.name) for s in run_srpms}

+         build_srpm_names = {_name_only(s.name) for s in build_srpms}

+         self.run_srpms = run_srpm_names - build_srpm_names

+         self.build_srpms = build_srpm_names - run_srpm_names

+         self.build_and_run_srpms = run_srpm_names & build_srpm_names

+         self.unresolved_build_rpms = {_name_only(s.name) for s in build_deps}

+ 

  

      def _get_pkg_info(self):

          """Function loads package from dnf"""
@@ -78,35 +111,32 @@ 

              self.mmd.add_content_license(str(self.pkg.license))

  

          # Declare the public API

-         for pkg in self.pkgs:

+         for pkg in self.api_srpms:

              self.mmd.api.add_rpm(pkg)

-             # TODO: Restore resolution of pkg to the actual provider

              self.mmd.components.add_rpm(pkg, "Package in api", buildorder=self._get_build_order(pkg))

  

-         # Resolve dependencies using other modules wherever possible

-         module_build_deps, srpm_build_deps = _categorise_deps(self._pool, self.build_deps)

-         module_run_deps, srpm_run_deps = _categorise_deps(self._pool, self.run_deps)

- 

          # Declare module level dependencies

-         for modname in module_build_deps:

+         for modname in self.module_build_deps:

              self.mmd.buildrequires[modname] = "f27"

-         for modname in module_run_deps:

+         for modname in self.module_run_deps:

              self.mmd.requires[modname] = "f27"

  

          # Add any other RPMs not available from existing modules as components

-         for pkg in srpm_build_deps.intersection(srpm_run_deps):

+         for pkg in self.build_and_run_srpms:

              self.mmd.components.add_rpm(pkg, "Build and runtime dependency.", buildorder=self._get_build_order(pkg))

  

-         for pkg in (srpm_build_deps - srpm_run_deps):

-             self.mmd.components.add_rpm(pkg, "Build dependency.", buildorder=self._get_build_order(pkg))

- 

-         for pkg in (srpm_run_deps - srpm_build_deps):

+         for pkg in self.run_srpms:

              self.mmd.components.add_rpm(pkg, "Runtime dependency.", buildorder=self._get_build_order(pkg))

  

-         # Filter out any build-only packages

-         for pkg in (srpm_build_deps - srpm_run_deps - self.mmd.api.rpms):

+         for pkg in self.build_srpms:

+             self.mmd.components.add_rpm(pkg, "Build dependency.", buildorder=self._get_build_order(pkg))

+             # Filter out any build-only packages

+             # TODO: This won't filter out all the RPMs, only the one matching the SRPM name

+             #       See https://pagure.io/modulemd/issue/54 for discussion

              self.mmd.filter.add_rpm(pkg)

  

+         # TODO: Emit something for non-empty self.unresolved_build_rpms

+ 

  

      def _get_build_order(self, pkg):

          if pkg in self.mmd.api.rpms:

@@ -60,7 +60,7 @@ 

  class TestMultiplePackageInput(object):

  

      def setup(self):

-         self.input_rpms = input_rpms = ('grep', 'samba')

+         self.input_rpms = input_rpms = ('grep', 'mariadb')

          self.md, self.output_fname = _generate_modulemd(input_rpms)

  

      def teardown(self):
@@ -77,7 +77,7 @@ 

          assert modmd.summary == ""

          assert modmd.description == ""

  

-         # Expected licenses for grep + samba

+         # Expected licenses for grep + mariadb

          assert len(modmd.module_licenses) == 1

          assert sorted(modmd.module_licenses) == sorted(['MIT'])

          assert len(modmd.content_licenses) == 0 # This doesn't seem right...
@@ -85,12 +85,11 @@ 

          # Only given modules are listed in the public API

          assert sorted(modmd.api.rpms) == sorted(self.input_rpms)

  

-         # Expected components for grep + samba

+         # Expected components for grep + mariadb

          expected_components = set(self.input_rpms)

-         # TODO: Figure out where the "krb5" dependency is coming from

-         expected_components.add("unknown-component-krb5")

          assert set(modmd.components.rpms) == expected_components

  

-         # Expected module dependencies for grep + samba

+         # Expected module dependencies for grep + mariadb

          assert set(modmd.buildrequires) == set()

-         assert set(modmd.requires) == set(('platform', 'samba', 'python2'))

+         expected_modules = set(('platform', 'mariadb', 'perl', 'networking-base'))

+         assert set(modmd.requires) == expected_modules

First pass at fixing https://pagure.io/modularity/fedmod/issue/12 by
recursively resolving SRPM build dependencies after filtering out
the dependencies satisfied by other prebuilt modules.

Robust testing of this won't be possible until the metadata issues
covered in https://pagure.io/modularity/fedmod/issue/13 are fixed.

Pull-Request has been merged by ncoghlan

6 years ago

Technically, now you can't get the builddeps of A, which buildrequires compat-openssl10-devel, and B, which buildrequires openssl-devel... That's why that code was there.

@ignatenkobrain The problem with doing the build requirement recursion at that point is that we want to filter out any build dependencies that can be provided by other modules before we decide we need to build them as part of the module being generated.

Due to that problem, I moved the recursive build dependency resolution out into the module generator at https://pagure.io/modularity/fedmod/pull-request/14#_2,47.

On each iteration, it splits the identified build dependencies into "things we can get from other modules" and "things we have to build from source as part of this module". To guard against circular build dependencies, it imposes an arbitrary limit of 10 levels of build dependency resolution, after which it expects everything to be resolved.

Now, this isn't really tested yet, so it's entirely possible there are still bugs in the logic, but this step does need to be module aware if we want it to do the right thing.