#105 Add bundled() provider for vendoring
Merged 5 days ago by zbyszek. Opened 2 years ago by aplanas.
From aplanas/rust2rpm branch workspace into master

@@ -0,0 +1,3 @@ 

+ %__cargo_bundled_provides %{_bindir}/cargo-inspector --provides-vendor --path %{_builddir}

+ %__cargo_bundled_flags    exeonly

+ %__cargo_bundled_magic    ^(setuid,? )?(setgid,? )?(sticky )?ELF (32|64)-bit.*executable
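These three macros define an RPM file-attribute dependency generator: the exeonly flag restricts it to executable files, the magic regex selects ELF binaries, and the provides line is the command that rpmbuild runs, piping the matching file names into its stdin. A rough emulation of that call in Python (a sketch only; the real driver is rpmbuild, and the paths are illustrative):

    # Sketch: emulate what rpmbuild's file-attribute machinery does with
    # the macros above.  rpmbuild matches each packaged file against
    # %__cargo_bundled_magic and feeds the matching file names, one per
    # line, to the %__cargo_bundled_provides command on stdin.
    import subprocess

    result = subprocess.run(
        ["/usr/bin/cargo-inspector", "--provides-vendor",
         "--path", "/home/user/rpmbuild/BUILD"],  # stands in for %{_builddir}
        input=b"/usr/bin/zola\n",                 # a packaged ELF executable
        capture_output=True,
    )
    print(result.stdout.decode())  # e.g. bundled(crate(adler/default)) = 1.0.2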

file modified
+6
@@ -261,6 +261,9 @@ 

              toml, crate, version, doc_files, license_files = local_toml(crate, version)

              diff = make_patch(toml, enabled=patch, tmpfile=True)

              metadata = Metadata.from_file(toml)

+             if len(metadata) > 1:

+                 print(f"Warning: multiple metadata for {toml}")

+             metadata = metadata[0]

              return metadata.name, diff, metadata, doc_files, license_files

      else:

          cratef, crate, version = download(crate, version)
@@ -268,6 +271,9 @@ 

      with files_from_crate(cratef, crate, version) as (toml, doc_files, license_files):

          diff = make_patch(toml, enabled=patch)

          metadata = Metadata.from_file(toml)

+         if len(metadata) > 1:

+             print(f"Warning: multiple metadata for {toml}")

+         metadata = metadata[0]

      if store:

          shutil.copy2(cratef, os.path.join(os.getcwd(), f"{metadata.name}-{version}.crate"))

      return crate, diff, metadata, doc_files, license_files

file modified
+94 -24
@@ -1,8 +1,53 @@ 

  import argparse

+ import glob

+ import os

  import sys

  

  from . import Metadata

- from .metadata import normalize_deps

+ from .metadata import normalize_deps, Dependency

+ 

+ 

+ def _get_binaries(cargo_toml):

+     manifest = Metadata.manifest(cargo_toml, check=False)

+     return [

+         t['name'] for t in manifest.get('targets', []) if 'bin' in t['kind']

+     ]

+ 

+ 

+ def _cargo_toml(source_path, path, exclude_vendor=True):

+     """Find a Cargo.toml related with a binary."""

+     # Finding the Cargo.toml is a bit tricky, and the general idea is

+     # like this:

+     #

+     #   - `source_path`: set by the dependency generator

+     #     (cargo_bundled.attr) to point to `%{_builddir}`, i.e. where

+     #     the sources live (`.../rpmbuild/BUILD`)

+     #

+     #   - `path`: can point to an executable when called by

+     #     cargo_bundled.attr; the executable lives in `%{buildroot}`,

+     #     for example `.../rpmbuild/BUILDROOT/<PKGNAME>/usr/bin/<BINNAME>`

+     #

+     #   - `path`: can also point to a Cargo.toml when called from the

+     #     command line

+     #

+     #   - If `path` is a binary, we search under `source_path` for the

+     #     Cargo.toml that generates this specific binary

+ 

+     # If path is already a Cargo.toml file, we are done

+     binary_or_cargo_toml = os.path.basename(path)

+     if binary_or_cargo_toml == 'Cargo.toml':

+         return os.path.join(source_path, path)

+ 

+     cargo_tomls = glob.glob(os.path.join(source_path, '**/Cargo.toml'),

+                             recursive=True)

+     for cargo_toml in cargo_tomls:

+         if exclude_vendor and 'vendor' in cargo_toml.split(os.sep):

+             continue

+         if binary_or_cargo_toml in _get_binaries(cargo_toml):

+             return cargo_toml

+ 

+     raise FileNotFoundError(f'Cargo.toml not found for binary {binary_or_cargo_toml}')

+ 

  

  def main():

      parser = argparse.ArgumentParser()
@@ -16,56 +61,81 @@ 

      group.add_argument("-R", "--requires", action="store_true", help="Print Requires")

      group.add_argument("-BR", "--build-requires", action="store_true", help="Print BuildRequires")

      group.add_argument("-TR", "--test-requires", action="store_true", help="Print TestRequires")

+     group.add_argument("-PV", "--provides-vendor", action="store_true", help="Print Provides when vendoring")

      fgroup = parser.add_mutually_exclusive_group()

      fgroup.add_argument("-a", "--all-features", action="store_true", help="Activate all features")

      fgroup.add_argument("-f", "--features", default="default", help="Feature(s) to work on (comma-separated)")

+     parser.add_argument("--vendor", action="store_true", help="Print vendoring provides (even when not vendored)")

+     parser.add_argument("--include-workspaces", action="store_true", help="Include workspaces in the analysis")

+     parser.add_argument("-p", "--path", help="Path to the source project")

      parser.add_argument("file", nargs="*", help="Path(s) to Cargo.toml")

      args = parser.parse_args()

  

-     files = args.file or sys.stdin.readlines()

+     args.path = os.path.abspath(args.path) if args.path else os.getcwd()

+ 

+     files = args.file or [f.rstrip() for f in sys.stdin.readlines()]

+     files = [_cargo_toml(args.path, f) for f in files]

  

      features = set()

      for f in args.features.split(","):

          features.add(f or None)

  

-     def print_deps(deps):

-         if len(deps) > 0:

-             print("\n".join(sorted(normalize_deps(deps))))

- 

-     for f in files:

-         f = f.rstrip()

-         md = Metadata.from_file(f)

+     def process_metadata(md):

+         data = []

          if args.name:

-             print(md.name)

+             data.append(md.name)

          if args.version:

-             print(md._version)

+             data.append(md._version)

          if args.rpm_version:

-             print(md.version)

+             data.append(md.version)

          if args.target_kinds:

-             print("\n".join(set(tgt.kind for tgt in md.targets)))

+             data.extend(tgt.kind for tgt in md.targets)

          if args.list_features:

-             for f in sorted(f for f in md.dependencies if f is not None):

-                 print(f)

+             data.extend(f for f in md.dependencies if f is not None)

          if args.provides:

-             for f in features:

-                 print(md.provides(f))

+             data.extend(md.provides(f) for f in features)

          if args.requires:

              # Someone should own /usr/share/cargo/registry

-             print("cargo")

+             data.append('cargo')

              if args.all_features:

-                 print_deps(md.all_dependencies)

+                 data.extend(md.all_dependencies)

              else:

                  for f in features:

-                     print_deps(md.requires(f))

+                     data.extend(md.requires(f))

          if args.build_requires:

-             print("rust-packaging")

+             data.append("rust-packaging")

              if args.all_features:

-                 print_deps(md.all_dependencies)

+                 data.extend(md.all_dependencies)

              else:

                  for f in features:

-                     print_deps(md.requires(f, resolve=True))

+                     data.extend(md.requires(f, resolve=True))

          if args.test_requires:

-             print_deps(md.dev_dependencies)

+             data.extend(md.dev_dependencies)

+         if args.provides_vendor:

+             # Print the vendoring providers only if the 'vendor'

+             # directory is present

+             if args.vendor or os.path.isdir('vendor'):

+                 data.extend(md.resolved_dependencies())

+         return data

+ 

+     for f in files:

+         data = set()

+         mds = Metadata.from_file(f, include_members=args.include_workspaces)

+         for md in mds:

+             # process_metadata returns a list whose items are either

+             # strings or Dependency instances; a Dependency renders

+             # as multiple lines.  To sort the data and remove the

+             # duplicates we first normalize everything to strings


+             metadata_lines = []

+             for metadata in process_metadata(md):

+                 if isinstance(metadata, Dependency):

+                     metadata_lines.extend(metadata.normalize())

+                 else:

+                     metadata_lines.append(metadata)

+             data.update(metadata_lines)

+         for line in sorted(data):

+             print(line)

+ 

  

  if __name__ == "__main__":

      main()

file modified
+147 -9
@@ -3,8 +3,10 @@ 

  import collections

  import copy

  import json

+ import os.path

  import re

  import subprocess

+ from urllib.parse import urlparse

  

  

  Requirement = collections.namedtuple('Requirement', ('kind',
@@ -162,6 +164,35 @@ 

              raise ValueError(f'Found unhandled kind: {requirement}')

          return normalized

  

+     @staticmethod

+     def eval_(v1, op, v2):

+         if op == CargoSemVer.KIND_SHORTEQ:

+             return all((v1.major == v2.major,

+                         v1.minor == v2.minor,

+                         v1.patch == v2.patch))

+         elif op == CargoSemVer.KIND_GT:

+             return ((v1.major > v2.major) or

+                     (v1.major == v2.major and v1.minor > v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch > v2.patch))

+         elif op == CargoSemVer.KIND_GTE:

+             return ((v1.major > v2.major) or

+                     (v1.major == v2.major and v1.minor > v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch >= v2.patch))

+         elif op == CargoSemVer.KIND_LT:

+             return ((v1.major < v2.major) or

+                     (v1.major == v2.major and v1.minor < v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch < v2.patch))

+         elif op == CargoSemVer.KIND_LTE:

+             return ((v1.major < v2.major) or

+                     (v1.major == v2.major and v1.minor < v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch <= v2.patch))

+         else:

+             raise ValueError(f'Cannot evaluate operator: {op}')

+ 

  

  class Target:

      def __init__(self, name, kind):
@@ -173,11 +204,13 @@ 

  

  

  class Dependency:

-     def __init__(self, name, req=None, features=(), optional=False):

+     def __init__(self, name, req=None, features=(), optional=False,

+                  bundled=False):

          self.name = name

          self.req = req

          self.features = features

          self.optional = optional

+         self.bundled = bundled

  

      @classmethod

      def from_json(cls, metadata):
@@ -191,9 +224,11 @@ 

          return cls(**kwargs)

  

      @staticmethod

-     def _apply_reqs(name, reqs, feature=None):

+     def _apply_reqs(name, reqs, feature=None, bundled=False):

          fstr = f"/{feature}" if feature is not None else ""

          cap = f"crate({name}{fstr})"

+         if bundled:

+             cap = f"bundled({cap})"

          if not reqs:

              return cap

          deps = ' with '.join(
@@ -206,11 +241,13 @@ 

  

      def normalize(self):

          semver = CargoSemVer(self.req)

-         return [self._apply_reqs(self.name, semver.normalized, feature)

+         return [self._apply_reqs(self.name, semver.normalized, feature,

+                                  self.bundled)

                  for feature in self.features or (None,)]

  

      def __repr__(self):

-         return f"<Dependency: {self.name} {self.req} ({', '.join(sorted(self.features))})>"

+         features = sorted(feature for feature in self.features if feature)

+         return f"<Dependency: {self.name} {self.req} ({', '.join(features)})>"

  

      def __str__(self):

          return "\n".join(self.normalize())
@@ -230,6 +267,7 @@ 

          self.targets = set()

          self.dependencies = {}

          self.dev_dependencies = set()

+         self._path = None

  

      @property

      def description(self):
@@ -272,9 +310,10 @@ 

          self._summary = description[:p]

  

      @classmethod

-     def from_json(cls, metadata):

+     def from_json(cls, metadata, path):

          md = metadata

          self = cls(md["name"], md["version"])

+         self._path = path

  

          self.license = md["license"]

          self.license_file = md["license_file"]
@@ -333,10 +372,44 @@ 

          return self

  

      @classmethod

-     def from_file(cls, path):

-         metadata = subprocess.check_output(["cargo", "read-manifest",

-                                             f"--manifest-path={path}"])

-         return cls.from_json(json.loads(metadata))

+     def from_file(cls, path, include_members=False):

+         instances = []

+         members = Metadata.members(path) if include_members else []

+         for member in (members or [path]):

+             instance = cls.from_json(Metadata.manifest(member), member)

+             instances.append(instance)

+         return instances

+ 

+     @staticmethod

+     def manifest(path, check=True):

+         output = subprocess.run(

+             ["cargo", "read-manifest", f"--manifest-path={path}"],

+             check=check, capture_output=True

+         )

+         try:

+             result = json.loads(output.stdout)

+         except json.decoder.JSONDecodeError:

+             # A pure virtual manifest cannot be read; we fall back to

+             # the manifests of the individual workspace members

+             result = {}

+         return result

+ 

+     @staticmethod

+     def metadata(path, deps=False):

+         cmd = ["cargo", "metadata", "--format-version=1",

+                f"--manifest-path={path}"]

+         if not deps:

+             cmd.append("--no-deps")

+         return json.loads(subprocess.check_output(cmd))

+ 

+     @staticmethod

+     def members(path):

+         members = []

+         metadata = Metadata.metadata(path)

+         for workspace in metadata.get('workspace_members', []):

+             member = re.search(r'\((.*)\)', workspace).group(1)

+             members.append(os.path.join(urlparse(member).path, 'Cargo.toml'))

+         return members

  

      @property

      def all_dependencies(self):
@@ -369,6 +442,71 @@ 

                          for feature in features)

              return fdeps | deps

  

+     @staticmethod

+     def _match_crate(dependency, metadata):

+         for crate in metadata['resolve']['nodes']:

+             name, version, _ = crate['id'].split()

+             if name != dependency.name:

+                 continue

+             v1 = CargoSemVer.parse_version(version)

+             normalized = CargoSemVer(dependency.req).normalized

+             if all(CargoSemVer.eval_(v1, op, v2) for op, v2 in normalized):

+                 return crate

+ 

+     @staticmethod

+     def _find_crate(dependency, metadata):

+         for crate in metadata['resolve']['nodes']:

+             if dependency == crate['id']:

+                 return crate

+ 

+     @staticmethod

+     def _closure(dependencies, metadata):

+         # It is not very clear how to decide, for a workspace, what

+         # features are enabled for a package after resolving all the

+         # dependencies. We can trace back from the initial set of

+         # dependencies / features, until the final set of packages

+         # listed in `cargo metadata`, but this will imply replicate

+         # the resolution algorithm in cargo.

+         #

+         # For now we will do a simple closure for all the dependencies

+         # declared in the toml file, over the resolved dependencies

+         # from resolve/nodes/deps from the metadata, and will include

+         # all the features enabled for each package.

+         #

+         closure = []

+         # Find the correct version of the initial dependencies

+         for dep in dependencies:

+             crate = Metadata._match_crate(dep, metadata)

+             if not crate:

+                 raise ValueError(f'Cannot find crate for {dep}')

+             closure.append(crate)

+ 

+         # Close over the initial packages

+         for crate in closure:

+             for dep in crate['dependencies']:

+                 dep_crate = Metadata._find_crate(dep, metadata)

+                 if not dep_crate:

+                     raise ValueError(f'Cannot find crate for {dep}')

+                 if dep_crate not in closure:

+                     closure.append(dep_crate)

+ 

+         # Transform the crate information to a dependency

+         dependencies = []

+         for crate in closure:

+             name, version, _ = crate['id'].split()

+             dependencies.append(Dependency(name, f'={version}',

+                                            crate['features'] or ('default',),

+                                            bundled=True))

+         return dependencies

+ 

+     def resolved_dependencies(self, feature=None):

+         if not self._path:

+             raise ValueError('Metadata instance without Cargo.toml associated')

+ 

+         initial_deps = self._resolve(self.dependencies, feature)[1]

+         metadata = Metadata.metadata(self._path, deps=True)

+         return Metadata._closure(initial_deps, metadata)

+ 

  

  def normalize_deps(deps):

      return set().union(*(d.normalize() for d in deps))
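Putting the new pieces together, a minimal usage sketch (assuming the module layout implied by the imports above; from_file() now returns a list of Metadata objects, and resolved_dependencies() runs cargo under the hood):

    # Sketch: compute the bundled() provides for a crate with vendored
    # dependencies.
    from rust2rpm import Metadata

    mds = Metadata.from_file("Cargo.toml", include_members=True)
    for md in mds:
        for dep in md.resolved_dependencies():
            # Dependency objects built by _closure() have bundled=True,
            # so each one prints lines like:
            #   bundled(crate(adler/default)) = 1.0.2
            print(dep)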

  • support virtual manifest

    Some Cargo.toml manifest files are only references to workspaces. Metadata now recognizes those and replaces them with all the Cargo.toml files that compose the workspace.

  • find Cargo.toml from binary

    We can pass a binary name as a parameter instead of a Cargo.toml. cargo-inspector will dig into all the Cargo.toml files in the workspace (if any) and find the one that declares a binary target matching the binary file.

  • add simple version evaluator

    Extend the CargoSemVer class to check whether a version matches the requirements. It will be used in later commits; a usage sketch follows this list.

  • Add bundled() provider for vendoring

    This commit adds basic support for crate vendoring in Rust. The user
    will use something like cargo vendor to create a vendor directory
    (which can later be shipped as a tarball) containing all the
    dependencies.

    This patch analyzes the output of cargo metadata to calculate the
    closure of dependencies and, via the new parameter --provides-vendor,
    prints all the 'bundled(crate(NAME/FEATURE)) = 0.0.0' capabilities
    provided by the binary.

    The algorithm is not perfect: today it includes all the features
    resolved for the crate (though not all the available ones). It works
    roughly like this:

    1. A dependency generator macro, cargo_bundled, calls
    cargo-inspector like this:

    # The name of the binary is provided on STDIN
    cargo-inspector --provides-vendor --path %{_builddir}

    2. cargo-inspector searches the 'path' tree for a Cargo.toml that
    generates the binary name sent via STDIN.

    3. From this point, we walk up the tree to find the top-most
    Cargo.toml, as its directory is where .cargo/config lives. We make
    this directory our cwd.

    4. Using the output of cargo metadata, we generate the closure of
    dependencies required by this binary. To simplify the problem, the
    current code does not resolve the features itself and accepts the
    ones resolved by cargo as valid. Most of the time this will be OK;
    it may just include some extra features needed by other binaries.

    5. Print the 'bundled()' data.

    This code is only executed if a 'vendor' directory is present in the
    top-most directory found in step 3.
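As an illustration of the evaluator added in the "add simple version evaluator" commit, a minimal sketch (assuming the CargoSemVer API shown in the diff: parse_version(), .normalized, and eval_()):

    # Sketch: check whether version 1.2.3 satisfies the cargo
    # requirement "^1.2".
    from rust2rpm.metadata import CargoSemVer

    v1 = CargoSemVer.parse_version("1.2.3")
    # "^1.2" normalizes to (op, version) pairs, roughly
    # [(>=, 1.2.0), (<, 2.0.0)]
    normalized = CargoSemVer("^1.2").normalized
    print(all(CargoSemVer.eval_(v1, op, v2) for op, v2 in normalized))
    # -> True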

Any chance for a review? We can also discuss the suitability of this approach.

Hey Alberto,

First of all, thank you for taking time to work on this project and sending this PR :)

I wanted to test this on Zola, but this did not really work. I guess because you have done this only for provides and not for requires.

Can we also extend this to requires/buildrequires? Basically one thing I would love to see is support for workspaces.


Regarding the bundled provides, I think what you have described in here is good enough and can be improved later. Obviously the problem would be if you do %cargo_install and then mv /usr/bin/foo /usr/libexec/foo-init and then pass this thing.. We do that for stratisd, for instance. Also, the features are not going to be propagated (e.g. if you build binary with --no-default-features, the code will still look for default features).

At this point I start to think we need to design some format which would say which binaries to build, in which format and with which features as part of rust2rpm.conf so that inspector could take advantage of this.

That said, I like the idea (though I did not have time to look into the code, hopefully this weekend I will) and I would like to make workspace handling better as part of this PR. If you could make zola.spec build with support for workspace/path dependencies, that would be great.

Igor, thanks a lot for making the effort to review this PR that came out of nowhere, supporting a couple of features that were never discussed :))

I wanted to test this on Zola, but this did not really work. I guess because you have done this only for provides and not for requires.
Can we also extend this to requires/buildrequires?

Oh, I am lost. The last commit only makes sense for Provides, no? When vendoring, the binary will contain libraries that are Provides: bundled(). This only makes sense when there is a binary in the package, not for crates / libraries. So the binary has no requires anymore, and because it is vendored, no buildrequires either. Is this correct?

The use case that I have in mind is like this one:

https://build.opensuse.org/package/show/home:firstyear:kanidm/kanidm

The developer decided, for a first iteration, to provide a vendor.tgz as part of the project. The script takes care of not publishing any bundled(crate()) if there is no vendor directory in the BUILD project.

Basically one thing I would love to see is support for workspaces.

That is the first commit. It is an easy hack: take the workspace information and iterate over the different Cargo.toml files that compose it. Do you think we can do more?

Regarding the bundled provides, I think what you have described in here is good enough and can be improved later.

Obviously the problem would be if you do %cargo_install and then mv /usr/bin/foo /usr/libexec/foo-init and then pass this thing..

Ouch! Indeed, the binary will not be found. Very good point. No idea how I can track this...

We do that for stratisd, for instance.

Do you think that there is a clean fix for this?

Also, the features are not going to be propagated (e.g. if you build binary with --no-default-features, the code will still look for default features).

I need to learn about this. IIUC this will not enable the default feature, but your Cargo.toml can still require packages with some features enabled.

But you are right: cargo build can be run with --no-default-features, and my manifest call is always without this parameter. Again, not sure how to detect that.

Another problem that I see is that the metadata gives me the resolved dependencies for the whole project, not for the sub-workspace that generates this binary. In other words, cargo metadata --manifest-path sub-workspace/Cargo.toml gives me the same output as cargo metadata --manifest-path another-sub-workspace/Cargo.toml.

At this point I start to think we need to design some format which would say which binaries to build, in which format and with which features as part of rust2rpm.conf so that inspector could take advantage of this.

This can be an interesting idea. Are you suggesting that some magic %cargo_build macro would record the compilation options in this file, so that cargo-inspector has this information available later?
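A hypothetical shape for such a rust2rpm.conf section, purely for illustration (none of these keys exist today; the section and key names are invented):

    # Hypothetical: %cargo_build records its options per binary so that
    # cargo-inspector can replay them.  Names are invented for
    # illustration only.
    import configparser

    SAMPLE = "[bin.kanidmd]\nfeatures = cli,net\ndefault-features = false\n"
    conf = configparser.ConfigParser()
    conf.read_string(SAMPLE)
    print(conf["bin.kanidmd"]["features"])  # cli,net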

That said, I like the idea (though I did not have time to look into the code, hopefully this weekend I will) and I would like to make workspace handling better as part of this PR. If you could make zola.spec build with support for workspace/path dependencies, that would be great.

I will check!

4 new commits added

  • Add bundled() provider for vendoring
  • metadata: add simple version evaluator
  • inspector: find Cargo.toml from binary
  • metadata: support virtual manifest
2 years ago

1 new commit added

  • metadata: support workspaces for non-virtual crates
2 years ago

5 new commits added

  • metadata: support workspaces for non-virtual crates
  • Add bundled() provider for vendoring
  • metadata: add simple version evaluator
  • inspector: find Cargo.toml from binary
  • metadata: support virtual manifest
2 years ago

rebased onto fa3961b

2 years ago

@ignatenkobrain sorry for the delay, the Christmas holidays got in my way. I extended the PR to support workspaces for non-virtual crates like Zola.

I am worried that the refactor is a bit big, but the current test coverage is very low, so I do not know what I am breaking.

@ignatenkobrain did you have a chance to test the new code?

@aplanas So this looks mostly fine, but how do we handle license tag generation now? We need "license math" for all the dependencies factored in.

This has been confirmed to be a requirement in one package by @spot already.

@aplanas So this looks mostly fine, but how do we handle license tag generation now? We need "license math" for all the dependencies factored in.

Very good point. I will address this in this same PR.

rebased onto aa3beb5

2 years ago

In [2]: issubclass(KeyboardInterrupt, Exception)
Out[2]: False

hm?

@aplanas The code seems to be fine except for a small comment above...

Oh, and btw -- I think it does not ignore dev-dependencies when generating bundled provides. Or did I misunderstand that part?

In [2]: issubclass(KeyboardInterrupt, Exception)
Out[2]: False
hm?

Good catch. Both derive from BaseException, so we need to be explicit. I will update.

@aplanas The code seems to be fine except for a small comment above...
Oh, and btw -- I think it does not ignore dev-dependencies when generating bundled provides. Or did I misunderstand that part?

I need to check, but this can be true.

I need to check, but this can be true.

If you can make it ignore them, that would be cool. The devel stuff should not leak into the resulting binary license / provides.

Can you please remove those bits from the patch? They are not relevant, and make both the patch and the resulting code harder to read.

Sorry for the review in bits and pieces. Pagure unfortunately isn't suited to anything except the most trivial reviews. I'll just write my comments here in a single comment. I hope that'll be easier to read.

I think the first patch should just be dropped. The formatting changes are for the worse, and the change to exception handling introduces a bug.

 -             for f in features:
+             data.extend(md.provides(f) for f in features)

Not terribly important, but this is O(n²). Maybe data += [md.provides(f) for f in features]?

+ for line in sorted({d for dat in data for d in str(dat).splitlines()}): 

Hmm, that looks very iffy. I assume the set is there to remove duplication. OK, great, so why not use a set from the beginning? Also, you first join lines into a string and then split them... Please just don't join them in the first place, but extend the list (or set preferably) with separate items.

+         if not members:
+             members = [path]
+         for member in members:

Maybe for member in (members or [path]): ?

def _go_to_top_cargo_toml(source_path, path):
   ...

That isn't nice. Please just make the code work relative to a specified directory. Jumping around like this only makes sense in an interactive shell, not in library code, which should never modify any global state.

 dependencies = []
 for ... in ...:
    dependencies.append(...)

Please use a list comprehension.

 +     files = [_cargo_toml(args.path, f.rstrip()) for f in files] 

Why this chunk?

"Force print vendoring provides"
– that's not clear at all.

"Path where the source project is living"
– "Path to the source project" ?

 +         if not output.returncode:
+             manifest = json.loads(output.stdout)

So... if there's an error, this just silences it and returns an empty manifest. Sorry, but proper error handling needs to be implemented.

 +         f = f.rstrip() 

Why this line? Is something feeding badly formatted data?

+         mds = Metadata.from_file(f)
+         if isinstance(mds, list):
+             for md in mds:
+                 process_metadata(md)
+         else:
+             process_metadata(mds)

from_file() should just return a list.

 +                 return [
+                     cls.from_json(Metadata.manifest(m)) for m in members
+                 ]

Please make this one line.

rebased onto a2d766f

a year ago

5 new commits added

  • metadata: parse read-manifest directly
  • inspector: update path flag help
  • inspector: rename force flag
  • metadata: simplify expression
  • inspector: work directly with a set
a year ago

I think the first patch should just be dropped. The formatting changes are for the worse, and the change to exception handling introduces a bug.

Uhmm dropped.

I feel, though, that we should use a linter. Today black is the tool that every Python LSP uses, and it imposes a normalized formatting. I will try to do a separate commit for the formatting adjustment.

- for f in features: + data.extend(md.provides(f) for f in features)
Not terribly important, but this is O(n²). Maybe data += [md.provides(f) for f in features]?

I do not think so... both are O(n) (with n=len(features) in the first case, and n=len(data)+len(features) in the second).

timeit shows that both grow linearly.
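For reference, a quick way to measure this (a timeit sketch; absolute numbers will vary, but both calls should scale linearly with the number of items):

    import timeit

    setup = "features = [str(i) for i in range(1000)]"
    print(timeit.timeit("d = []; d.extend(f.upper() for f in features)",
                        setup=setup, number=1000))
    print(timeit.timeit("d = []; d += [f.upper() for f in features]",
                        setup=setup, number=1000))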

+ for line in sorted({d for dat in data for d in str(dat).splitlines()}):
Hmm, that looks very iffy. I assume the set is there to remove duplication. OK, great, so why not use a set from the beginning?. Also, you first join lines into a string and then split them... Please just don't join them in the first place, but extend the list (or set preferably) with separate items.

Indeed. Done.

+ if not members: + members = [path] + for member in members:
Maybe for member in (members or [path]): ?

IMHO the inline or hides the intention a bit, but I do not mind changing it. Done.

def _go_to_top_cargo_toml(source_path, path): ...
That isn't nice. Please just make the code work relative to a specified directory. Jumping around like this only make sense in an interactive shell, and not in library code which should never modify any global state.

I will try, but some cargo commands require running from a specific directory. If I address this, it will be in a future commit.

dependencies = [] for ... in ...: dependencies.append(...)
Please use a list comprehension.

Not possible. I am fetching name and version from one field of the element that I iterate over, and they are used later to build the Dependency object. Or do you see a way?

+ files = [_cargo_toml(args.path, f.rstrip()) for f in files]
Why this chunk?

What do you mean? This is collecting all the "Cargo.toml" files present in the project, searching by the executable name. The input can come from a command line parameter or from stdin. IIRC the rstrip() was needed because, depending on the input method, a \n is added at the end.

"Force print vendoring provides"
– that's not clear at all.

Right. I changed --force to --vendor and updated the help. Done.

"Path where the source project is living"
– "Path to the source project" ?

Done

+ if not output.returncode: + manifest = json.loads(output.stdout)
So... if there's an error, this just silences it and returns an empty manifest. Sorry, but proper error handling needs to be implemented.

I changed this a bit, but essentially yes. If it fails (because of a corner case in the read-manifest or metadata subcommands), we want to continue with the next Cargo.toml and generate an RPM, even with incomplete information.

I see this code as an approximation that I hope to iterate on later.

+ f = f.rstrip()
Why this line? Is something feeding badly formatted data?

This line is a deletion; I removed it. But yes, as commented before, there is a difference between data coming from a parameter and data coming from stdin.

+ mds = Metadata.from_file(f) + if isinstance(mds, list): + for md in mds: + process_metadata(md) + else: + process_metadata(mds)
from_file() should just return a list.

It does; I think you are commenting on old code.

+ return [ + cls.from_json(Metadata.manifest(m)) for m in members + ]
Please make this one line.

Uhm, I think this is not there anymore. But it is an indication that a run of black is needed, so we can avoid pointing out this kind of change.

Done, all except the chdir one. Thanks for the reviews!

@ignatenkobrain @aplanas What else is left here to get this to land?

@ignatenkobrain @aplanas What else is left here to get this to land?

I am deeply ashamed. There are some changes that I still need to make in this PR (as I recall, not much), but I always have something else to do in a different area.

Maybe a problem with this PR is that it mixes two topics: the bundled() tracking and the workspace improvements. I can try to split it; I just need to invest the time to get the context back and finish the job.

I am close to finishing another project, and I can try to make this PR a top priority before moving to the next one (I am very bad at working in parallel on non-trivial problems :( ). I am still very, very interested in resolving this problem.

No worries, we appreciate you working on this! If you need help, just feel free to reach out. :smile:

rebased onto fb71388

7 months ago

2 new commits added

  • Document _cargo_toml
  • Drop _go_to_top_cargo_toml
7 months ago

16 new commits added

  • Remove duplicated output at the end
  • inspector: dont collect all workspaces by default
  • metadata: virtual manifest dont have JSON
  • metadata: fix __repr__ for None feature
  • Document _cargo_toml
  • Drop _go_to_top_cargo_toml
  • metadata: parse read-manifest directly
  • inspector: update path flag help
  • inspector: rename force flag
  • metadata: simplify expression
  • inspector: work directly with a set
  • metadata: support workspaces for non-virtual crates
  • Add bundled() provider for vendoring
  • metadata: add simple version evaluator
  • inspector: find Cargo.toml from binary
  • metadata: support virtual manifest
7 months ago

I think this can be reviewed again, sorry for the delay.

I addressed some of the older complaints, for example not chdir'ing into the Cargo.toml directory. I tested it against a version of zola with vendoring (creating .cargo/config.toml after calling cargo vendor), and with kanidm, which was the principal motivator (it is a good example of a package with a pure virtual manifest).

There are some missing points:

  • License math. This can go in a different PR, and I think it can first be coupled only to the --provides-vendor option; we can later discuss whether it makes sense in the generic case, as the executable will include the crates regardless of whether the source RPM uses vendoring or not.

  • Propagate the enabled features of the package through the closure. I suspect the current logic of ._closure() may include too much because of mismanagement of the enabled features.

  • Full refactoring of the code and full test coverage. This second time around I had problems understanding the kind of object that Metadata.from_json creates. Also, I am not 100% sure that we have the right model for features.

@aplanas sorry for the delay. I would love to get this PR merged. I managed to reduce the number of open PRs down to two today, so I think there's hope ;)

Can you check that there are no rebase errors in your patches? I went by the order that pagure shows, and it seems that some patches reintroduce previous versions. Maybe pagure is showing things in the wrong order, in which case some of the comments below can be ignored.

It would be great if you could add some examples of output in the commit messages. This will make it easier to review the code and figure out what it is expected to be doing.

a [Dependency] instances

Typo.

for line in sorted(data):

This assumes that we don't have duplicates. (Before we had a set, so duplicates were removed automatically). Shouldn't this be sorted(set(data)) ?

# Pure virtual manifest cannot be readed, we need to use

"read"

  try:
        result = json.loads(output.stdout)

This is pretty ugly. Can we recognize the non-json output somehow?
It would be nicer to have something like

      if output.stdout == <something>:  
           return {}
      return json.loads(output.stdout)
  • features = sorted(feature or "" for feature in self.features)
  • return f"<Dependency: {self.name} {self.req} ({', '.join(features)})>"

This will generate ", ,", which is not nice. Please do instead:
features = sorted(feature for feature in self.features if feature is not None)
or
features = sorted(filter(None, self.features))

Drop _go_to_top_cargo_toml

Please comment why this is OK.

metadata: parse read-manifest directly

This is sloppy. If check==False, we'll try to parse potentially borked output. I think it should still return {} if the command failed.

inspector: rename force flag

This breaks backwards compatibility, but I think it's OK. The old name was rather bad, and it's easy enough to adjust to the new name. If people complain, we can add the old name back as hidden.

metadata: support workspaces for non-virtual crates

Eh, the code is still ugly. In particular, this processing of entries by creating and splitting strings is ugly and hard to understand. But let's leave it like this for now. This is not terribly important and shouldn't block this patchset.

  • if not members:
  • members = [path]

Hmm, one of the previous patches converted that to for member in members or [path]… Why resurrect the old form here?

raise Exception(f'Cargo.toml not found for binary {binary}')

raise FileNotFoundError(f'Cargo.toml not found for binary {binary}')

  • files = [_cargo_toml(args.path, f.rstrip()) for f in files]

That looks iffy. Where does the extra whitespace come from? If files is e.g. read from stdin, then it should be split and stripped properly there.

    instance = cls.from_json(Metadata.manifest(path))
    instance._path = path

Please just add a path argument to .from_json(). This will also obsolete the code below which sets ._path in a loop.

  • group.add_argument("-p", "--path", default=os.getcwd(), help="Path where the source project is living")

One of the previous commits changed that string to "Path to the source project", which is much better. Why resurrect the old version here?

  • # If fails, we still check that is a workspace

Garbled sentence.

rebased onto dc1935f

20 days ago

@aplanas sorry for the delay. I would love to get this PR merged. I managed to reduce the number of open PRs down to two today, so I think there's hope ;)

No problem! Thanks a lot for taking time to do the review of a no-small patch.

Can you check that there are no rebase errors in your patches?

I rebased the code.

I will have a hard time reading your review, though. The markdown is messing up the formatting here in pagure. I wonder if it is possible to do the review inline, in the code; that would help me get back into the context. I have the email, which shows the raw content, and I will try to work from there.

Typo

Fixed.

This assumes that we don't have duplicates. (Before we had a set, so duplicates were removed automatically). Shouldn't this be sorted(set(data)) ?

You can see at line 140 that data is declared as a set, and the for loop is already updating it. At this point in the code, data is still a set.

Done.

This is pretty ugly. Can we recognize the non-json output somehow?
It would be nicer to have something like
if output.stdout == <something>: return {} return json.loads(output.stdout)

Uhm, no. This is the correct way in Python ("it's easier to ask for forgiveness than permission"). It is the idiomatic approach here.

Detecting whether a random string is a valid JSON document requires parsing it and bailing out at the first error that invalidates the parse. This is exactly what we are doing.
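In other words, the standard EAFP idiom (a minimal sketch):

    import json

    def loads_or_empty(text):
        """Parse JSON, returning an empty manifest on failure."""
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            return {}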

This will generate ", ,", which is not nice. Please do instead:
features = sorted(feature for feature in self.features if feature is not None)
or
features = sorted(filter(None, self.features))

Ouch, indeed! I liked a variation of the first proposal. Fixed

Drop _go_to_top_cargo_toml

Please comment why this is OK.

No need; this commit removes code introduced earlier in this same patch set. Instead of altering the old commit, I created a new one. Maybe it is better if I squash all the commits into a single one?

Maybe is better if I squash all the commits in a single one?

Please don't squash all commits into one. The commits do many different things, and it'd be very hard to review the whole thing as one giant patch. But if there's some cleanups to earlier patches, then yeah, maybe it'd be nicer to squash them.

Anyway, please let me know when I should review again.

Maybe is better if I squash all the commits in a single one?

Please don't squash all commits into one. The commits do many different things, and it'd be very hard to review the whole thing as one giant patch. But if there's some cleanups to earlier patches, then yeah, maybe it'd be nicer to squash them.

This is a two-year-old patch, and I noticed that you are reviewing intermediate changes as they were present in the original code. Also, some of the patches are reviewed in reverse time order, making it look like I went from the better approach to the worse one, when, if you check the final changes, the good one is present at the end.

I think that squashing here will help the review.

Anyway, please let me know when I should review again.

Sure. Let me finish addressing the concerns and I will write here!

raise FileNotFoundError(f'Cargo.toml not found for binary {binary}')

Done.

That looks iffy. Where does the extra whitespace come from? If files is e.g. read from stdin, then it should be split and stripped properly there.

Uhmm, OK. I moved the rstrip() to the previous line. To me it seems more confusing now, though.

Please just add path argument to .from_json(). This will also obsolete the code below which sets ._path in a loop.

Yes. Done.

1 new commit added

  • Fixes from the last review
20 days ago

OK. Rebased, and I added a new final commit with the changes. For the next review, please use the latest changes; that should be easier:

https://pagure.io/fedora-rust/rust2rpm/pull-request/105#request_diff

The code looks OK. But I tried to make use of this, and I couldn't figure it out… Can you please provide short instructions on how to use this for some example project?

I know. I need some time, as I have to sit down and work on it.

Sorry to be a bother, but this is the last PR standing, and I'd like to release a fresh version for F35 once this is merged.

rebased onto b746a2e

6 days ago

Ouch, indeed. Here are some notes (I also rebased).

Testing the virtual manifest

Some packages, like kanidm or zola, are built using only
workspaces. The current code fails on these, for example:

cd zola
zola> cargo-inspector -n zola
error: the manifest-path must be a path to a Cargo.toml file
...

But it works with this branch:

zola> cargo-inspector -n zola
zola

The rest of the commands should behave properly.

Testing multi-binaries

Some packages have many binaries, stored in different workspaces /
crates.

cd kanidm
kanidm> cargo-inspector -n kanidm
kanidm_tools
kanidm> cargo-inspector -n kanidm_badlist_preprocess
kanidm_tools
kanidm> cargo-inspector -n kanidmd
kanidm

Testing the bundled

If the vendor directory is present and the new RPM generators are
installed, it will print the bundled() information to stdout, so it
can be integrated into the packages as extra Provides.

cd zola
cargo vendor
mkdir .cargo
cat <<EOF > .cargo/config.toml
[source.crates-io]
replace-with = "vendored-sources"

[source.vendored-sources]
directory = "vendor"
EOF
cargo-inspector --provides-vendor --path . zola
bundled(crate(adler/default)) = 1.0.2
bundled(crate(adler32/default)) = 1.2.0
bundled(crate(adler32/std)) = 1.2.0
...

Should not break any old behavior

Yeah ...

Commit "metadata: support workspaces for non-virtual crates" creates a type error:

$ PYTHONPATH=$HOME/python/rust2rpm python -m rust2rpm zram-generator                                       
Traceback (most recent call last):
  File "/usr/lib64/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib64/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/zbyszek/python/rust2rpm/rust2rpm/__main__.py", line 472, in <module>
    main()
  File "/home/zbyszek/python/rust2rpm/rust2rpm/__main__.py", line 366, in main
    bins = [tgt for tgt in metadata.targets if tgt.kind == "bin"]
AttributeError: 'list' object has no attribute 'targets'

Some parts of the code were adjusted to expect a list of Metadata objects, but the code here still expects a single Metadata object.

Looks good apart from that.

1 new commit added

  • rust2rpm: return single metadata
5 days ago

Some parts of the code were adjusted to expect a list of Metadata objects, but the code here still expects a single Metadata object.

Indeed! I added a commit to address this.

Pull-Request has been merged by zbyszek

5 days ago

\o/

I will not be surprised if unrelated problems show up later because of this patch, either in OBS or in the build system used in Fedora. Feel free to assign bugs to me.