#105 Add bundled() provider for vendoring
Opened a year ago by aplanas. Modified a month ago
fedora-rust/aplanas/rust2rpm: workspace into master

@@ -0,0 +1,3 @@ 

+ %__cargo_bundled_provides %{_bindir}/cargo-inspector --provides-vendor --path %{_builddir}

+ %__cargo_bundled_flags    exeonly

+ %__cargo_bundled_magic    ^(setuid,? )?(setgid,? )?(sticky )?ELF (32|64)-bit.*executable
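
For context, this is a standard RPM file-attribute generator: rpmbuild matches installed files against the magic above (restricted to executables by the exeonly flag), feeds the paths of the matching files on stdin, one per line, and collects the printed capabilities. A rough simulation of the call, with an illustrative binary path and output that are not taken from the PR:

import subprocess

out = subprocess.run(
    ["cargo-inspector", "--provides-vendor", "--path", "rpmbuild/BUILD"],
    input="rpmbuild/BUILDROOT/usr/bin/mybin\n",
    capture_output=True, text=True)
print(out.stdout)  # e.g. bundled(crate(libc/default)) = 0.2.66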

file modified
+93 -23
@@ -1,8 +1,53 @@ 

  import argparse

+ import glob

+ import os

  import sys

  

  from . import Metadata

- from .metadata import normalize_deps

+ from .metadata import normalize_deps, Dependency

+ 

+ 

+ def _get_binaries(cargo_toml):

+     manifest = Metadata.manifest(cargo_toml, check=False)

+     return [

+         t['name'] for t in manifest.get('targets', []) if 'bin' in t['kind']

+     ]

+ 

+ 

+ def _cargo_toml(source_path, path, exclude_vendor=True):

+     """Find a Cargo.toml related with a binary."""

+     # Finding the Cargo.toml is a bit tricky, and the general idea is

+     # like this:

+     #

+     #   - `source_path`: is set by the dependency generator call

+     #     (cargo_bundled.attr) to point to `%{_builddir}`, i.e. where

+     #     the sources are (`.../rpmbuild/BUILD`)

+     #

+     #   - `path`: can point to an executable when called by

+     #     cargo_bundled.attr; the executable lives in `%{_buildroot}`, for

+     #     example `.../rpmbuild/BUILDROOT/<PKGNAME>/usr/bin/<BINNAME>`

+     #

+     #   - `path`: can also point to a Cargo.toml when called from

+     #     the command line

+     #

+     #   - If `path` is a binary, we search for the Cargo.toml under

+     #     `source_path` that generates this specific binary

+ 

+     # If path is already a Cargo.toml file, we are done

+     binary_or_cargo_toml = os.path.basename(path)

+     if binary_or_cargo_toml == 'Cargo.toml':

+         return os.path.join(source_path, path)

+ 

+     cargo_tomls = glob.glob(os.path.join(source_path, '**/Cargo.toml'),

+                             recursive=True)

+     for cargo_toml in cargo_tomls:

+         if exclude_vendor and 'vendor' in cargo_toml.split(os.sep):

+             continue

+         if binary_or_cargo_toml in _get_binaries(cargo_toml):

+             return cargo_toml

+ 

+     raise ValueError(f'Cargo.toml not found for binary {binary_or_cargo_toml}')

+ 

  

  def main():

      parser = argparse.ArgumentParser()
@@ -16,56 +61,81 @@ 

      group.add_argument("-R", "--requires", action="store_true", help="Print Requires")

      group.add_argument("-BR", "--build-requires", action="store_true", help="Print BuildRequires")

      group.add_argument("-TR", "--test-requires", action="store_true", help="Print TestRequires")

+     group.add_argument("-PV", "--provides-vendor", action="store_true", help="Print Provides when vendoring")

      fgroup = parser.add_mutually_exclusive_group()

      fgroup.add_argument("-a", "--all-features", action="store_true", help="Activate all features")

      fgroup.add_argument("-f", "--features", default="default", help="Feature(s) to work on (comma-separated)")

+     parser.add_argument("--vendor", action="store_true", help="Print vendoring provides (even when not vendored)")

+     parser.add_argument("--include-workspaces", action="store_true", help="Include workspaces in the analysis")

+     parser.add_argument("-p", "--path", help="Path to the source project")

      parser.add_argument("file", nargs="*", help="Path(s) to Cargo.toml")

      args = parser.parse_args()

  

+     args.path = os.path.abspath(args.path) if args.path else os.getcwd()

+ 

      files = args.file or sys.stdin.readlines()

+     files = [_cargo_toml(args.path, f.rstrip()) for f in files]

  

      features = set()

      for f in args.features.split(","):

          features.add(f or None)

  

-     def print_deps(deps):

-         if len(deps) > 0:

-             print("\n".join(sorted(normalize_deps(deps))))

- 

-     for f in files:

-         f = f.rstrip()

-         md = Metadata.from_file(f)

+     def process_metadata(md):

+         data = []

          if args.name:

-             print(md.name)

+             data.append(md.name)

          if args.version:

-             print(md._version)

+             data.append(md._version)

          if args.rpm_version:

-             print(md.version)

+             data.append(md.version)

          if args.target_kinds:

-             print("\n".join(set(tgt.kind for tgt in md.targets)))

+             data.extend(tgt.kind for tgt in md.targets)

          if args.list_features:

-             for f in sorted(f for f in md.dependencies if f is not None):

-                 print(f)

+             data.extend(f for f in md.dependencies if f is not None)

          if args.provides:

-             for f in features:

-                 print(md.provides(f))

+             data.extend(md.provides(f) for f in features)

          if args.requires:

              # Someone should own /usr/share/cargo/registry

-             print("cargo")

+             data.append('cargo')

              if args.all_features:

-                 print_deps(md.all_dependencies)

+                 data.extend(md.all_dependencies)

              else:

                  for f in features:

-                     print_deps(md.requires(f))

+                     data.extend(md.requires(f))

          if args.build_requires:

-             print("rust-packaging")

+             data.append("rust-packaging")

              if args.all_features:

-                 print_deps(md.all_dependencies)

+                 data.extend(md.all_dependencies)

              else:

                  for f in features:

-                     print_deps(md.requires(f, resolve=True))

+                     data.extend(md.requires(f, resolve=True))

          if args.test_requires:

-             print_deps(md.dev_dependencies)

+             data.extend(md.dev_dependencies)

+         if args.provides_vendor:

+             # Print the vendoring provides only if the 'vendor'

+             # directory is present

+             if args.vendor or os.path.isdir('vendor'):

+                 data.extend(md.resolved_dependencies())

+         return data

+ 

+     for f in files:

+         data = set()

+         mds = Metadata.from_file(f, include_members=args.include_workspaces)

+         for md in mds:

+             # process_metadata returns a list that can contain both

+             # strings and Dependency instances; a Dependency renders

+             # as multiple lines.  To sort the data and remove the

+             # duplicates we first normalize everything into strings

+             metadata_lines = []

+             for metadata in process_metadata(md):

+                 if isinstance(metadata, Dependency):

+                     metadata_lines.extend(metadata.normalize())

+                 else:

+                     metadata_lines.append(metadata)

+             data.update(metadata_lines)

+         for line in sorted(data):

+             print(line)

+ 

  

  if __name__ == "__main__":

      main()

file modified
+146 -8
@@ -3,8 +3,10 @@ 

  import collections

  import copy

  import json

+ import os.path

  import re

  import subprocess

+ from urllib.parse import urlparse

  

  

  Requirement = collections.namedtuple('Requirement', ('kind',
@@ -162,6 +164,35 @@ 

              raise ValueError(f'Found unhandled kind: {requirement}')

          return normalized

  

+     @staticmethod

+     def eval_(v1, op, v2):

+         if op == CargoSemVer.KIND_SHORTEQ:

+             return all((v1.major == v2.major,

+                         v1.minor == v2.minor,

+                         v1.patch == v2.patch))

+         elif op == CargoSemVer.KIND_GT:

+             return ((v1.major > v2.major) or

+                     (v1.major == v2.major and v1.minor > v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch > v2.patch))

+         elif op == CargoSemVer.KIND_GTE:

+         return ((v1.major > v2.major) or

+                 (v1.major == v2.major and v1.minor > v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch >= v2.patch))

+         elif op == CargoSemVer.KIND_LT:

+             return ((v1.major < v2.major) or

+                     (v1.major == v2.major and v1.minor < v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch < v2.patch))

+         elif op == CargoSemVer.KIND_LTE:

+         return ((v1.major < v2.major) or

+                 (v1.major == v2.major and v1.minor < v2.minor) or

+                     (v1.major == v2.major and v1.minor == v2.minor and

+                      v1.patch <= v2.patch))

+         else:

+             raise ValueError(f'Cannot evaluate operator: {op}')

+ 

  

  class Target:

      def __init__(self, name, kind):
@@ -173,11 +204,13 @@ 

  

  

  class Dependency:

-     def __init__(self, name, req=None, features=(), optional=False):

+     def __init__(self, name, req=None, features=(), optional=False,

+                  bundled=False):

          self.name = name

          self.req = req

          self.features = features

          self.optional = optional

+         self.bundled = bundled

  

      @classmethod

      def from_json(cls, metadata):
@@ -191,9 +224,11 @@ 

          return cls(**kwargs)

  

      @staticmethod

-     def _apply_reqs(name, reqs, feature=None):

+     def _apply_reqs(name, reqs, feature=None, bundled=False):

          fstr = f"/{feature}" if feature is not None else ""

          cap = f"crate({name}{fstr})"

+         if bundled:

+             cap = f"bundled({cap})"

          if not reqs:

              return cap

          deps = ' with '.join(
@@ -206,11 +241,13 @@ 

  

      def normalize(self):

          semver = CargoSemVer(self.req)

-         return [self._apply_reqs(self.name, semver.normalized, feature)

+         return [self._apply_reqs(self.name, semver.normalized, feature,

+                                  self.bundled)

                  for feature in self.features or (None,)]

  

      def __repr__(self):

-         return f"<Dependency: {self.name} {self.req} ({', '.join(sorted(self.features))})>"

+         features = sorted(feature or "" for feature in self.features)

+         return f"<Dependency: {self.name} {self.req} ({', '.join(features)})>"

  

      def __str__(self):

          return "\n".join(self.normalize())
@@ -230,6 +267,7 @@ 

          self.targets = set()

          self.dependencies = {}

          self.dev_dependencies = set()

+         self._path = None

  

      @property

      def description(self):
@@ -333,10 +371,45 @@ 

          return self

  

      @classmethod

-     def from_file(cls, path):

-         metadata = subprocess.check_output(["cargo", "read-manifest",

-                                             f"--manifest-path={path}"])

-         return cls.from_json(json.loads(metadata))

+     def from_file(cls, path, include_members=False):

+         instances = []

+         members = Metadata.members(path) if include_members else []

+         for member in (members or [path]):

+             instance = cls.from_json(Metadata.manifest(member))

+             instance._path = member

+             instances.append(instance)

+         return instances

+ 

+     @staticmethod

+     def manifest(path, check=True):

+         output = subprocess.run(

+             ["cargo", "read-manifest", f"--manifest-path={path}"],

+             check=check, capture_output=True

+         )

+         try:

+             result = json.loads(output.stdout)

+         except json.decoder.JSONDecodeError:

+             # A pure virtual manifest cannot be read; we need to use

+             # the manifests from the workspace members instead

+             result = {}

+         return result

+ 

+     @staticmethod

+     def metadata(path, deps=False):

+         cmd = ["cargo", "metadata", "--format-version=1",

+                f"--manifest-path={path}"]

+         if not deps:

+             cmd.append("--no-deps")

+         return json.loads(subprocess.check_output(cmd))

+ 

+     @staticmethod

+     def members(path):

+         members = []

+         metadata = Metadata.metadata(path)

+         for workspace in metadata.get('workspace_members', []):

+             path = re.search(r'\((.*)\)', workspace).group(1)

+             members.append(os.path.join(urlparse(path).path, 'Cargo.toml'))

+         return members

  

      @property

      def all_dependencies(self):
@@ -369,6 +442,71 @@ 

                          for feature in features)

              return fdeps | deps

  

+     @staticmethod

+     def _match_crate(dependency, metadata):

+         for crate in metadata['resolve']['nodes']:

+             name, version, _ = crate['id'].split()

+             if name != dependency.name:

+                 continue

+             v1 = CargoSemVer.parse_version(version)

+             normalized = CargoSemVer(dependency.req).normalized

+             if all(CargoSemVer.eval_(v1, op, v2) for op, v2 in normalized):

+                 return crate

+ 

+     @staticmethod

+     def _find_crate(dependency, metadata):

+         for crate in metadata['resolve']['nodes']:

+             if dependency == crate['id']:

+                 return crate

+ 

+     @staticmethod

+     def _closure(dependencies, metadata):

+         # It is not very clear how to decide, for a workspace, which

+         # features are enabled for a package after resolving all the

+         # dependencies. We could trace back from the initial set of

+         # dependencies / features to the final set of packages

+         # listed in `cargo metadata`, but this would mean replicating

+         # the resolution algorithm of cargo.

+         #

+         # For now we do a simple closure over all the dependencies

+         # declared in the toml file, using the resolved dependencies

+         # from resolve/nodes/deps in the metadata, and include

+         # all the features enabled for each package.

+         #

+         closure = []

+         # Find the correct version of the initial dependencies

+         for dep in dependencies:

+             crate = Metadata._match_crate(dep, metadata)

+             if not crate:

+                 raise ValueError(f'Cannot find crate for {dep}')

+             closure.append(crate)

+ 

+         # Close over the initial packages

+         for crate in closure:

+             for dep in crate['dependencies']:

+                 dep_crate = Metadata._find_crate(dep, metadata)

+                 if not dep_crate:

+                     raise ValueError(f'Cannot find crate for {dep}')

+                 if dep_crate not in closure:

+                     closure.append(dep_crate)

+ 

+         # Transform the crate information to a dependency

+         dependencies = []

+         for crate in closure:

+             name, version, _ = crate['id'].split()

+             dependencies.append(Dependency(name, f'={version}',

+                                            crate['features'] or ('default',),

+                                            bundled=True))

+         return dependencies

+ 

+     def resolved_dependencies(self, feature=None):

+         if not self._path:

+             raise ValueError('Metadata instance without Cargo.toml associated')

+ 

+         initial_deps = self._resolve(self.dependencies, feature)[1]

+         metadata = Metadata.metadata(self._path, deps=True)

+         return Metadata._closure(initial_deps, metadata)

+ 

  

  def normalize_deps(deps):

      return set().union(*(d.normalize() for d in deps))

  • support virtual manifest

    Some Cargo.toml manifest files are only references to workspaces. Metadata now recognizes those and replaces them with all the Cargo.toml files that compose the workspace.

  • find Cargo.toml from binary

    We can pass a binary name as a parameter instead of a Cargo.toml. cargo-inspector will dig into all the Cargo.toml files in the workspace (if any) and find the one that declares a binary target matching the binary file.

  • add simple version evaluator

    Extend the CargoSemVer class to check whether a version matches the requirements. It will be used in future commits.

  • Add bundled() provider for vendoring

    This commit adds basic support for crate vendoring in Rust. The
    user will use something like cargo vendor to create a vendor
    directory (which can later be shipped as a tarball) that contains
    all the dependencies.

    This patch analyzes the output of cargo metadata to calculate the
    closure of dependencies and, via the new parameter --provides-vendor,
    prints all the 'bundled(crate(NAME/FEATURE)) = 0.0.0' capabilities
    provided by the binary.

    The algorithm is not perfect: today it will include all the features
    resolved for the crate (though not all the available ones). It
    basically works like this:

    1. A dependency generator macro, cargo_bundled, will call
    cargo-inspector like this:

    # The name of the binary is provided on STDIN
    cargo-inspector --provides-vendor --path %{_builddir}

    2. cargo-inspector will search the 'path' tree for a Cargo.toml
    that generates the binary name sent via STDIN.

    3. From this point, we go up the tree to find the top-most
    Cargo.toml, as this will be the directory where .cargo/config
    lives. We make this directory our cwd.

    4. Using the output of cargo metadata, we generate the closure of
    dependencies required by this binary. To simplify the problem, the
    current code does not resolve the features, and accepts the ones
    resolved by cargo as valid. Most of the time this will be OK; it
    may include some extra features needed for other binaries.

    5. Print the 'bundled()' data.

    This code will only be executed if the directory 'vendor' is
    present in the top-most directory found in step 3.
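
A minimal standalone sketch of steps 4 and 5, assuming the resolve-graph layout of cargo metadata --format-version=1 (node ids of the form 'name version (source)', as the patch itself assumes) and a non-virtual manifest so that resolve["root"] is set. The helper name bundled_provides is illustrative; the real patch starts from the dependencies declared in the located Cargo.toml rather than from the root package:

import json
import subprocess


def bundled_provides(manifest_path):
    # Resolve graph of the whole project; requires a non-virtual
    # manifest so that resolve["root"] is set
    metadata = json.loads(subprocess.check_output(
        ["cargo", "metadata", "--format-version=1",
         f"--manifest-path={manifest_path}"]))
    nodes = {node["id"]: node for node in metadata["resolve"]["nodes"]}
    # Step 4: close over the resolved dependency graph
    todo = [metadata["resolve"]["root"]]
    seen = set()
    while todo:
        node = nodes[todo.pop()]
        if node["id"] in seen:
            continue
        seen.add(node["id"])
        todo.extend(node["dependencies"])
    # Step 5: one bundled() capability per crate/feature pair
    provides = []
    for crate_id in sorted(seen):
        name, version, _ = crate_id.split()
        for feature in nodes[crate_id]["features"] or ["default"]:
            provides.append(f"bundled(crate({name}/{feature})) = {version}")
    return provides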

Any chance of a review? We can also discuss whether this approach is the right one.

Hey Alberto,

First of all, thank you for taking time to work on this project and sending this PR :)

I wanted to test this on Zola, but this did not really work. I guess because you have done this only for provides and not for requires.

Can we also extend this to requires/buildrequires? Basically one thing I would love to see is support for workspaces.


Regarding the bundled provides, I think what you have described here is good enough and can be improved later. Obviously the problem would be if you do %cargo_install and then mv /usr/bin/foo /usr/libexec/foo-init and then pass this thing... We do that for stratisd, for instance. Also, the features are not going to be propagated (e.g. if you build a binary with --no-default-features, the code will still look for default features).

At this point I start to think we need to design some format which would say which binaries to build, in which format and with which features as part of rust2rpm.conf so that inspector could take advantage of this.

That said, I like the idea (though I did not have time to look into the code, hopefully this weekend I will) and I would like to make workspace handling better as part of this PR. If you can make zola.spec build with support for workspace/path dependencies, it would be great.

Igor, thanks a lot for making the effort and reviewing this PR that comes out of nowhere, to support a couple of features that were never discussed : ))

I wanted to test this on Zola, but this did not really work. I guess because you have done this only for provides and not for requires.
Can we also extend this to requires/buildrequires?

Oh, now I am lost. The last commit only makes sense for Provides, no? When vendoring, the binary will contain libraries that are Provides: bundled(). This only makes sense when there is a binary in the package, not for crates / libraries. So the binary has no Requires anymore, and because it is vendored, no BuildRequires either. Is this correct?

The use case that I have in mind is like this one:

https://build.opensuse.org/package/show/home:firstyear:kanidm/kanidm

For a first iteration, the developer decided to provide a vendor.tgz as part of the project. The script takes care of not publishing any bundled(crate()) if there is no vendor directory in the BUILD project.

Basically one thing I would love to see is support for workspaces.

This is the first commit. It is an easy hack: take the workspace information and iterate over the different Cargo.toml files that compose it. Do you think we can do more?

Regarding the bundled provides, I think what you have described here is good enough and can be improved later.

Obviously the problem would be if you do %cargo_install and then mv /usr/bin/foo /usr/libexec/foo-init and then pass this thing... We do that for stratisd, for instance.

Ouch! Indeed, the binary will not be found. Very good point. No idea how I can track this...

We do that for stratisd, for instance.

Do you think that there is a clean fix for this?

Also, the features are not going to be propagated (e.g. if you build a binary with --no-default-features, the code will still look for default features).

I need to learn about this. IIUC this will not enable the default feature, but your Cargo.toml can still require packages with some features enabled.

But you are right: cargo build can be run with --no-default-features, but my manifest call is always made without this parameter. Again, not sure how to detect that.

Another problem that I see is that the metadata gives me the resolved dependencies for the whole project, not for the sub-workspace that generates this binary. In other words, cargo metadata --manifest-path sub-workspace/Cargo.toml gives me the same output as cargo metadata --manifest-path another-sub-workspace/Cargo.toml.

At this point I start to think we need to design some format which would say which binaries to build, in which format and with which features as part of rust2rpm.conf so that inspector could take advantage of this.

This could be an interesting idea. Are you suggesting that some magic %cargo_build macro would record the compilation options in this file, so that cargo-inspector has this info later?

That said, I like the idea (though I did not have time to look into the code, hopefully this weekend I will) and I would like to make workspace handling better as part of this PR. If you can make zola.spec build with support for workspace/path dependencies, it would be great.

I will check!

4 new commits added

  • Add bundled() provider for vendoring
  • metadata: add simple version evaluator
  • inspector: find Cargo.toml from binary
  • metadata: support virtual manifest
a year ago

1 new commit added

  • metadata: support workspaces for non-virtual crates
a year ago

5 new commits added

  • metadata: support workspaces for non-virtual crates
  • Add bundled() provider for vendoring
  • metadata: add simple version evaluator
  • inspector: find Cargo.toml from binary
  • metadata: support virtual manifest
a year ago

rebased onto fa3961b

a year ago

@ignatenkobrain sorry for the delay, the Christmas holidays got in my way. I extended the PR to support workspaces for non-virtual crates like Zola.

I am worried that the refactor is a bit big, but the current test coverage is very low, so I do not know what I am breaking.

@ignatenkobrain did you have a chance to test the new code?

@aplanas So this looks mostly fine, but how do we handle license tag generation now? We need "license math" for all the dependencies factored in.

This has been confirmed to be a requirement in one package by @spot already.

@aplanas So this looks mostly fine, but how do we handle license tag generation now? We need "license math" for all the dependencies factored in.

Very good point. I will address this in this same PR.

rebased onto aa3beb5

a year ago

In [2]: issubclass(KeyboardInterrupt, Exception)
Out[2]: False

hm?

@aplanas The code seems to be fine except for a small comment above...

Oh, and btw -- I think it does not ignore dev-dependencies when generating bundled provides. Or did I misunderstand that part?

In [2]: issubclass(KeyboardInterrupt, Exception)
Out[2]: False
hm?

Good catch. Both derive from BaseException, so we need to be explicit. I will update.
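
For reference, a quick illustration of the pitfall: KeyboardInterrupt (and SystemExit) derive from BaseException but not from Exception, so an except Exception handler lets them propagate:

try:
    raise KeyboardInterrupt
except Exception:
    print("never reached")        # KeyboardInterrupt is not an Exception
except BaseException:
    print("caught explicitly")    # this branch runs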

@aplanas The code seems to be fine except for a small comment above...
Oh, and btw -- I think it does not ignore dev-dependencies when generating bundled provides. Or did I misunderstand that part?

I need to check, but this can be true.

I need to check, but this can be true.

If you can make it ignore them, that would be cool. The devel stuff should not leak into the resulting binary's license / provides.

Can you please remove those bits from the patch? They are not relevant, and make both the patch and the resulting code harder to read.

Sorry for the review in bits and pieces. Pagure unfortunately isn't suited to anything except the most trivial reviews. I'll just write my comments here in a single comment. I hope that'll be easier to read.

I think the first patch should just be dropped. The formatting changes are for the worse, and the change to exception handling introduces a bug.

 -             for f in features:
+             data.extend(md.provides(f) for f in features)

Not terribly important, but this is O(n²). Maybe data += [md.provides(f) for f in features]?

+ for line in sorted({d for dat in data for d in str(dat).splitlines()}): 

Hmm, that looks very iffy. I assume the set is there to remove duplication. OK, great, so why not use a set from the beginning? Also, you first join lines into a string and then split them... Please just don't join them in the first place, but extend the list (or set preferably) with separate items.
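
Something along these lines, as a sketch of the suggestion (reusing process_metadata, md and Dependency.normalize() from the patch above; normalize() already returns one capability string per feature, so nothing needs to be joined and re-split):

data = set()
for item in process_metadata(md):
    # normalize() yields separate capability strings
    data.update(item.normalize() if isinstance(item, Dependency) else [item])
for line in sorted(data):
    print(line)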

+         if not members:
+             members = [path]
+         for member in members:

Maybe for member in (members or [path]): ?

def _go_to_top_cargo_toml(source_path, path):
   ...

That isn't nice. Please just make the code work relative to a specified directory. Jumping around like this only makes sense in an interactive shell, not in library code, which should never modify any global state.
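
A sketch of the suggestion (not the PR's code): every cargo invocation can take the directory explicitly through the cwd argument of subprocess.run, leaving the process-wide working directory alone:

import subprocess

def read_manifest(crate_dir):
    # Run cargo relative to crate_dir without touching the global cwd
    output = subprocess.run(
        ["cargo", "read-manifest", "--manifest-path=Cargo.toml"],
        cwd=crate_dir, capture_output=True, check=True)
    return output.stdout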

 dependencies = []
 for ... in ...:
    dependencies.append(...)

Please use a list comprehension.

 +     files = [_cargo_toml(args.path, f.rstrip()) for f in files] 

Why this chunk?

"Force print vendoring provides"
– that's not clear at all.

"Path where the source project is living"
– "Path to the source project" ?

 +         if not output.returncode:
+             manifest = json.loads(output.stdout)

So... if there's an error, this just silences it and returns an empty manifest. Sorry, but proper error handling needs to be implemented.

 +         f = f.rstrip() 

Why this line? Is something feeding badly formatted data?

+         mds = Metadata.from_file(f)
+         if isinstance(mds, list):
+             for md in mds:
+                 process_metadata(md)
+         else:
+             process_metadata(mds)

from_file() should just return a list.

 +                 return [
+                     cls.from_json(Metadata.manifest(m)) for m in members
+                 ]

Please make this one line.

rebased onto a2d766f

6 months ago

5 new commits added

  • metadata: parse read-manifest directly
  • inspector: update path flag help
  • inspector: rename force flag
  • metadata: simplify expression
  • inspector: work directly with a set
6 months ago

I think the first patch should just be dropped. The formatting changes are for the worse, and the change to exception handling introduces a bug.

Uhmm dropped.

I feel, though, that we should use a linter. Today black is a tool that virtually every Python LSP uses, and it imposes a normalized formatting. I will try to do a separate commit for the formatting adjustments.

- for f in features: + data.extend(md.provides(f) for f in features)
Not terribly important, but this is O(n²). Maybe data += [md.provides(f) for f in features]?

I do not think so... both are O(n) (for n=len(features) in the first case, and n=len(data + features) in the second).

timeit shows that both grow linearly.
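
An illustrative measurement (not from the thread): doubling the number of chunks roughly doubles the runtime for both variants, i.e. linear growth:

import timeit

setup = "chunks = [['x'] * 10 for _ in range({n})]"
for n in (10_000, 20_000, 40_000):
    extend = timeit.timeit("d = []\nfor c in chunks: d.extend(c)",
                           setup=setup.format(n=n), number=50)
    concat = timeit.timeit("d = []\nfor c in chunks: d += c",
                           setup=setup.format(n=n), number=50)
    print(n, round(extend, 3), round(concat, 3))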

+ for line in sorted({d for dat in data for d in str(dat).splitlines()}):
Hmm, that looks very iffy. I assume the set is there to remove duplication. OK, great, so why not use a set from the beginning? Also, you first join lines into a string and then split them... Please just don't join them in the first place, but extend the list (or set preferably) with separate items.

Indeed. Done.

+ if not members: + members = [path] + for member in members:
Maybe for member in (members or [path]): ?

IMHO the inline or hides the intention a bit, but I do not mind changing it. Done.

def _go_to_top_cargo_toml(source_path, path): ...
That isn't nice. Please just make the code work relative to a specified directory. Jumping around like this only make sense in an interactive shell, and not in library code which should never modify any global state.

I will try, but some cargo commands require being run from a specific directory. If I address this, it will be in a future commit.

dependencies = [] for ... in ...: dependencies.append(...)
Please use a list comprehension.

Not possible. I am fetching name and version from one field of the element I iterate over, and they are used later to build the Dependency object. Or do you see a way?
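
For what it is worth, the unpacking can be folded into a comprehension with a single-element inner iterable (closure and Dependency as in the patch above); whether this is clearer than the explicit loop is a matter of taste:

dependencies = [
    Dependency(name, f'={version}',
               crate['features'] or ('default',), bundled=True)
    for crate in closure
    for name, version, _ in [crate['id'].split()]
]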

+ files = [_cargo_toml(args.path, f.rstrip()) for f in files]
Why this chunk?

What do you mean? This is collecting all the Cargo.toml files present in the project, searching by the executable name. The input can come from a command line parameter or from stdin. IIRC the rstrip() was needed because, depending on the input method, a \n may be added at the end.

"Force print vendoring provides"
– that's not clear at all.

Right. I replaced --force with --vendor and updated the help. Done.

"Path where the source project is living"
– "Path to the source project" ?

Done

+ if not output.returncode: + manifest = json.loads(output.stdout)
So... if there's an error, this just silences it and returns an empty manifest. Sorry, but proper error handling needs to be implemented.

I changed this a bit, but actually yes. If it fails (because of a corner case in the read-manifest or metadata subcommands) we want to continue with the next Cargo.toml and generate an RPM, even with incomplete information.

I see this code as a first approximation that I hope to iterate on later.

+ f = f.rstrip()
Why this line? Is something feeding badly formatted data?

This line is a deletion; I removed it. But yes, as commented before, there is a difference between when the data comes from a parameter and when it comes from stdin.

+ mds = Metadata.from_file(f) + if isinstance(mds, list): + for md in mds: + process_metadata(md) + else: + process_metadata(mds)
from_file() should just return a list.

It does; I think you are commenting on old code.

+ return [ + cls.from_json(Metadata.manifest(m)) for m in members + ]
Please make this one line.

Uhm, I think this is not there anymore. But it is an indication that a run of black is needed, so we can avoid pointing out this kind of change.

All done except the change-directory one. Thanks for the reviews!

@ignatenkobrain @aplanas What else is left here to get this to land?

@ignatenkobrain @aplanas What else is left here to get this to land?

I am deeply ashamed. There are some changes that I need to make in this PR (not much, as I recall), but I always have something else to do in a different area.

Maybe one problem with this PR is that it mixes two topics: the bundled() tracking and the workspace improvements. I can try to split it; I just need to invest time in regaining the context and finalizing the job.

I am close to finishing a project, and I can try to make this PR a top priority before moving to the next one (I am very bad at working in parallel on non-trivial problems : ( ). I am still very, very interested in resolving this problem.

No worries, we appreciate you working on this! If you need help, just feel free to reach out. :smile:

rebased onto fb71388

a month ago

2 new commits added

  • Document _cargo_toml
  • Drop _go_to_top_cargo_toml
a month ago

16 new commits added

  • Remove duplicated output at the end
  • inspector: dont collect all workspaces by default
  • metadata: virtual manifest dont have JSON
  • metadata: fix __repr__ for None feature
  • Document _cargo_toml
  • Drop _go_to_top_cargo_toml
  • metadata: parse read-manifest directly
  • inspector: update path flag help
  • inspector: rename force flag
  • metadata: simplify expression
  • inspector: work directly with a set
  • metadata: support workspaces for non-virtual crates
  • Add bundled() provider for vendoring
  • metadata: add simple version evaluator
  • inspector: find Cargo.toml from binary
  • metadata: support virtual manifest
a month ago

I think that this can be reviewed again, sorry for the delay.

I addressed some of the older complaints, for example not chdir'ing into the Cargo.toml directory. I tested it against a version of zola with vendoring (creating .cargo/config.toml after calling cargo vendor), and against kanidm, which was the principal motivator (it is a good example of a package with a pure virtual manifest).

There are some missing points:

  • License math. This can go in a different PR; I think it can first be coupled only to the --provides-vendor option, and we can later discuss whether it makes sense in the generic case, since the executable will include the crates regardless of whether the source RPM uses vendoring or not.

  • Propagate the enabled features of the package through the closure. I suspect that the current logic of ._closure() may be including too much because of mismanaged enabled features.

  • Full refactoring of the code and full test coverage. This second time around I had problems understanding the kind of object that Metadata.from_json creates. Also, I am not 100% sure that we have the right model for features.