#1475 allow reuse of added components in some cases
Merged 4 years ago by mikem. Opened 4 years ago by mikem.
mikem/fm-orchestrator reuse-added-components  into  master

@@ -403,8 +403,8 @@ 

          # Create separate lists for the new and previous module build. These lists

          # will have an entry for every build batch *before* the component's

          # batch except for 1, which is reserved for the module-build-macros RPM.

-         # Each batch entry will contain a set of "(name, ref)" with the name and

-         # ref (commit) of the component.

+         # Each batch entry will contain a set of "(name, ref, arches)" with the name,

+         # ref (commit), and arches of the component.

          for i in range(new_module_build_component.batch - 1):

              # This is the first batch which we want to skip since it will always

              # contain only the module-build-macros RPM and it gets built every time
@@ -412,37 +412,34 @@ 

                  continue

  

              new_module_build_components.append({

-                 (value.package, value.ref)

+                 (value.package, value.ref,

+                     tuple(sorted(mmd.get_rpm_component(value.package).get_arches())))

                  for value in new_component_builds

                  if value.batch == i + 1

              })

  

              previous_module_build_components.append({

-                 (value.package, value.ref)

+                 (value.package, value.ref,

+                     tuple(sorted(old_mmd.get_rpm_component(value.package).get_arches())))

                  for value in prev_component_builds

                  if value.batch == i + 1

              })

  

-         # If the previous batches don't have the same ordering and hashes, then the

+         # If the previous batches don't have the same ordering, hashes, and arches, then the

          # component can't be reused

          if previous_module_build_components != new_module_build_components:

              message = ("Cannot reuse the component because a component in a previous"

-                        " batch has been rebuilt")

+                        " batch has been added, removed, or rebuilt")

              new_module_build_component.log_message(db_session, message)

              return None

  

-     for pkg_name in mmd.get_rpm_component_names():

-         pkg = mmd.get_rpm_component(pkg_name)

-         if pkg_name not in old_mmd.get_rpm_component_names():

-             message = ("Cannot reuse the component because a component was added or "

-                        "removed since the compatible module")

-             new_module_build_component.log_message(db_session, message)

-             return None

-         if set(pkg.get_arches()) != set(old_mmd.get_rpm_component(pkg_name).get_arches()):

-             message = ("Cannot reuse the component because the architectures for the package {}"

-                        " have changed since the compatible module build").format(pkg_name)

-             new_module_build_component.log_message(db_session, message)

-             return None

+     # check that arches have not changed

+     pkg = mmd.get_rpm_component(component_name)

+     if set(pkg.get_arches()) != set(old_mmd.get_rpm_component(component_name).get_arches()):

+         message = ("Cannot reuse the component because its architectures"

+                    " have changed since the compatible module build").format(component_name)

+         new_module_build_component.log_message(db_session, message)

+         return None

  

      reusable_component = db_session.query(models.ComponentBuild).filter_by(

          package=component_name, module_id=previous_module_build.id).one()

@@ -163,6 +163,86 @@ 

          tangerine = get_reusable_component(second_module_build, "tangerine")

          assert bool(tangerine is None) != bool(set_current_arch == set_database_arch)

  

+     @pytest.mark.parametrize(

+         "reuse_component",

+         ["perl-Tangerine", "perl-List-Compare", "tangerine"])

+     @pytest.mark.parametrize(

+         "changed_component",

+         ["perl-Tangerine", "perl-List-Compare", "tangerine"])

+     def test_get_reusable_component_different_batch(

+         self, changed_component, reuse_component, db_session

+     ):

+         """

+         Test that we get the correct reuse behavior for the changed-and-after strategy. Changes

+         to earlier batches should prevent reuse, but changes to later batches should not.

+         For context, see https://pagure.io/fm-orchestrator/issue/1298

+         """

+ 

+         if changed_component == reuse_component:

+             # we're only testing the cases where these are different

+             # this case is already covered by test_get_reusable_component_different_component

+             return

+ 

+         second_module_build = models.ModuleBuild.get_by_id(db_session, 3)

+ 

+         # update batch for changed component

+         changed_component = models.ComponentBuild.from_component_name(

+             db_session, changed_component, second_module_build.id)

+         orig_batch = changed_component.batch

+         changed_component.batch = orig_batch + 1

+         db_session.commit()

+ 

+         reuse_component = models.ComponentBuild.from_component_name(

+             db_session, reuse_component, second_module_build.id)

+ 

+         reuse_result = module_build_service.utils.get_reusable_component(

+             db_session, second_module_build, reuse_component.package)

+         # Component reuse should only be blocked when an earlier batch has been changed.

+         # In this case, orig_batch is the earliest batch that has been changed (the changed

+         # component has been removed from it and added to the following one).

+         assert bool(reuse_result is None) == bool(reuse_component.batch > orig_batch)

+ 

+     @pytest.mark.parametrize(

+         "reuse_component",

+         ["perl-Tangerine", "perl-List-Compare", "tangerine"])

+     @pytest.mark.parametrize(

+         "changed_component",

+         ["perl-Tangerine", "perl-List-Compare", "tangerine"])

+     def test_get_reusable_component_different_arch_in_batch(

+         self, changed_component, reuse_component, db_session

+     ):

+         """

+         Test that we get the correct reuse behavior for the changed-and-after strategy. Changes

+         to the architectures in earlier batches should prevent reuse, but such changes to later

+         batches should not.

+         For context, see https://pagure.io/fm-orchestrator/issue/1298

+         """

+         if changed_component == reuse_component:

+             # we're only testing the cases where these are different

+             # this case is already covered by test_get_reusable_component_different_arches

+             return

+ 

+         second_module_build = models.ModuleBuild.get_by_id(db_session, 3)

+ 

+         # update arch for changed component

+         mmd = second_module_build.mmd()

+         component = mmd.get_rpm_component(changed_component)

+         component.reset_arches()

+         component.add_restricted_arch("i686")

+         second_module_build.modulemd = mmd_to_str(mmd)

+         db_session.commit()

+ 

+         changed_component = models.ComponentBuild.from_component_name(

+             db_session, changed_component, second_module_build.id)

+         reuse_component = models.ComponentBuild.from_component_name(

+             db_session, reuse_component, second_module_build.id)

+ 

+         reuse_result = module_build_service.utils.get_reusable_component(

+             db_session, second_module_build, reuse_component.package)

+         # Changing the arch of a component should prevent reuse only when the changed component

+         # is in a batch earlier than the component being considered for reuse.

+         assert bool(reuse_result is None) == bool(reuse_component.batch > changed_component.batch)

+ 

      @pytest.mark.parametrize("rebuild_strategy", models.ModuleBuild.rebuild_strategies.keys())

      def test_get_reusable_component_different_buildrequires_stream(self, rebuild_strategy):

          first_module_build = models.ModuleBuild.get_by_id(db_session, 2)

I still need to add unit tests to cover the new cases, but I'd appreciate feedback on the current change.

@mikem the code in this for loop is what needs adjusting:
https://pagure.io/fork/mikem/fm-orchestrator/blob/1affa4212efeabb69a542c2a17cbc3443bf1ee32/f/module_build_service/utils/reuse.py#_443

The code you modified in this PR does not need to be changed for #1298.

Thanks. I was just realizing absurdity of the current change :smirk:

rebased onto b1f39cac2b8d85465c2aa5dcab1e82a94dd1ca5d

4 years ago

I'm unsure if I'm doing the right thing comparing the batch here

This is checking the batch the module build is currently in compared to the batch the previous module build is currently in.

If the rebuild strategy is changed-and-after, you need to check that if a component was added, that it is in the same batch or later as the component being reused. That way, we know that the component being added cannot influence the component being reused if this module was built from scratch. If it doesn't meet that criteria, the current component cannot be reused.

1 new commit added

  • trying a different approach
4 years ago

I feel like this is probably wrong too. I'm having trouble sorting out the data model and available information here.

@mikem each component build has a batch column. That's what you need to check.

So for example, say there is a module with the following components.
Batch 1 (automatically created by MBS and not in the modulemd file): module-build-macros
Batch 2: A
Batch 3: B
Batch 4: C

Then say you are building the same module again, but with the following components.
Batch 1 (automatically created by MBS and not in the modulemd file): module-build-macros
Batch 2: A
Batch 3: B and D
Batch 4: C

If the rebuild strategy is only-changed, then the fact that component D was added to batch 3 won't affect component reuse for any of the components (A, B, and C).

If the rebuild strategy is changed-and-after, then the fact that component D was added to batch 3 will mean that component C can't be reused because component D was not in the buildroot when component C was built as part of the original module build.

Does this make sense? This is what needs to be implemented.

Edit:
Ideally, the reverse of this scenario should also be added since removing a component from the buildroot could equally affect components in future buildorders.

The existing logic for the changed-and-after case is already generating a list of components and refs for the previous batches and comparing the two. This should already detect added or removed components (despite what the log message says when it denies reuse). It doesn't explicitly check the arches, but I'm wondering if we would reasonably expect that to change with the refs staying the same.

So my current read on this is that we need to:

  • remove that last 'added or removed' check. The changed-and-after check already covers it and the other strategies don't need it.
  • update the log message in the changed-and-after check to reflect the broader range of changes it is already detecting.
  • (possibly) update the changed-and-after check to also compare the arches of those components.

The existing logic for the changed-and-after case is already generating a list of components and refs for the previous batches and comparing the two. This should already detect added or removed components (despite what the log message says when it denies reuse). It doesn't explicitly check the arches, but I'm wondering if we would reasonably expect that to change with the refs staying the same.

This is checked because the modularity spec allows you to set the arches to build per component:
https://github.com/fedora-modularity/libmodulemd/blob/master/spec.v2.yaml#L310

This isn't supported in Koji, but MBS has the code for when it is supported or if another builder that does support it is used by MBS.

So my current read on this is that we need to:

remove that last 'added or removed' check. The changed-and-after check already covers it and the other strategies don't need it.
update the log message in the changed-and-after check to reflect the broader range of changes it is already detecting.
(possibly) update the changed-and-after check to also compare the arches of those components.

Yes, this makes sense to me.

rebased onto 17ef0dfac5f5892aa8523a4b1bae775dd3c5ac64

4 years ago

Ok, I'm feeling much better about this version of the change

One concern I have is the combination of mixing the data from the ModuleBuild objects and the mmd. Seems like I have to dig into the mmd for the get_arches() method, but the batch index of components is based on the object data.

In particular, could mmd.get_rpm_component(value.package) fail? value is coming from module.component_builds rather than mmd.get_rpm_component_names(). Is this data certain to match or could there be subtle differences?

In particular I note that the module-build-macros entry is one one place but not the other. However, we are always going to skip that one in the loop, so I guess that's ok here. With my limited context, it's not clear to me if there might be other variations/exceptions.

One concern I have is the combination of mixing the data from the ModuleBuild objects and the mmd. Seems like I have to dig into the mmd for the get_arches() method, but the batch index of components is based on the object data.
In particular, could mmd.get_rpm_component(value.package) fail? value is coming from module.component_builds rather than mmd.get_rpm_component_names(). Is this data certain to match or could there be subtle differences?

@mikem since the modulemd file is what actually populates those database entries, this is safe. One of those exceptions, as noted above, is module-build-macros. This is an implementation detail and exists as a workaround because the build system doesn't understand modularity. This is why it wouldn't be in the modulemd file.

Not everything is stored in the database that is in the modulemd file which is why some mixing is required.

@mikem the change looks good. Could you add one or more unit tests in TestUtilsComponentReuse?

1 new commit added

  • reuse unit test for batch/arch changes in other components
4 years ago

Build #524 failed (commit: 931e05280b5da8eddd23771ecf9273333bb29477).
Rebase or make new commits to rebuild.

1 new commit added

  • adjust whitespace to appease flake8
4 years ago

Build #527 failed (commit: ec1349d3f4dc2bed26203969e63471cc3d1df16e).
Rebase or make new commits to rebuild.

Optional: For improved readability, it'd be nice if you used second_module_build.id instead of 3. The same goes for below as well.

Could you add a docstring explaining the test? Without the context we have, it's hard to follow.

Could you also add a docstring here as well?

Could you add a comment explaining this assertion? It might be hard to follow otherwise.

Here is a suggestion:

# If changed_component is in a batch previous to reuse_component, reuse_component cannot be reused since
# the arch changed on a component in a previous batch to it
bool(reuse_result is None) == bool(reuse_component.batch > changed_component.batch)

pretty please pagure-ci rebuild

4 years ago

1 new commit added

  • clarify unit tests
4 years ago

Build #553 failed (commit: 294ab865becb9a371884374cb64442d5c6154b07).
Rebase or make new commits to rebuild.

the the => to the

@mikem could you please fix the typos and squash your commits? After that, it's good to merge.

rebased onto f5692d0

4 years ago

Build #572 failed (commit: f5692d0).
Rebase or make new commits to rebuild.

(the current test failure seems to be due to a change in the py3 test image and independent of this change)

Commit 11df325 fixes this pull-request

Pull-Request has been merged by mikem

4 years ago

Pull-Request has been merged by mikem

4 years ago