#394 Group images to deduplicate also according to their repositories.
Merged 4 years ago by lucarval. Opened 4 years ago by jkaluza.
jkaluza/freshmaker models-fix  into  master

file modified
+22 -18
@@ -1360,34 +1360,38 @@ 

              # nvr_to_coordinates["nvr"] = [[0, 3], ...] means that the image with

              # nvr "nvr" is 4th image in the to_rebuild[0] list, ...

              nvr_to_coordinates = {}

-             # Temporary dict mapping the NV to list of NVRs. The List of NVRs

There's lot of nv_ to image_group_ rename going on here...

-             # is always sorted descending.

-             nv_to_nvrs = {}

+             # Temporary dict mapping the NV-repository_key to list of NVRs.

+             # The List of NVRs is always sorted descending.

+             image_group_to_nvrs = {}

              # Temporary dict mapping the NVR to image.

              nvr_to_image = {}

-             # Temporary dict mapping NV to latest released NVR for that NV.

-             nv_to_latest_released_nvr = {}

+             # Temporary dict mapping image_group to latest released NVR for that image_group.

+             image_group_to_latest_released_nvr = {}

  

              # Constructs the temporary dicts as desribed above.

              for image_id, images in enumerate(to_rebuild):

                  for parent_id, image in enumerate(images):

                      nvr = image["brew"]["build"]

+                     # Also include the sorted names of repositories in the image group

+                     # to handle the case when different releases of single name-version are

+                     # included in different container repositories.

+                     repository_key = "-".join(sorted([r["repository"] for r in image["repositories"]]))

                      parsed_nvr = koji.parse_NVR(nvr)

-                     nv = "%s-%s" % (parsed_nvr["name"], parsed_nvr["version"])

-                     if nv not in nv_to_nvrs:

-                         nv_to_nvrs[nv] = []

-                     if nvr not in nv_to_nvrs[nv]:

-                         nv_to_nvrs[nv].append(nvr)

+                     image_group = "%s-%s-%s" % (parsed_nvr["name"], parsed_nvr["version"], repository_key)

This is the key change in this PR.

+                     if image_group not in image_group_to_nvrs:

+                         image_group_to_nvrs[image_group] = []

+                     if nvr not in image_group_to_nvrs[image_group]:

+                         image_group_to_nvrs[image_group].append(nvr)

                      if nvr not in nvr_to_coordinates:

                          nvr_to_coordinates[nvr] = []

                      nvr_to_coordinates[nvr].append([image_id, parent_id])

                      nvr_to_image[nvr] = image

                      if "latest_released" in image and image["latest_released"]:

-                         nv_to_latest_released_nvr[nv] = nvr

+                         image_group_to_latest_released_nvr[image_group] = nvr

  

-             # Sort the lists in nv_to_nvrs dict.

-             for nv in nv_to_nvrs.keys():

-                 nv_to_nvrs[nv] = sorted_by_nvr(nv_to_nvrs[nv], reverse=True)

+             # Sort the lists in image_group_to_nvrs dict.

+             for image_group in image_group_to_nvrs.keys():

+                 image_group_to_nvrs[image_group] = sorted_by_nvr(image_group_to_nvrs[image_group], reverse=True)

  

                  # There might be container image NVRs which are not released yet,

                  # but some released image is already built on top of them.
@@ -1402,7 +1406,7 @@ 

                  # does not exists in the dist-git repo, which should be rare

                  # situation.

                  latest_content_sets = []

-                 for nvr in reversed(nv_to_nvrs[nv]):

+                 for nvr in reversed(image_group_to_nvrs[image_group]):

                      image = nvr_to_image[nvr]

                      if ("content_sets" not in image or

                              not image["content_sets"] or
@@ -1415,13 +1419,13 @@ 

                          latest_content_sets = image["content_sets"]

  

              # Iterate through list of NVs.

-             for nv, nvrs in nv_to_nvrs.items():

+             for image_group, nvrs in image_group_to_nvrs.items():

                  # We want to replace NVRs which are lower than the latest released

                  # NVR with latest released NVR. If there are some higher NVRs, we

                  # want to keep them, because we don't want to rebuild the image

                  # against older NVR than the one it is currently built against.

-                 if nv in nv_to_latest_released_nvr:

-                     latest_released_nvr = nv_to_latest_released_nvr[nv]

+                 if image_group in image_group_to_latest_released_nvr:

+                     latest_released_nvr = image_group_to_latest_released_nvr[image_group]

                  else:

                      latest_released_nvr = nvrs[0]

  

file modified
+61
@@ -1355,53 +1355,62 @@ 

  

          image_a = ContainerImage.create({

              'brew': {'package': 'image-a', 'build': 'image-a-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-a-commit'

          })

          image_b = ContainerImage.create({

              'brew': {'package': 'image-b', 'build': 'image-b-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-b-commit',

              'parent': image_a,

          })

          image_c = ContainerImage.create({

              'brew': {'package': 'image-c', 'build': 'image-c-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-c-commit',

              'parent': image_b,

          })

          image_e = ContainerImage.create({

              'brew': {'package': 'image-e', 'build': 'image-e-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-e-commit',

              'parent': image_a,

          })

          image_d = ContainerImage.create({

              'brew': {'package': 'image-d', 'build': 'image-d-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-d-commit',

              'parent': image_e,

          })

          image_j = ContainerImage.create({

              'brew': {'package': 'image-j', 'build': 'image-j-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-j-commit',

              'parent': image_e,

          })

          image_k = ContainerImage.create({

              'brew': {'package': 'image-k', 'build': 'image-k-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-k-commit',

              'parent': image_j,

          })

          image_g = ContainerImage.create({

              'brew': {'package': 'image-g', 'build': 'image-g-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-g-commit',

              'parent': None,

          })

          image_f = ContainerImage.create({

              'brew': {'package': 'image-f', 'build': 'image-f-v-r1'},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'image-f-commit',

              'parent': image_g,
@@ -1410,36 +1419,42 @@ 

          leaf_image1 = ContainerImage.create({

              'brew': {'build': 'leaf-image-1-1'},

              'parsed_data': {'layers': ['fake layer']},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'leaf-image1-commit',

          })

          leaf_image2 = ContainerImage.create({

              'brew': {'build': 'leaf-image-2-1'},

              'parsed_data': {'layers': ['fake layer']},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'leaf-image2-commit',

          })

          leaf_image3 = ContainerImage.create({

              'brew': {'build': 'leaf-image-3-1'},

              'parsed_data': {'layers': ['fake layer']},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'leaf-image3-commit',

          })

          leaf_image4 = ContainerImage.create({

              'brew': {'build': 'leaf-image-4-1'},

              'parsed_data': {'layers': ['fake layer']},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'leaf-image4-commit',

          })

          leaf_image5 = ContainerImage.create({

              'brew': {'build': 'leaf-image-5-1'},

              'parsed_data': {'layers': ['fake layer']},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'leaf-image5-commit',

          })

          leaf_image6 = ContainerImage.create({

              'brew': {'build': 'leaf-image-6-1'},

              'parsed_data': {'layers': ['fake layer']},

+             'repositories': [{"repository": "foo/bar"}],

              'repository': 'repo-1',

              'commit': 'leaf-image6-commit',

          })
@@ -1496,6 +1511,7 @@ 

              'brew': {'package': 'image-a', 'build': 'image-a-v-r1'},

              'repository': 'repo-1',

              'commit': 'image-a-commit',

+             'repositories': [{"repository": "foo/bar"}],

              'rpm_manifest': [{

                  "rpms": [

                      {"srpm_name": "dummy"}
@@ -1664,6 +1680,7 @@ 

              'brew': {'build': nvr},

              'content_sets': [],

              'content_sets_source': 'distgit',

+             'repositories': [{"repository": "product/repo1"}],

          })

  

      def _create_imgs(self, nvrs):
@@ -1880,6 +1897,50 @@ 

          ret = self.lb._deduplicate_images_to_rebuild([httpd, perl])

          self.assertEqual(ret, expected_images)

  

+     def test_same_nv_different_r_different_repos(self):

+         httpd = self._create_imgs([

+             "httpd-2.4-12",

+             "s2i-base-1-2",

+             "s2i-core-1-11",

+             "rhel-server-docker-7.4-125",

+         ])

+ 

+         perl = self._create_imgs([

+             "perl-5.7-1",

+             ["s2i-base-1-3", {

+                 "content_sets": ["foo"],

+                 "repositories": [{

+                     "repository": "product/repo2",

+                     "content_sets": ["foo"]

+                 }]}],

+             "s2i-core-2-12",

+             "rhel-server-docker-7.4-150",

+         ])

+ 

+         expected_images = [

+             self._create_imgs([

+                 "httpd-2.4-12",

+                 "s2i-base-1-2",

+                 "s2i-core-1-11",

+                 "rhel-server-docker-7.4-150",

+             ]),

+             self._create_imgs([

+                 "perl-5.7-1",

+                 ["s2i-base-1-3", {

+                     "content_sets": ["foo"],

+                     "repositories": [{

+                         "repository": "product/repo2",

+                         "content_sets": ["foo"]

+                     }]}],

+                 "s2i-core-2-12",

+                 "rhel-server-docker-7.4-150",

+             ])

+         ]

+ 

+         self.maxDiff = None

+         ret = self.lb._deduplicate_images_to_rebuild([httpd, perl])

+         self.assertEqual(ret, expected_images)

+ 

      def test_batches_same_image_in_batch(self):

          httpd = self._create_imgs([

              "httpd-2.4-12",

There is an situation when images with the same name-version but
different release are in the completely different repositories.
For example, foo-1-1 is built against fedora-28, foo-1-2 is built
against fedora-29.

Current Freshmaker deduplication code replaces foo-1-1 with foo-1-2,
because the Release is higher, but this is wrong, because both images
live in different container repositories and are "unrelated".

This commit fixes this by grouping images not only by name-version,
but also by the sorted list of the repositories the images are in.

Thanks to that, foo-1-1 won't be treated as older release of foo-1-2,
because they repositories do not match.

There's lot of nv_ to image_group_ rename going on here...

This is the key change in this PR.

Pull-Request has been merged by lucarval

4 years ago