#529 Reduce memory usage of images to rebuild
Merged 4 years ago by gnaponie. Opened 4 years ago by apaplaus.
apaplaus/freshmaker freshmaker_memory_leak  into  master

file modified
+21 -5
@@ -708,11 +708,22 @@ 

                               "field": "srpm_name",

                               "op": "=",

                               "rvalue": srpm_name

-                          } for srpm_name in srpm_names]}}]

+                          } for srpm_name in srpm_names]},

+                      "project": [

+                          {"field": "srpm_nevra", "include": True},

+                          {"field": "nvra", "include": True},

+                          {"field": "srpm_name", "include": True},

+                      ]

+                      }

+                 ]

              else:

                  projection += [

-                     {"field": "rpm_manifest.*.rpms", "include": True, "recursive": True},

-                     {"field": "rpm_manifest.*.rpms.*.srpm_name", "include": True, "recursive": True},

+                     {"field": "rpm_manifest.*.rpms.*.srpm_nevra",

+                      "include": True, "recursive": True},

+                     {"field": "rpm_manifest.*.rpms.*.nvra",

+                      "include": True, "recursive": True},

+                     {"field": "rpm_manifest.*.rpms.*.srpm_name",

+                      "include": True, "recursive": True},

                  ]

          return projection

  
@@ -1531,17 +1542,22 @@ 

          # For every image, find out all its parent images which contain the

          # srpm_name package and store these lists to to_rebuild.

          to_rebuild = []

+         optimization_base = 50

          with ThreadPoolExecutor(max_workers=conf.max_thread_workers) as executor:

              for result in executor.map(_get_images_to_rebuild, images):

                  to_rebuild.extend(result.values())

- 

+                 # Memory consumption of fully constructed to_rebuild list could

+                 # be large. To prevent this we will periodically use

+                 # deduplication on the list to reduce its size.

+                 if len(to_rebuild) > optimization_base:

+                     self._deduplicate_images_to_rebuild(to_rebuild)

+                     optimization_base += 50

          # The to_rebuild list now contains all the images which need to be

          # rebuilt, but there are lot of duplicates there.

  

          # At first remove duplicated images which share the same name and

          # version, but different release.

          to_rebuild = self._deduplicate_images_to_rebuild(to_rebuild)

- 

          # Get all the directly affected images so that any parents that are not marked as

          # directly affected can be set in _images_to_rebuild_to_batches

          directly_affected_nvrs = {

file modified
+2 -1
@@ -1979,7 +1979,8 @@ 

                              {'field': 'content_sets', 'include': True, 'recursive': True},

                              {'field': 'parent_brew_build', 'include': True, 'recursive': False},

                              {'field': 'architecture', 'include': True, 'recursive': False},

-                             {'field': 'rpm_manifest.*.rpms', 'include': True, 'recursive': True},

+                             {'field': 'rpm_manifest.*.rpms.*.srpm_nevra', 'include': True, 'recursive': True},

+                             {'field': 'rpm_manifest.*.rpms.*.nvra', 'include': True, 'recursive': True},

                              {'field': 'rpm_manifest.*.rpms.*.srpm_name', 'include': True, 'recursive': True}],

               'objectType': 'containerImage'})

  

While the list of lists of images to be rebuilt is being constructed, we
periodically call the deduplication function on it, because we need to
reduce its size during construction, not after it.

To start with, I've chosen to run deduplication after every 50 new tuples
of images, because deduplication doesn't reduce the number of tuples;
it only replaces images in these tuples with newer ones.

RESOLVES: CLOUDWF-1612

Signed-off-by: Andrei Paplauski apaplaus@redhat.com

:thumbsup: I think it doesn't hurt to add this. If some images can be deduplicated early, this should help reduce the memory usage of to_rebuild.

rebased onto 2b760b7

4 years ago

I've added another approach to resolve this problem: we no longer query Lightblue for all information about the RPMs inside a container image, but only for the RPM fields we actually need.

:+1: can you update commit message of 2b760b7 to remove the 'WIP'?

rebased onto 9abcdb8

4 years ago

👍 can you update commit message of 2b760b7 to remove the 'WIP'?

done

Pull-Request has been merged by gnaponie

4 years ago