#358 Use only latest repo from Pulp
Merged 3 years ago by lsedlar. Opened 3 years ago by lsedlar.
lsedlar/odcs pulp-repos-version  into  master

file modified
+47 -1
@@ -24,9 +24,10 @@ 

  

  import copy

  import json

+ import re

  import requests

  

- from odcs.server import conf

+ from odcs.server import conf, log

  from odcs.server.mergerepo import MergeRepo

  from odcs.server.utils import retry

  
@@ -183,10 +184,55 @@ 

                  "url": url,

                  "arches": set([arch]),

                  "sigkeys": sigkeys,

+                 "product_versions": notes["product_versions"],

              })

  

          ret = {}

          for cs, repos in per_content_set_repos.items():

+             can_sort = True

+             version_len = None

+             arches = None

+             for repo in repos:

+                 if arches and arches != repo["arches"]:

+                     log.debug("Skipping sorting repos: arch mismatch.")

+                     can_sort = False

+                     break

+                 arches = repo["arches"]

+                 try:

+                     product_versions = json.loads(repo["product_versions"])

+                 except ValueError:

+                     log.debug(

+                         "Skipping sorting repos: invalid JSON in product_versions."

+                     )

+                     can_sort = False

+                     break

+                 if len(product_versions) != 1:

+                     log.debug(

+                         "Skipping sorting repos: more than one version for a repo."

+                     )

+                     can_sort = False

+                     break

+                 if not re.match(r"\d+(\.\d+)*$", product_versions[0]):

+                     log.debug("Skipping sorting repos: unparsable version string.")

+                     can_sort = False

+                     break

+                 # Parse the version into a tuple.

+                 ver_len = product_versions[0].count(".")

+                 if version_len and version_len != ver_len:

+                     log.debug("Skipping sorting repos: incompatible versions.")

+                     can_sort = False

+                     break

+                 version_len = ver_len

+             if can_sort:

+                 repos = [

+                     sorted(

+                         repos,

+                         key=lambda k: tuple(

+                             json.loads(k["product_versions"])[0].split(".")

+                         ),

+                         reverse=True,

+                     )[0]

+                 ]

              # In case there are multiple repos, at first try merging them

              # by replacing arch in repository baseurl with $basearch.

              merged_repos = self._try_arch_merge(repos)

file modified
+7 -2
@@ -488,6 +488,7 @@ 

                      "content_set": "foo-1",

                      "arch": "x86_64",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  },

              },

              {
@@ -496,6 +497,7 @@ 

                      "content_set": "foo-2",

                      "arch": "x86_64",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  }

              },

              {
@@ -504,6 +506,7 @@ 

                      "content_set": "foo-3",

                      "arch": "ppc64",

                      "signatures": "SIG1,SIG3",

+                     "product_versions": "",

                  }

              }

          ]
@@ -605,7 +608,8 @@ 

                      "relative_url": "content/1/x86_64/os",

                      "content_set": "foo-1",

                      "arch": "ppc64",

-                     "signatures": "SIG1,SIG2"

+                     "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  },

              },

          ]
@@ -648,7 +652,8 @@ 

                      "relative_url": "content/1/x86_64/os",

                      "content_set": "foo-1",

                      "arch": "ppc64",

-                     "signatures": "SIG1,SIG2"

+                     "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  },

              },

          ]

file modified
+61
@@ -86,6 +86,7 @@ 

                      "content_set": "foo-1",

                      "arch": "x86_64",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  },

              },

              {
@@ -94,6 +95,7 @@ 

                      "content_set": "foo-1",

                      "arch": "ppc64le",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  }

              },

              {
@@ -102,6 +104,7 @@ 

                      "content_set": "foo-2",

                      "arch": "ppc64",

                      "signatures": "SIG1,SIG3",

+                     "product_versions": "",

                  }

              }

          ]
@@ -115,11 +118,13 @@ 

                      "url": "http://localhost/content/1/$basearch/os",

                      "arches": set(["x86_64", "ppc64le"]),

                      "sigkeys": ["SIG1", "SIG2"],

+                     "product_versions": "",

                  },

                  "foo-2": {

                      "url": "http://localhost/content/3/ppc64/os",

                      "arches": set(["ppc64"]),

                      "sigkeys": ["SIG1", "SIG3"],

+                     "product_versions": "",

                  }

              })

  
@@ -141,6 +146,7 @@ 

                      "content_set": "foo-1",

                      "arch": "x86_64",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  },

              },

              # Test two same relative_urls here.
@@ -150,6 +156,7 @@ 

                      "content_set": "foo-1",

                      "arch": "x86_64",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  },

              },

              {
@@ -158,6 +165,7 @@ 

                      "content_set": "foo-1",

                      "arch": "x86_64",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  }

              },

              {
@@ -166,6 +174,7 @@ 

                      "content_set": "foo-1",

                      "arch": "ppc64le",

                      "signatures": "SIG1,SIG2",

+                     "product_versions": "",

                  },

              },

          ]
@@ -210,3 +219,55 @@ 

          download_repodata.assert_any_call(

              repo_prefix + "1.0/ppc64le/os",

              "http://localhost/content/1.0/ppc64le/os")

+ 

+     @patch("odcs.server.mergerepo.execute_cmd")

+     @patch("odcs.server.mergerepo.makedirs")

+     @patch("odcs.server.mergerepo.Lock")

+     @patch("odcs.server.mergerepo.MergeRepo._download_repodata")

+     def test_pulp_compose_find_latest_version(

+             self, download_repodata, lock, makedirs, execute_cmd,

+             pulp_rest_post):

+         c = Compose.create(

+             db.session, "me", PungiSourceType.PULP, "foo-1", 0, 3600)

+         db.session.commit()

+ 

+         pulp_rest_post.return_value = [

+             {

+                 "notes": {

+                     "relative_url": "content/1.0/x86_64/os",

+                     "content_set": "foo-1",

+                     "arch": "x86_64",

+                     "signatures": "SIG1,SIG2",

+                     "product_versions": '["1.0"]',

+                 },

+             },

+             {

+                 "notes": {

+                     "relative_url": "content/1.1/x86_64/os",

+                     "content_set": "foo-1",

+                     "arch": "x86_64",

+                     "signatures": "SIG1,SIG2",

+                     "product_versions": '["1.1"]',

+                 }

+             },

+         ]

+ 

+         pulp = Pulp("http://localhost/", "user", "pass", c)

+         ret = pulp.get_repos_from_content_sets(["foo-1"])

+ 

+         self.assertEqual(

+             ret,

+             {

+                 "foo-1": {

+                     "url": "http://localhost/content/1.1/x86_64/os",

+                     "arches": set(["x86_64"]),

+                     "sigkeys": ["SIG1", "SIG2"],

+                     "product_versions": '["1.1"]',

+                 }

+             })

+ 

+         makedirs.assert_not_called()

+ 

+         execute_cmd.assert_not_called()

+ 

+         download_repodata.assert_not_called()

If Pulp returns multiple repos for a given content set, and all of them have a single product version each, and the product version follows the same schema for all repos, we can use only the latest one.

This is based on the assumption that X.Y+1 contains all the content from X.Y, so there's no need to merge all the repodata.

This should hopefully make pulp composes faster and avoid issues where mergerepo_c is running out of memory.

CC @jkaluza

Let me check some more content_sets. I remember there were also cases where content-set contained repos for multiple architectures and using this code, we would effectively keep just single latest repo.

I will try to find out more info tomorrow.

Anyway, I think we should do this optimization only if notes["arch"] has the same value in all repos.

Yeah, that's the case of rhel-6-server-rpms content_set. Use that as a test.

Since version_len is initialized to None then this if condition is never hit and second condition (version_len != ver_len) is never checked.

It is set after the condition. On first iteration the condition is skipped, and after that ver_len (length in current iteration) is compared with the saved length from previous iteration.

Yeah, that's the case of rhel-6-server-rpms content_set. Use that as a test.

This one does not pass the existing checks: there are multiple product versions for the same repo.

I can add the check that all repos have the same arch. The main problem now is that I don't know how to verify the optimization is not actually breaking something.

rebased onto 7700f3142a2092c0195b84e74e919874e1acd9fe

3 years ago

+1.

I was thinking about log.info before each break to find out why the repos cannot be sorted and therefore must be merged. But we can also always just run that code locally to find out why particular repos cannot be merged... So it's probably not worth doing that unless you also think it's good idea.

I think it would be useful to log the details. Basically just turn the comments into log calls. I think debug is more appropriate than info though.

@jkaluza, do you think this change is safe to merge?

rebased onto d2fe5554674414d59f8fca79a9dd2526796d5392

3 years ago

Yeah, I think it can be merged.

rebased onto 7263894

3 years ago

Pull-Request has been merged by lsedlar

3 years ago