#55 Implement orphan detection using graphs
Merged 3 years ago by frantisekz. Opened 3 years ago by jskladan.

file modified
+128 -101
@@ -32,6 +32,7 @@ 

  import koji

  import urllib.parse

  import re

+ import igraph

  

  from bodhi.client.bindings import BodhiClient

  from urllib3.util.retry import Retry
@@ -154,90 +155,73 @@ 

          "release": release_from_nevra(override["nvr"])

      }

  

- def orphan_reason(package, orphans_json, reason_tree=[]):

-     """

-     TODO: WIP

-     Returns chain of dependencies that caused the package to depend on something orphaned

-     """

-     for p in orphans_json["affected_packages"][package]:

-         if p not in reason_tree:

-             reason_tree.append(p)

-             reason_tree = orphan_reason(p, orphans_json, reason_tree=reason_tree)

-     return reason_tree

- 

- def is_directly_orphaned(package, orphans_json):

-     """

-     Returns True if a package itself is orphaned

-     """

-     if package in orphans_json["status_change"]:

-         return True

-     return False

- 

- def filter_indirectly_orphaned(packages, orphans_json):

-     """

-     Filters list containing orphaned and depending on orphaned packages to contain only directly orphaned packages

-     """

-     ret = []

-     for package in packages:

-         if is_directly_orphaned(package, orphans_json):

-             ret.append(package)

-     return ret

+ class OrphanGraph(object):

+ 

+     def __init__(self, orphans_data):

+         self.orphans_data = orphans_data

+         aff = orphans_data['affected_packages']

+         counter = 0

+         edges = []

+         self.pimap = {}

+         self.ipmap = {}

+         for i, p in enumerate(aff.keys()):

+             self.pimap[p] = i

+             self.ipmap[i] = p

+         self.ileafs = [self.pimap[p] for p in orphans_data['status_change'].keys()]

+ 

+         for p in aff:

+             for d in aff[p]:

+                 edges.append((self.pimap[p],self.pimap[d]))

+ 

+         self.g = igraph.Graph(edges, directed=True)

+ 

+     def get_package_info(self, package):

+         p = package

+ 

+         if p not in self.orphans_data["affected_packages"]:

+             return {

+                 "orphaned": False,

+                 "depends_on_orphaned": False,

+                 "direct_dependencies": [],

+                 "remote_dependencies": [],

+                 "problematic_since": None,

+                 "dot_graph": ""

+             }

  

+         paths = self.g.get_all_simple_paths(self.pimap[p], self.ileafs)

+         direct_deps = list(set([self.ipmap[l[-1]] for l in paths if len(l) == 2]))

+         remote_deps = list(set([self.ipmap[l[-1]] for l in paths if len(l) > 2]))

  

- def orphan_since_earliest(reason_tree, orphans_json):

-     """

-     Returns the oldest time from reason_tree

-     """

-     times = []

-     for reason in reason_tree:

          try:

-             times.append(orphans_json["status_change"][reason])

-         except KeyError:

-             continue

-     return min(times)

+             # the "Z"*20 hack is here to 'fool' the min operator into ignoring the package in question, when it's not in "status_change"

+             problematic_since = min([self.orphans_data["status_change"].get(d, "Z" * 20) for d in direct_deps + remote_deps + [p]])

+         except ValueError:

+             problematic_since = None

+         if problematic_since == "Z" * 20:

+             problematic_since = None

  

- def get_orphans(packages, orphans_json=None):

-     """

-     Returns dict of dictionaries (all packages): {

-         "orphaned": directly orphaned package has orphaned == True,

-         "depends_on_orphaned": Package that depends on something orphaned (even indirectly),

-         "problematic_since": oldest time when something in the chain was orphaned

-     }

-     Example of orphans_json: {

-         "affected_packages": {

-             "package_a": [package_b, package_c,...],

-             "package_b": [package_c, package_d,...],

-             ...

-             },

-         "status_change": {

-             "package_a": "time",

-             "package_b": "time",

-             ...

+         dot_graph = ""

+         for path in paths:

+             dot_graph += ' -- '.join('"%s"' % self.ipmap[i] for i in path) + ";\n"

+ 

+         return {

+             "orphaned": p in self.orphans_data["status_change"].keys(),

+             "depends_on_orphaned": bool(len(self.orphans_data["affected_packages"][p])),

+             "direct_dependencies": direct_deps,

+             "remote_dependencies": remote_deps,

+             "problematic_since": problematic_since,

+             "dot_graph": dot_graph.strip()

          }

-     }

-     """

-     if not orphans_json:

-         orphans_json = CACHE.get('orphans_json')

+ 

+ def get_orphans(packages, orphans_data=None):

+ 

+     if not orphans_data:

+         orphans_data = CACHE.get('orphans_json')

+ 

+     graph = OrphanGraph(orphans_data)

      orphans = {}

      for package in packages:

-         orphans[package] = {

-             "orphaned": False,

-             "depends_on_orphaned": False,

-             "reason_tree": [],

-             "problematic_since": None

-         }

-         if package in orphans_json["affected_packages"]:

-             reason = orphan_reason(package, orphans_json)

-             orphans[package] = {

-                 "orphaned": is_directly_orphaned(package, orphans_json),

-                 "depends_on_orphaned": True if len(orphans_json["affected_packages"][package]) != 0 else False,

-                 "reason_tree": filter_indirectly_orphaned(reason, orphans_json),

-                 "problematic_since": None

-             }

-             if len(reason) != 0:

-                 orphans[package]["problematic_since"] = orphan_since_earliest(reason, orphans_json)

-             else:

-                 orphans[package]["problematic_since"] = orphans_json["status_change"][package]

+         orphans[package] = graph.get_package_info(package)

      return orphans

  

  
@@ -693,35 +677,78 @@ 

  

  

  if __name__ == "__main__":

-     def test_simple_orphans():

-         EXPECTED_OUTPUT = {'directly_orphaned_package': {

-                                'orphaned': True,

-                                'depends_on_orphaned': False,

-                                'reason_tree': [],

-                                'problematic_since': '2020-05-13T05:50:55'},

-                            'indirectly_orphaned_package': {

-                                'orphaned': True,

-                                'depends_on_orphaned': True,

-                                'reason_tree': ['directly_orphaned_package'],

-                                'problematic_since': '2020-05-13T05:50:55'}}

+     def test_orphans():

+         """

+           * notes packages being orphaned

+           A -- B -- C*

+                  -- D* -- E*

+                  -- E*

+             -- F*

+         """

          DATA = {

                  "affected_packages": {

-                     "directly_orphaned_package": [],

-                     "indirectly_orphaned_package": ["directly_orphaned_package"]

+                     'A': ['B', 'F'],

+                     'B': ['C', 'D', 'E'],

+                     'C': [],

+                     'D': ['E'],

+                     'E': [],

+                     'F': []

                  },

                  "status_change": {

-                     "directly_orphaned_package": "2020-05-13T05:50:55",

-                     "indirectly_orphaned_package": "2020-06-04T18:08:34"

+                     'C': '2020-01-01T00:00:00',

+                     'D': '2020-01-01T00:00:01',

+                     'E': '2020-01-01T00:00:02',

+                     'F': '2020-01-01T00:00:03',

+ 

                  }

          }

-         if get_orphans(["directly_orphaned_package","indirectly_orphaned_package"], DATA) == EXPECTED_OUTPUT:

-             print("simple orphans parsing seems to work FINE...")

-         else:

-             print("simple orphans parsing seems to be BROKEN!")

+         out = get_orphans(["A", "F", "Z"], DATA)

+         # Not affected

+         o_z = {

+             "orphaned": False,

+             "depends_on_orphaned": False,

+             "direct_dependencies": [],

+             "remote_dependencies": [],

+             "problematic_since": None,

+             "dot_graph": ""

+             }

+         assert out["Z"] == o_z

+ 

+         # Directly orphaned

+         o_f = {

+             "orphaned": True,

+             "depends_on_orphaned": False,

+             "direct_dependencies": [],

+             "remote_dependencies": [],

+             "problematic_since": '2020-01-01T00:00:03',

+             "dot_graph": ""

+             }

+         assert out["F"] == o_f

+ 

+         # Complete example

+         o_a = {

+             "orphaned": False,

+             "depends_on_orphaned": True,

+             "direct_dependencies": ['F'],

+             "remote_dependencies": ['C', 'D', 'E'],

+             "problematic_since": '2020-01-01T00:00:00'

+             }

+         dg = sorted(out["A"]["dot_graph"].split('\n'))

+         del(out["A"]["dot_graph"])

+ 

+         out["A"]["remote_dependencies"].sort()

+         assert out["A"] == o_a

+ 

+         o_dg = sorted([

+             '"A" -- "B" -- "C";',

+             '"A" -- "B" -- "D";',

+             '"A" -- "B" -- "D" -- "E";',

+             '"A" -- "B" -- "E";',

+             '"A" -- "F";'

+             ])

+         assert o_dg == dg

+ 

+         print("OK")

  

-     def test_advanced_orphans():

-         EXPECTED_OUTPUT = ""

-         DATA = ""

-         pass

  

-     test_simple_orphans()

+     test_orphans()

file modified
+1
@@ -16,6 +16,7 @@ 

  pygments

  python-bugzilla >= 2.4.0

  python-dateutil == 2.8.1

+ python-igraph >= 2.8.0

  pytz

  requests

  celery

file modified
+1
@@ -47,6 +47,7 @@ 

            'pygments',

            'python-bugzilla',

            'python-dateutil',

+           'python-igraph',

            'pytz',

            'requests',

            'redis',

Also adds new format for the orphans output:

{
 "orphaned": False,
 "depends_on_orphaned": False,
 "direct_dependencies": [],
 "remote_dependencies": [],
 "problematic_since": None,
 "dot_graph": ""
}

{
 "orphaned": True,
 "depends_on_orphaned": False,
 "direct_dependencies": [],
 "remote_dependencies": [],
 "problematic_since": '2020-01-01T00:00:03',
 "dot_graph": ""
}

{
 "orphaned": False,
 "depends_on_orphaned": True,
 "direct_dependencies": ['F'],
 "remote_dependencies": ['C', 'D', 'E'],
 "problematic_since": '2020-01-01T00:00:00',
 'dot_graph': '"A" -- "B" -- "C";\n'
              '"A" -- "B" -- "D";\n'
              '"A" -- "B" -- "D" -- "E";\n'
              '"A" -- "B" -- "E";\n'
              '"A" -- "F";',
}

rebased onto 38a5379

3 years ago

Pull-Request has been merged by frantisekz

3 years ago