#177 Precise timestamps for api/v2
Merged 2 years ago by frantisekz. Opened 2 years ago by frantisekz.

@@ -0,0 +1,28 @@
+ """Static timestamps store
+
+ Revision ID: b1d650afb45d
+ Revises: 270f52aeacca
+ Create Date: 2021-09-22 09:50:37.456978
+
+ """
+ from alembic import op
+ import sqlalchemy as sa
+
+
+ # revision identifiers, used by Alembic.
+ revision = 'b1d650afb45d'
+ down_revision = '270f52aeacca'
+ branch_labels = None
+ depends_on = None
+
+
+ def upgrade():
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.add_column('cached_data', sa.Column('static_data_timestamps', sa.Text(), nullable=True))
+     # ### end Alembic commands ###
+
+
+ def downgrade():
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.drop_column('cached_data', 'static_data_timestamps')
+     # ### end Alembic commands ###
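For context: the new Text column holds a JSON-serialized mapping from static data source names to refresh timestamps. The keys come from get_static_data_timestamps() further down in this PR; the values below are illustrative. A minimal sketch of a stored blob:

import json

# Illustrative only: real keys are produced by get_static_data_timestamps(),
# real values are written by _store_batch() below.
stored_blob = json.dumps({
    "orphans": "2021-09-22T09:45:12",
    "updates": "2021-09-22T09:40:03",
    "koschei": "2021-09-22T09:30:55",
})
# This string is what ends up in the cached_data.static_data_timestamps column.
assert json.loads(stored_blob)["orphans"] == "2021-09-22T09:45:12"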

@@ -84,23 +84,27 @@
              return {
                  "packages": None,
                  "status": 400,
-                 "last_synced": None,
                  "error": "The orphan user can't be combined with anything else!"
              }

      orphan_data = CACHE.get('packager-dashboard_orphan_page')["static_info"]["data"]
+
+     refresh_times_dict = {key: None for key in package_data_dict.keys()}
+     refresh_times_dict["orphans"] = CACHE.get_refreshed_time('orphans_json').isoformat()
+
      for package in orphan_data["packages"]:
          packages[package] = {
              "package": package,
              "kind": "primary",
              "origin": ["orphan"],
-             "data": {**package_data_dict}
+             "data": {**package_data_dict},
+             "refresh_times": refresh_times_dict
          }
          packages[package]["data"]["orphans"] = orphan_data["orphans"][package]

-     return {"packages": packages,
+     return {
+         "packages": packages,
          "status": 200,
-         "last_synced": CACHE.get_refreshed_time('orphans_json').isoformat(),
          "error": None
      }
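In this orphan-only branch, every package shares one refresh_times mapping in which every source is None except the orphans feed. A minimal sketch of that shape, with illustrative keys and timestamp:

# Illustrative stand-ins for package_data_dict and the orphans refresh time.
package_data_dict = {"prs": None, "bzs": None, "abrt_reports": None, "orphans": None}

refresh_times_dict = {key: None for key in package_data_dict}
refresh_times_dict["orphans"] = "2021-09-22T09:45:12"  # CACHE.get_refreshed_time(...).isoformat()

# Only the orphan feed carries a timestamp; nothing else was fetched here.
assert refresh_times_dict == {"prs": None, "bzs": None, "abrt_reports": None,
                              "orphans": "2021-09-22T09:45:12"}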

  
@@ -114,7 +118,6 @@
          return jsonify({
              "packages": None,
              "status": 400,
-             "last_synced": None,
              "error": "The correct format to query this endpoint is /api/v2/packager_dashboard?users=[users]&packages=[packages]"
          }), 400

@@ -138,6 +141,8 @@
          'abrt_reports': {}
      }

+     refresh_times = {key: None for key in package_data_dict}
+
      packages = {}

      # orphan user requires a bit of special handling since we don't care for anything else than the orphan data
@@ -158,7 +163,8 @@
                  "package": package,
                  "kind": "primary" if package in user_packages["primary"] else "group",
                  "origin": [user],
-                 "data": {**package_data_dict}
+                 "data": {**package_data_dict},
+                 "refresh_times": {**refresh_times}
              }

              # Remove package from explicit packages if we have it here
@@ -169,7 +175,8 @@
              "package": package,
              "kind": "additional",
              "origin": [],
-             "data": {**package_data_dict}
+             "data": {**package_data_dict},
+             "refresh_times": {**refresh_times}
          }

      flat_pkgs_list = packages.keys()
@@ -177,6 +184,10 @@
      dashboard_helpers.update_users_access_time(users)

      generic_data = dashboard_data_generic(flat_pkgs_list)
+     refresh_times_dict = {key: generic_data["last_synced"] for key in package_data_dict}
+
+     # Generic package versions are a bit special... sigh
+     refresh_times_dict["package_versions"] = CACHE.get_refreshed_time("package_versions_generic").isoformat()

      prs = dashboard_data_prs(flat_pkgs_list)
      bzs = dashboard_data_bzs(flat_pkgs_list, authenticated=is_packager())
@@ -185,14 +196,6 @@
      # Get the worst of status codes
      user_status = max(int(partial_status) for partial_status in [prs["status"], bzs["status"], abrt_reports["status"]])

-     # Get last_synced
-     last_synced = {
-         "prs": prs["last_synced"],
-         "bzs": bzs["last_synced"],
-         "abrt_reports": abrt_reports["last_synced"],
-         "generic_data": generic_data["last_synced"]
-     }
-
      # And finally, fill in our packages dict
      for package in flat_pkgs_list:
          packages[package]["data"] |= generic_data["data"][package]
@@ -200,7 +203,13 @@
          packages[package]["data"]["bzs"] = bzs["data"][package]
          packages[package]["data"]["abrt_reports"] = abrt_reports["data"][package]

-     return jsonify({"packages": packages, "status": user_status, "last_synced": last_synced, "error": None})
+         # And exact sync times
+         packages[package]["refresh_times"] |= generic_data["static_data_timestamps"][package]
+         packages[package]["refresh_times"]["prs"] = prs["cache_ages"][package]
+         packages[package]["refresh_times"]["bzs"] = bzs["cache_ages"][package]
+         packages[package]["refresh_times"]["abrt_reports"] = abrt_reports["cache_ages"][package]
+
+     return jsonify({"packages": packages, "status": user_status, "error": None})
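The net effect on api/v2: the top-level last_synced block is gone and each package now carries its own refresh_times. A sketch of the new payload shape, with an invented package name and timestamps:

# Hypothetical api/v2/packager_dashboard response after this change.
example_response = {
    "packages": {
        "python-foo": {
            "package": "python-foo",
            "kind": "primary",
            "origin": ["frantisekz"],
            "data": {"prs": [], "bzs": [], "abrt_reports": [], "updates": []},
            "refresh_times": {
                "prs": "2021-09-22T09:48:01",
                "bzs": "2021-09-22T09:47:30",
                "abrt_reports": "2021-09-22T09:20:11",
                "updates": "2021-09-22T09:45:12",
            },
        },
    },
    "status": 200,
    "error": None,
}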

  

  

  @app.route('/api/v1/packager_dashboard/<user>', methods=['GET'])
@@ -258,12 +267,18 @@
      v1_compat_generic_data["primary_packages"] = user_packages["primary"]
      v1_compat_generic_data["group_packages"] = user_packages["group"]

-     static_info = {'status': generic_data["status"], 'data': v1_compat_generic_data, 'last_synced': generic_data["last_synced"].isoformat()}
+     static_info = {'status': generic_data["status"], 'data': v1_compat_generic_data, 'last_synced': generic_data["last_synced"]}

      prs = dashboard_data_prs(user_packages["combined"])
      bzs = dashboard_data_bzs(user_packages["combined"], authenticated=is_packager())
      abrt = dashboard_data_abrt(user_packages["combined"])

+     # Clean stuff from api/v2
+     for drop in ["cache_ages", "static_data_timestamps"]:
+         del prs[drop]
+         del bzs[drop]
+         del abrt[drop]
+
      return jsonify({'static_info': static_info,
          'prs': prs,
          'bzs': bzs,
@@ -274,29 +289,47 @@

  def dashboard_data_abrt(packages):
      status = 200
-     data, load_status, last_synced, _ = CACHE.async_get_batch('packager-dashboard_abrt_issues', packages, 'low')
+     data, load_status, cache_ages, _, _ = CACHE.async_get_batch('packager-dashboard_abrt_issues', packages, 'low')
      if load_status in [cache_utils.RefresherNotRegistered, cache_utils.AsyncRefreshInProgress]:
          status = 202
-     return {"status": status, "data": data, "last_synced": last_synced}
+     return {
+         "status": status,
+         "data": data,
+         "last_synced": min(cache_ages.values(), default=None),
+         "cache_ages": cache_ages,
+         "static_data_timestamps": {}
+     }

  def dashboard_data_prs(packages):
      status = 200
-     data, load_status, last_synced, _ = CACHE.async_get_batch('packager-dashboard_pull_requests', packages, 'low')
+     data, load_status, cache_ages, _, _ = CACHE.async_get_batch('packager-dashboard_pull_requests', packages, 'low')
      if load_status in [cache_utils.RefresherNotRegistered, cache_utils.AsyncRefreshInProgress]:
          status = 202
-     return {"status": status, "data": data, "last_synced": last_synced}
+     return {
+         "status": status,
+         "data": data,
+         "last_synced": min(cache_ages.values(), default=None),
+         "cache_ages": cache_ages,
+         "static_data_timestamps": {}
+     }


  def dashboard_data_bzs(packages, authenticated=False):
      status = 200
-     data, load_status, last_synced, _ = CACHE.async_get_batch('packager-dashboard_bugs', packages, 'low')
+     data, load_status, cache_ages, _, _ = CACHE.async_get_batch('packager-dashboard_bugs', packages, 'low')
      if load_status in [cache_utils.RefresherNotRegistered, cache_utils.AsyncRefreshInProgress]:
          status = 202

      if not authenticated:
-         return {"status": status, "data": data, "last_synced": last_synced}
+         return {
+             "status": status,
+             "data": data,
+             "last_synced": min(cache_ages.values(), default=None),
+             "cache_ages": cache_ages,
+             "static_data_timestamps": {}
+         }

-     data_private, load_status, last_synced, _  = CACHE.async_get_batch('packager-dashboard_bugs_private', packages, 'low')
+     data_private, load_status, cache_ages, _, _ = CACHE.async_get_batch('packager-dashboard_bugs_private', packages, 'low')
      if load_status in [cache_utils.RefresherNotRegistered, cache_utils.AsyncRefreshInProgress]:
          status = 202

@@ -306,7 +339,13 @@
          else:
              data[package] = data_private[package]

-     return {"status": status, "data": data, "last_synced": last_synced}
+     return {
+         "status": status,
+         "data": data,
+         "last_synced": min(cache_ages.values(), default=None),
+         "cache_ages": cache_ages,
+         "static_data_timestamps": {}
+     }
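All three helpers now derive last_synced as min() over the isoformat() strings in cache_ages. This works because equal-format ISO 8601 strings sort lexicographically in chronological order, so min() yields the oldest entry, and default=None covers an empty cache. A quick check:

from datetime import datetime, timedelta

now = datetime(2021, 9, 22, 9, 50)
cache_ages = {
    "python-foo": (now - timedelta(minutes=5)).isoformat(),
    "python-bar": now.isoformat(),
}
# Lexicographic min over equal-format ISO 8601 strings == chronological min.
assert min(cache_ages.values(), default=None) == "2021-09-22T09:45:00"
assert min({}.values(), default=None) is None  # nothing cached yet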

  

  def dashboard_data_generic(packages):
      """
@@ -315,17 +354,33 @@
      Returns {"status": 200/500, "data": data <dict> or None, "last_synced": Timestamp of last sync (oldest data)}
      """
      status = 200
-     data, load_status, last_synced, missed = CACHE.async_get_batch('packager-dashboard_package_static_cache', packages, 'low', False)
+     data, load_status, cache_ages, missed, static_data_timestamps = CACHE.async_get_batch(
+         'packager-dashboard_package_static_cache',
+         packages,
+         'low',
+         False
+     )
+
      if load_status in [cache_utils.RefresherNotRegistered, cache_utils.AsyncRefreshInProgress, cache_utils.AsyncDataIncomplete]:
          try:
              data_remaining = celery_utils.get_static_package_caches(missed, redis_count=True)
              data |= data_remaining
-             last_synced = datetime.utcnow() if missed == set(packages) else last_synced
+             current_data_timestamps = celery_utils.get_static_data_timestamps()
+             for miss in missed:
+                 cache_ages[miss] = str(datetime.utcnow())
+                 static_data_timestamps[miss] = current_data_timestamps
          except Exception as e:
              watchdog_utils.push_to_watchdog("sync_failed", "packager-dashboard_package_static_cache", str(e))
              data = None
              status = 500
-     return {"status": status, "data": data, "last_synced": last_synced}
+             static_data_timestamps = {}
+     return {
+         "status": status,
+         "data": data,
+         "last_synced": min(cache_ages.values(), default=None),
+         "cache_ages": cache_ages,
+         "static_data_timestamps": static_data_timestamps
+     }

  def dashboard_static_data_package_cache(package):
      # This is just to have something for the packager-dashboard_package_static_cache handle
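When the batch read misses packages, the code above builds them synchronously, stamps each miss with the current time, and attaches the current static data timestamps. A sketch of just that bookkeeping, with stand-ins for the cache output and the celery_utils helpers:

from datetime import datetime

# Stand-ins for async_get_batch() output and celery_utils.get_static_data_timestamps().
cache_ages = {"python-foo": "2021-09-22T09:45:00"}
static_data_timestamps = {"python-foo": {"orphans": "2021-09-22T09:45:12"}}
missed = {"python-bar"}
current_data_timestamps = {"orphans": "2021-09-22T09:49:59"}

for miss in missed:
    # Freshly built entries are as new as "now" (note: str() renders a space
    # separator, unlike the "T" from isoformat() used elsewhere).
    cache_ages[miss] = str(datetime.utcnow())
    # ...and they share the timestamps of the static blobs they were built from.
    static_data_timestamps[miss] = current_data_timestamps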

file modified
+15 -1
@@ -31,12 +31,15 @@
      provider = db.Column(db.Text, unique=True)
      time_created = db.Column(db.DateTime, unique=False)
      raw_text = db.Column(db.Text, unique=False)
+     static_data_timestamps = db.Column(db.Text, unique=False)

-     def __init__(self, provider, raw_text):
+     def __init__(self, provider, raw_text, static_data_timestamps=None):
          self.provider = provider
          self.time_created = datetime.utcnow()
          self.raw_text = raw_text
          self._parsed_raw_text = None
+         self.static_data_timestamps = static_data_timestamps
+         self._parsed_static_data_timestamps = None

      @property
      def data(self):
@@ -46,3 +49,14 @@
          except AttributeError:
              self._parsed_raw_text = json.loads(self.raw_text)
          return self._parsed_raw_text
+
+     @property
+     def static_timestamps(self):
+         if self.static_data_timestamps is None:
+             return None
+         try:
+             if self._parsed_static_data_timestamps is None:
+                 self._parsed_static_data_timestamps = json.loads(self.static_data_timestamps)
+         except AttributeError:
+             self._parsed_static_data_timestamps = json.loads(self.static_data_timestamps)
+         return self._parsed_static_data_timestamps
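The try/except AttributeError mirrors the existing data property: SQLAlchemy bypasses __init__ when it loads rows from the database, so _parsed_static_data_timestamps may not exist on a loaded instance until first access. A stripped-down sketch of the pattern outside the model:

import json

class LazyJson:
    """Parse a JSON text field once, tolerating instances where the cache
    attribute was never initialized (as with ORM-loaded rows)."""

    def __init__(self, raw):
        self.raw = raw  # deliberately no self._parsed here

    @property
    def parsed(self):
        try:
            if self._parsed is None:
                self._parsed = json.loads(self.raw)
        except AttributeError:  # _parsed never existed: first access on this instance
            self._parsed = json.loads(self.raw)
        return self._parsed

blob = LazyJson('{"orphans": "2021-09-22T09:45:12"}')
assert blob.parsed["orphans"] == "2021-09-22T09:45:12"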

file modified
+17 -9
@@ -95,12 +95,14 @@
          return self.__get(False, priority, what, *args, **kwargs)

      def async_get_batch(self, what_base, items, priority="medium", refresh_on_miss=True):
+
          whats = set()
          what_to_item = {}
          data = {}
+         static_data_timestamps = {}

          if what_base not in self._refreshers:
-             return (None, RefresherNotRegistered, None, set())
+             return (None, RefresherNotRegistered, {}, set(), {})

          for item in items:
              what = self._construct_what(what_base, item)
@@ -108,7 +110,7 @@
              what_to_item[what] = item

          if not whats:
-             return (data, None, None, set())
+             return (data, None, {}, set(), {})

          from_db = CachedData.query.filter(CachedData.provider.in_(list(whats))).all()

@@ -116,14 +118,15 @@
          db.session.expunge_all()

          seen = set()
-         cache_ages = set()
+         cache_ages = {}
          for result in from_db:
              if not self._new_enough(result):
                  continue
              item = what_to_item[result.provider]
              data[item] = result.data
+             static_data_timestamps[item] = result.static_timestamps
+             cache_ages[item] = result.time_created.isoformat()
              seen.add(item)
-             cache_ages.add(result.time_created)

          # Schedule refresh for any missing result that we've asked for
          missing = set(items).difference(seen)
@@ -144,10 +147,11 @@
          if missing:
              return (data,
                  AsyncRefreshInProgress if refresh_on_miss else AsyncDataIncomplete,
-                 min(cache_ages) if cache_ages else None,
-                 missing
+                 cache_ages,
+                 missing,
+                 static_data_timestamps
              )
-         return (data, None, min(cache_ages), missing)
+         return (data, None, cache_ages, missing, static_data_timestamps)

      def get(self, what, *args, **kwargs):
          return self.__get(True, "medium", what, *args, **kwargs)
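Callers now unpack a 5-tuple: data, load status, per-item cache ages, the set of missed items, and per-item static data timestamps. A sketch of the new contract, with a stand-in for the real method and invented values:

# Stand-in mimicking the new async_get_batch() return shape.
def fake_async_get_batch(what_base, items, priority="medium", refresh_on_miss=True):
    data = {item: [] for item in items}
    cache_ages = {item: "2021-09-22T09:45:00" for item in items}
    static_data_timestamps = {item: {} for item in items}
    return (data, None, cache_ages, set(), static_data_timestamps)

data, load_status, cache_ages, missing, static_data_timestamps = fake_async_get_batch(
    'packager-dashboard_bugs', ["python-foo"], 'low')
assert missing == set()
assert cache_ages["python-foo"] == "2021-09-22T09:45:00"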
@@ -191,7 +195,7 @@

          return data

-     def _store_batch(self, what_base, data_dict):
+     def _store_batch(self, what_base, data_dict, static_data_timestamps=None):
          """
          Dumps provided data to provided 'what' handle, circumventing the refresh mechanism
          """
@@ -199,6 +203,9 @@
          if what_base not in self._refreshers:
              return RefresherNotRegistered

+         if static_data_timestamps:
+             static_data_timestamps = json.dumps(static_data_timestamps, default=str)
+
          for item in data_dict:
              what = self._construct_what(what_base, item)
@@ -206,8 +213,9 @@
              if row:
                  row.time_created = datetime.datetime.utcnow()
                  row.raw_text = json.dumps(data_dict[item])
+                 row.static_data_timestamps = static_data_timestamps
              else:
-                 row = CachedData(what, json.dumps(data_dict[item]))
+                 row = CachedData(what, json.dumps(data_dict[item]), static_data_timestamps)
                  db.session.add(row)

          try:
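Serializing with json.dumps(..., default=str) means any datetime left in the mapping still encodes, though as str(datetime) with a space separator rather than ISO 8601's "T"; presumably that is why the celery code below converts values with isoformat() before storing. A quick illustration:

import json
from datetime import datetime

stamps = {"orphans": datetime(2021, 9, 22, 9, 45, 12)}
# default=str handles types json cannot encode natively...
assert json.dumps(stamps, default=str) == '{"orphans": "2021-09-22 09:45:12"}'
# ...while pre-converting with isoformat() keeps the "T" separator.
assert json.dumps({k: v.isoformat() for k, v in stamps.items()}) == '{"orphans": "2021-09-22T09:45:12"}'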

file modified
+19 -1
@@ -205,6 +205,20 @@
      for package in packages:
          plan_celery_refresh("low", 'packager-dashboard_abrt_issues', package)

+ def get_static_data_timestamps():
+     """
+     Returns timestamps of the currently available static cache blobs, using the api/v2/packager_dashboard data naming conventions
+     """
+     return {
+         'calendars': oraculum.CACHE.get_refreshed_time("package_calendars"),
+         'orphans': oraculum.CACHE.get_refreshed_time("orphans_json"),
+         'fails_to_install': oraculum.CACHE.get_refreshed_time("health_check_data"),
+         'updates': oraculum.CACHE.get_refreshed_time("bodhi_updates"),
+         'overrides': oraculum.CACHE.get_refreshed_time("bodhi_overrides"),
+         'koschei': oraculum.CACHE.get_refreshed_time("koschei_data"),
+         'package_versions': oraculum.CACHE.get_refreshed_time("package_versions_generic")
+     }
+
  def get_static_package_caches(packages, redis_count=True):
      """
      Builds static cache for given packages
@@ -245,7 +259,11 @@
              data[static_key] = flat_data[static_key][package]
          return_data[package] = data

-     oraculum.CACHE._store_batch("packager-dashboard_package_static_cache", return_data)
+     static_timestamps = get_static_data_timestamps()
+     # Convert to ISO 8601
+     static_timestamps = {k: v.isoformat() for k, v in static_timestamps.items()}
+
+     oraculum.CACHE._store_batch("packager-dashboard_package_static_cache", return_data, static_timestamps)

      if redis_count:
          for package in packages:

Todo:

  • Measure performance impact - Non Issue

Build succeeded.
Build succeeded.
rebased onto 8f2c016 (2 years ago) - Build succeeded.
rebased onto d954d61 (2 years ago) - Build succeeded.
rebased onto 7e27f19 (2 years ago) - Build succeeded.
rebased onto 585c6f0 (2 years ago) - Build succeeded.
rebased onto 2b51562 (2 years ago) - Build succeeded.
rebased onto c128cd3 (2 years ago) - Build succeeded.

:fire: This is fine :fire:

Pull-Request has been merged by frantisekz (2 years ago)