From e2e747053ae4089b65ea6e2dff6003135e7cc21a Mon Sep 17 00:00:00 2001 From: František Zatloukal Date: Oct 05 2022 08:53:35 +0000 Subject: More aggressive cache behavior Improves https://pagure.io/fedora-qa/packager_dashboard/issue/134 --- diff --git a/conf/settings.py.example b/conf/settings.py.example index 75c3746..f0e6833 100644 --- a/conf/settings.py.example +++ b/conf/settings.py.example @@ -22,11 +22,10 @@ MOTD_PASSWORD = 'change-me' # Bugzilla api key is required to fetch private bugs BZ_API_KEY = "" -# MAX_DB_AGE will be ignored as per IGNORE_CACHE_TIMESTAMP setting bellow, data from caches might be used no matter their age -# make sure to set up cron with "runcli.py sync" if set to True -# The idea is to have sync run every hour and leave MAX_DB_AGE big enough not to drop local_cache -# before sync finishes -MAX_DB_AGE = 14400 # Max cache age allowed in seconds (240 minutes) +# Data older than this will be considered as non-existent in the cache +# MAX_DB_AGE will be ignored as per IGNORE_CACHE_TIMESTAMP setting below, +# some data from caches might be used no matter their age +MAX_DB_AGE = 172800 # Max cache age allowed in seconds (2 days) # IGNORE_CACHE_TIMESTAMP sets how oraculum verifies data aga in cache # values can be 'db' or None @@ -34,7 +33,6 @@ MAX_DB_AGE = 14400 # Max cache age allowed in seconds (240 minutes) # None - verify age of database caches, plan refresh if data are too old IGNORE_CACHE_TIMESTAMP = 'db' - ACTIVITY_REQUIRED = 14 # Cache data for users who used the service in at least last 14 days PACKAGE_MAINTAINERS_JSON_URL = "https://src.fedoraproject.org/extras/pagure_owner_alias.json" @@ -99,11 +97,8 @@ SYNC_INTERVALS["packager_dashboard_package_prs"] = 7200 SYNC_INTERVALS["packager-dashboard_abrt_issues"] = 7200 SYNC_INTERVALS["package_versions_generic"] = 43200 -# Set how old can user static info be, default is 12 hours -STATIC_INFO_ROT_AFTER = 43200 - # Every BZ_FULL_EVERY_N_TIMES sync will be full instead of partial fast path 
sync -BZ_FULL_EVERY_N_TIMES = 12 +BZ_FULL_EVERY_N_TIMES = 6 # Specify time (seconds) after which are tasks considered to be in "rotting state" TASKS_ROT_AFTER = 7200 diff --git a/oraculum/config.py b/oraculum/config.py index b39aa58..f65ee7a 100644 --- a/oraculum/config.py +++ b/oraculum/config.py @@ -61,7 +61,7 @@ class Config(object): # None - verify age of database caches, plan refresh if data are too old IGNORE_CACHE_TIMESTAMP = 'db' - MAX_DB_AGE = 14400 # keep data cached for 240 minutes + MAX_DB_AGE = 172800 # keep data cached for 2 days ACTIVITY_REQUIRED = 14 # cache users who used the service in at least last 14 days PACKAGE_MAINTAINERS_JSON_URL = 'https://src.fedoraproject.org/extras/pagure_owner_alias.json' @@ -124,11 +124,8 @@ class Config(object): SYNC_INTERVALS["packager-dashboard_abrt_issues"] = 7200 SYNC_INTERVALS["package_versions_generic"] = 43200 - # Set how old can user static info be, default is 12 hours - STATIC_INFO_ROT_AFTER = 43200 - # Every BZ_FULL_EVERY_N_TIMES sync will be full instead of partial fast path sync - BZ_FULL_EVERY_N_TIMES = 12 + BZ_FULL_EVERY_N_TIMES = 6 # Specify time (seconds) after which are tasks considered to be in "rotting state" TASKS_ROT_AFTER = 7200 diff --git a/oraculum/utils/cache_utils.py b/oraculum/utils/cache_utils.py index 2b05839..818f63a 100644 --- a/oraculum/utils/cache_utils.py +++ b/oraculum/utils/cache_utils.py @@ -120,15 +120,14 @@ class Cached(): seen = set() cache_ages = {} for result in from_db: - if not self._new_enough(result): - continue item = what_to_item[result.provider] data[item] = result.data static_data_timestamps[item] = result.static_timestamps cache_ages[item] = result.time_created.isoformat() - seen.add(item) + if self._new_enough(result, True): + seen.add(item) - # Schedule refresh for any missing result that we've asked for + # Schedule refresh for any missing or "too old" result that we've asked for missing = set(items).difference(seen) if refresh_on_miss: @@ -156,11 +155,10 @@ class 
Cached(): def get(self, what, *args, **kwargs): return self.__get(True, "medium", what, *args, **kwargs) - def _new_enough(self, cached_object): + def _new_enough(self, cached_object, strict=False): if cached_object is None: return False - # If cached_object is an instance of CachedData, it is from the database - if app.config['IGNORE_CACHE_TIMESTAMP'] == "db" and isinstance(cached_object, CachedData): + if not strict and app.config['IGNORE_CACHE_TIMESTAMP'] == "db": return True if datetime.datetime.utcnow() > cached_object.time_created + self.max_cache_age: return False