| |
@@ -61,6 +61,42 @@
|
| |
return 0;
|
| |
}
|
| |
|
| |
+ /*
|
| |
+ * On some cloud platforms the time functions don't always work as expected.
|
| |
+ * We should be checking for errors. Since getting the time is so important
|
| |
+ * to the server, if we do get an error we should retry a few times before
|
| |
+ * giving up.
|
| |
+ */
|
| |
+ static int32_t
|
| |
+ slapi_clock_gettime(int64_t clock_id, struct timespec *tp)
|
| |
+ {
|
| |
+ int32_t count = 0;
|
| |
+
|
| |
+ while (count < 5) {
|
| |
+ if (clock_gettime(clock_id, tp) == 0) {
|
| |
+ /* Success, we got the time */
|
| |
+ if (count > 0) {
|
| |
+ /* This succeeded on a retry, log a follow up message */
|
| |
+ slapi_log_err(SLAPI_LOG_NOTICE, "slapi_clock_gettime",
|
| |
+ "Successfully retrieved system time\n");
|
| |
+ }
|
| |
+ return 0;
|
| |
+ } else {
|
| |
+ /* Failed to get the system time */
|
| |
+ slapi_log_err(SLAPI_LOG_ERR, "slapi_clock_gettime",
|
| |
+ "Getting system time failed! Errno (%d) (%s) Trying again ...\n",
|
| |
+ errno, slapd_system_strerror(errno));
|
| |
+ DS_Sleep(PR_MillisecondsToInterval(100));
|
| |
+ count++;
|
| |
+ }
|
| |
+ }
|
| |
+ slapi_log_err(SLAPI_LOG_CRIT, "slapi_clock_gettime",
|
| |
+ "Getting system time failed! Errno (%d) (%s) Giving up.\n",
|
| |
+ errno, slapd_system_strerror(errno));
|
| |
+ return -1;
|
| |
+
|
| |
+ }
|
| |
+
|
| |
time_t
|
| |
current_time(void)
|
| |
{
|
| |
@@ -69,8 +105,8 @@
|
| |
* but this should be removed in favour of the
|
| |
* more accurately named slapi_current_utc_time
|
| |
*/
|
| |
- struct timespec now;
|
| |
- clock_gettime(CLOCK_REALTIME, &now);
|
| |
+ struct timespec now = {0};
|
| |
+ slapi_clock_gettime(CLOCK_REALTIME, &now);
|
| |
return now.tv_sec;
|
| |
}
|
| |
|
| |
@@ -83,24 +119,24 @@
|
| |
struct timespec
|
| |
slapi_current_rel_time_hr(void)
|
| |
{
|
| |
- struct timespec now;
|
| |
- clock_gettime(CLOCK_MONOTONIC, &now);
|
| |
+ struct timespec now = {0};
|
| |
+ slapi_clock_gettime(CLOCK_MONOTONIC, &now);
|
| |
return now;
|
| |
}
|
| |
|
| |
struct timespec
|
| |
slapi_current_utc_time_hr(void)
|
| |
{
|
| |
- struct timespec ltnow;
|
| |
- clock_gettime(CLOCK_REALTIME, <now);
|
| |
+ struct timespec ltnow = {0};
|
| |
+ slapi_clock_gettime(CLOCK_REALTIME, <now);
|
| |
return ltnow;
|
| |
}
|
| |
|
| |
time_t
|
| |
slapi_current_utc_time(void)
|
| |
{
|
| |
- struct timespec ltnow;
|
| |
- clock_gettime(CLOCK_REALTIME, <now);
|
| |
+ struct timespec ltnow = {0};
|
| |
+ slapi_clock_gettime(CLOCK_REALTIME, <now);
|
| |
return ltnow.tv_sec;
|
| |
}
|
| |
|
| |
@@ -108,9 +144,9 @@
|
| |
slapi_timestamp_utc_hr(char *buf, size_t bufsize)
|
| |
{
|
| |
PR_ASSERT(bufsize >= SLAPI_TIMESTAMP_BUFSIZE);
|
| |
- struct timespec ltnow;
|
| |
+ struct timespec ltnow = {0};
|
| |
struct tm utctm;
|
| |
- clock_gettime(CLOCK_REALTIME, <now);
|
| |
+ slapi_clock_gettime(CLOCK_REALTIME, <now);
|
| |
gmtime_r(&(ltnow.tv_sec), &utctm);
|
| |
strftime(buf, bufsize, "%Y%m%d%H%M%SZ", &utctm);
|
| |
}
|
| |
@@ -252,7 +288,7 @@
|
| |
expire->tv_sec = 0;
|
| |
expire->tv_nsec = 0;
|
| |
} else {
|
| |
- clock_gettime(CLOCK_MONOTONIC, expire);
|
| |
+ slapi_clock_gettime(CLOCK_MONOTONIC, expire);
|
| |
expire->tv_sec += timeout;
|
| |
}
|
| |
}
|
| |
@@ -278,8 +314,8 @@
|
| |
if (expire->tv_sec == 0 && expire->tv_nsec == 0) {
|
| |
return TIMER_CONTINUE;
|
| |
}
|
| |
- struct timespec now;
|
| |
- clock_gettime(CLOCK_MONOTONIC, &now);
|
| |
+ struct timespec now = {0};
|
| |
+ slapi_clock_gettime(CLOCK_MONOTONIC, &now);
|
| |
if (now.tv_sec > expire->tv_sec ||
|
| |
(expire->tv_sec == now.tv_sec && now.tv_sec > expire->tv_nsec)) {
|
| |
return TIMER_EXPIRED;
|
| |
Bug Description:
On some cloud platforms we see issues with replication time skew. It's not clear if the time functions are failing or just returning the wrong value. We do not check the result code from any of the time functions.
Fix Description:
Add a wrapper function for clock_gettime() that will check for errors, log a message, and retry a few times. We retry because getting the time is so critical to things like replication and logging.
Also added a check for a large jump in CSN generation times; a message is logged if one is detected.
relates: https://pagure.io/389-ds-base/issue/51095