#50878 Ticket 50877 - task to run tests of csn generator
Closed 3 years ago by spichugi. Opened 4 years ago by tbordaz.
tbordaz/389-ds-base ticket_50877  into  master

@@ -11,6 +11,7 @@ 

  from lib389.tasks import *

  from lib389.utils import *

  from lib389.topologies import topology_m4 as topo_m4

+ from lib389.topologies import topology_m2 as topo_m2

  from . import get_repl_entries

  from lib389.idm.user import UserAccount

  from lib389.replica import ReplicationManager
@@ -499,6 +500,26 @@ 

      log.info('Check the error log for the error')

      assert topo_m4.ms["master1"].ds_error_log.match('.*nsds5ReplicaBackoffMax.*10.*invalid.*')

  

+ def test_csngen_task(topo_m2):

+     """Test csn generator test

+ 

+     :id: b976849f-dbed-447e-91a7-c877d5d71fd0

+     :setup: MMR with 2 masters

+     :steps:

+         1. Create a csngen_test task

+         2. Check that debug messages "_csngen_gen_tester_main" are in errors logs

+     :expectedresults:

+         1. Should succeeds

+         2. Should succeeds

+     """

+     m1 = topo_m2.ms["master1"]

+     csngen_task = csngenTestTask(m1)

+     csngen_task.create(properties={

+         'ttl': '300'

+     })

+     time.sleep(10)

+     log.info('Check the error log contains strings showing csn generator is tested')

+     assert m1.searchErrorsLog("_csngen_gen_tester_main")

  

  if __name__ == '__main__':

      # Run isolated

@@ -812,6 +812,11 @@ 

      char *replGen;

  } cleanruv_purge_data;

  

+ /* Argument handed to the thread that runs the csn generator test */
+ typedef struct _csngen_test_data
+ {
+     Slapi_Task *task; /* task created for this test run */
+ } csngen_test_data;

+ 

  /* In repl5_replica_config.c */

  int replica_config_init(void);

  void replica_config_destroy(void);

@@ -50,6 +50,7 @@ 

  static int replica_config_delete(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *entryAfter, int *returncode, char *returntext, void *arg);

  static int replica_config_search(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *entryAfter, int *returncode, char *returntext, void *arg);

  static int replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, int *returncode, char *returntext, void *arg);

+ static int replica_csngen_test_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, int *returncode, char *returntext, void *arg);

  static int replica_config_change_type_and_id(Replica *r, const char *new_type, const char *new_id, char *returntext, int apply_mods);

  static int replica_config_change_updatedn(Replica *r, const LDAPMod *mod, char *returntext, int apply_mods);

  static int replica_config_change_updatedngroup(Replica *r, const LDAPMod *mod, char *returntext, int apply_mods);
@@ -167,6 +168,17 @@ 

      slapi_task_register_handler("cleanallruv", replica_cleanall_ruv_task);

      slapi_task_register_handler("abort cleanallruv", replica_cleanall_ruv_abort);

  

+     /* register the csngen_test task

+      *

+      * To start the test, create a task

+      * dn: cn=run the test,cn=csngen_test,cn=tasks,cn=config

+      * objectclass: top

+      * objectclass: extensibleobject

+      * cn: run the test

+      * ttl: 300

+      */

+     slapi_task_register_handler("csngen_test", replica_csngen_test_task);

+ 

      return 0;

  }

  
@@ -1370,6 +1382,61 @@ 

      slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "cleanAllRUV_task - Finished successfully\n");

      return LDAP_SUCCESS;

  }

+ /* Thread body of the csngen_test task.
+  *
+  * It runs the tests of the csn generator (csngen_test(), defined in csngen.c).
+  * The test logs a set of csn generated while simulating local and remote
+  * time skews. All csn should increase.
+  *
+  * arg is the csngen_test_data allocated by replica_csngen_test_task(). This
+  * thread owns it and frees it before returning. When data->task is not set
+  * the test still runs, but no task status is reported.
+  */
+ void
+ replica_csngen_test_thread(void *arg)
+ {
+     csngen_test_data *data = (csngen_test_data *)arg;
+     int rc = 0; /* csngen_test() returns nothing, so the task always finishes with 0 */
+ 
+     if (data->task) {
+         /* keep a reference on the task for as long as this thread uses it */
+         slapi_task_inc_refcount(data->task);
+         slapi_log_err(SLAPI_LOG_INFO, repl_plugin_name, "replica_csngen_test_thread --> refcount incremented.\n");
+     }
+ 
+     /* defined in csngen.c */
+     csngen_test();
+ 
+     if (data->task) {
+         slapi_task_finish(data->task, rc);
+         slapi_task_dec_refcount(data->task);
+         slapi_log_err(SLAPI_LOG_INFO, repl_plugin_name, "replica_csngen_test_thread <-- refcount decremented.\n");
+     }
+ 
+     /* the task handler handed this allocation over to the thread: release it */
+     slapi_ch_free((void **)&data);
+ }

+ 

+ /* Task handler registered for "csngen_test".
+  *
+  * It spawns a thread (replica_csngen_test_thread) running the test of the
+  * csn generator, and passes it a freshly allocated csngen_test_data that
+  * carries the new task.
+  *
+  * e is the task entry; its normalized DN is used to create the task.
+  * returncode is set to LDAP_OPERATIONS_ERROR when the thread cannot be created.
+  *
+  * Returns SLAPI_DSE_CALLBACK_OK once the thread is started,
+  * SLAPI_DSE_CALLBACK_ERROR otherwise.
+  */
+ static int
+ replica_csngen_test_task(Slapi_PBlock *pb __attribute__((unused)),
+                           Slapi_Entry *e,
+                           Slapi_Entry *eAfter __attribute__((unused)),
+                           int *returncode,
+                           char *returntext __attribute__((unused)),
+                           void *arg __attribute__((unused)))
+ {
+     Slapi_Task *task = NULL;
+     csngen_test_data *data = NULL;
+     PRThread *thread = NULL;
+ 
+     /* allocate new task now */
+     task = slapi_new_task(slapi_entry_get_ndn(e));
+     data = (csngen_test_data *)slapi_ch_calloc(1, sizeof(csngen_test_data));
+     data->task = task;
+ 
+     thread = PR_CreateThread(PR_USER_THREAD, replica_csngen_test_thread,
+                              (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
+                              PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE);
+     if (thread == NULL) {
+         /* The thread never started: nobody else will release 'data' or
+          * finish the task, so do both here.
+          */
+         *returncode = LDAP_OPERATIONS_ERROR;
+         slapi_ch_free((void **)&data);
+         /* the worker thread tolerates a NULL task, so guard against it here too */
+         if (task) {
+             slapi_task_finish(task, SLAPI_DSE_CALLBACK_ERROR);
+         }
+         return SLAPI_DSE_CALLBACK_ERROR;
+     }
+ 
+     return SLAPI_DSE_CALLBACK_OK;
+ }

  

  static int

  replica_cleanall_ruv_task(Slapi_PBlock *pb __attribute__((unused)),

file modified
+39 -16
@@ -247,9 +247,32 @@ 

  }

  

  /* this function should be called when a remote CSN for the same part of

-    the dit becomes known to the server (for instance, as part of RUV during

-    replication session. In response, the generator would adjust its notion

-    of time so that it does not generate smaller csns */

+  * the dit becomes known to the server (for instance, as part of RUV during

+  * replication session. In response, the generator would adjust its notion

+  * of time so that it does not generate smaller csns

+  *

+  * The following counters are updated

+  *   - when a new csn is generated

+  *   - when csngen is adjusted (beginning of an incoming (extop) or outgoing

+  *     (inc_protocol) session)

+  *

+  * sampled_time: It takes the value of current system time.

+  *

+  * remote offset: it is updated when the 'csn' argument is ahead of the next csn
+  * that the csn generator will generate. It is the MAX jump ahead, not a
+  * cumulative counter (e.g. if remote_offset=7 and 'csn' is 5sec ahead,
+  * remote_offset stays the same and the jump ahead (5s) pours into the local
+  * offset). The interest of this counter is not clear: it gives an indication
+  * of the maximum jump ahead, but not much more.

+  *

+  * local offset: it is increased if

+  *   - system time is going backward (compare sampled_time)

+  *   - if 'csn' argument is ahead of csn that the csn generator would generate

+  *     AND diff('csn', csngen.new_csn) < remote_offset

+  *     then the diff "pour" into local_offset

+  *  It is decreased as the clock is ticking, local offset is "consumed" as

+  *  sampled_time progresses.

+  */

  int

  csngen_adjust_time(CSNGen *gen, const CSN *csn)

  {
@@ -286,7 +309,7 @@ 

          (CSN_SUCCESS != (rc = _csngen_adjust_local_time(gen, cur_time)))) {

          /* _csngen_adjust_local_time will log error */

          slapi_rwlock_unlock(gen->lock);

-         csngen_dump_state(gen);

+         csngen_dump_state(gen, SLAPI_LOG_DEBUG);

          return rc;

      }

  
@@ -312,7 +335,7 @@ 

                                "Adjustment limit exceeded; value - %ld, limit - %ld\n",

                                remote_offset, (long)CSN_MAX_TIME_ADJUST);

                  slapi_rwlock_unlock(gen->lock);

-                 csngen_dump_state(gen);

+                 csngen_dump_state(gen, SLAPI_LOG_DEBUG);

                  return CSN_LIMIT_EXCEEDED;

              }

          } else if (remote_offset > 0) { /* still need to account for this */
@@ -421,16 +444,16 @@ 

  

  /* debugging function */

  void

- csngen_dump_state(const CSNGen *gen)

+ csngen_dump_state(const CSNGen *gen, int severity)

  {

      if (gen) {

          slapi_rwlock_rdlock(gen->lock);

-         slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "CSN generator's state:\n");

-         slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\treplica id: %d\n", gen->state.rid);

-         slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tsampled time: %ld\n", gen->state.sampled_time);

-         slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tlocal offset: %ld\n", gen->state.local_offset);

-         slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tremote offset: %ld\n", gen->state.remote_offset);

-         slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tsequence number: %d\n", gen->state.seq_num);

+         slapi_log_err(severity, "csngen_dump_state", "CSN generator's state:\n");

+         slapi_log_err(severity, "csngen_dump_state", "\treplica id: %d\n", gen->state.rid);

+         slapi_log_err(severity, "csngen_dump_state", "\tsampled time: %ld\n", gen->state.sampled_time);

+         slapi_log_err(severity, "csngen_dump_state", "\tlocal offset: %ld\n", gen->state.local_offset);

+         slapi_log_err(severity, "csngen_dump_state", "\tremote offset: %ld\n", gen->state.remote_offset);

+         slapi_log_err(severity, "csngen_dump_state", "\tsequence number: %d\n", gen->state.seq_num);

          slapi_rwlock_unlock(gen->lock);

      }

  }
@@ -445,7 +468,7 @@ 

      CSNGen *gen = csngen_new(255, NULL);

  

      slapi_log_err(SLAPI_LOG_DEBUG, "csngen_test", "staring csn generator test ...");

-     csngen_dump_state(gen);

+     csngen_dump_state(gen, SLAPI_LOG_INFO);

  

      rc = _csngen_start_test_threads(gen);

      if (rc == 0) {
@@ -453,7 +476,7 @@ 

      }

  

      _csngen_stop_test_threads();

-     csngen_dump_state(gen);

+     csngen_dump_state(gen, SLAPI_LOG_INFO);

      slapi_log_err(SLAPI_LOG_DEBUG, "csngen_test", "csn generator test is complete...");

  }

  
@@ -783,7 +806,7 @@ 

                                "Failed to adjust generator's time; csn error - %d\n", rc);

              }

  

-             csngen_dump_state(gen);

+             csngen_dump_state(gen, SLAPI_LOG_INFO);

          }

          csn_free(&csn);

  
@@ -811,7 +834,7 @@ 

           * g_sampled_time -= slapi_rand () % 100;

           */

  

-         csngen_dump_state(gen);

+         csngen_dump_state(gen, SLAPI_LOG_INFO);

      }

  

      PR_AtomicDecrement(&s_thread_count);

@@ -272,7 +272,7 @@ 

  void csngen_unregister_callbacks(CSNGen *gen, void *cookie);

  

  /* debugging function */

- void csngen_dump_state(const CSNGen *gen);

+ void csngen_dump_state(const CSNGen *gen, int severity);

  

  /* this function tests csn generator */

  void csngen_test(void);

@@ -189,6 +189,19 @@ 

          return super(USNTombstoneCleanupTask, self)._validate(rdn, properties, basedn)

  

  

+ class csngenTestTask(Task):

+     """A single instance of csn generator test task entry

+ 

+     :param instance: An instance

+     :type instance: lib389.DirSrv

+     """

+ 

+     def __init__(self, instance, dn=None):

+         self.cn = 'csngenTest_' + Task._get_task_date()

+         dn = "cn=" + self.cn + ",cn=csngen_test," + DN_TASKS

+ 

+         super(csngenTestTask, self).__init__(instance, dn)

+ 

  class SchemaReloadTask(Task):

      """A single instance of schema reload task entry

  

Bug Description:
A test of the csn generator exists (csngen_test()),
but it is not called from any function.

Fix Description:
Register a task container 'cn=csngen_test,cn=tasks,cn=config'
that calls the test function.

https://pagure.io/389-ds-base/issue/50877

Reviewed by: ?

Platforms tested: F29

Flag Day: no

Doc impact: no

This code all looks fine, is there a reason not to put it behind -DDEBUG though? IE is there a need for end-users to be able to run this?

This code all looks fine, is there a reason not to put it behind -DDEBUG though? IE is there a need for end-users to be able to run this?

I think we frequently have customer deployments showing issues we cannot reproduce. Being able to run this test could be useful. I would not document it though.

@firstyear , @lkrispen thanks for the review.
From a testing point of view, calling csngen_test could help to detect a regression if the csn generator code changes. For example, it is easier to simulate remote/local offsets with this test than to play with the system time on a test topology. Regarding tests, I think some improvements can be made:
- we should create a tool that parses the logs and confirms that generated CSNs always increase
- check that generated CSNs conform to the csn generator tuning

For detection in customer deployments we also need more work. At the moment it tests the adjustment functions with random CSNs that are ahead. It would be interesting to see how the csn generator behaves with adjustments in the past. Another option would be to replay in real time the CSNs logged in the access log.
When running on a customer machine, it would also be impacted by system time shifts.

Okay, I'm happy with this :) It may be good to have lib389 tests and functions related to this too to help us automate this - we can even make tools into the cli that don't list on --help, but could create the test if required.

@firstyear, was that an ack?

Sorry for the delay, the PR dropped off my radar, but it could now be useful for additional tests with a library playing with the time.

Yes it's an ack, but also would be good to have some lib389 tests for it later too to show it works as you want :)

rebased onto 25f46b6931d328e86eaa37185a454010b551360e

3 years ago

rebased onto 88a33cde321bc178a16949494132d1908864ac74

3 years ago

rebased onto 69a389e

3 years ago

Thanks. Updated patch with lib389 and testcase :)

Pull-Request has been merged by tbordaz

3 years ago

389-ds-base is moving from Pagure to Github. This means that new issues and pull requests
will be accepted only in 389-ds-base's github repository.

This pull request has been cloned to Github as issue and is available here:
- https://github.com/389ds/389-ds-base/issues/3931

If you want to continue to work on the PR, please navigate to the github issue,
download the patch from the attachments and file a new pull request.

Thank you for understanding. We apologize for all inconvenience.

Pull-Request has been closed by spichugi

3 years ago