#50650 Issue 50646 - Improve task handling during shutdowns
Merged 2 months ago by mreynolds. Opened 2 months ago by mreynolds.
mreynolds/389-ds-base issue50646  into  389-ds-base-1.4.1

@@ -1626,7 +1626,10 @@ 

  void

  import_main(void *arg)

this is now in db-bdb/bdb_inmport.c.

do you prepare another version on master ?

  {

+     /* For online import tasks increment/decrement the global thread count */

+     g_incr_active_threadcnt();

      import_main_offline(arg);

+     g_decr_active_threadcnt();

  }

  

  int

@@ -1116,6 +1116,11 @@ 

          ns_thrpool_wait(tp);

      }

  

+     if (!in_referral_mode) {

+         /* signal tasks to start shutting down */

+         task_cancel_all();

+     }

+ 

      threads = g_get_active_threadcnt();

      if (threads > 0) {

          slapi_log_err(SLAPI_LOG_INFO, "slapd_daemon",

file modified
+1 -1

@@ -2002,7 +2002,7 @@ 

  {

      Slapi_Entry **entries = NULL;

      Slapi_PBlock *pb = slapi_pblock_new();

-     struct slapdplugin *plugin;

+     struct slapdplugin *plugin = NULL;

      char *query, *dn, *cn;

      int ret = 0;

  

@@ -802,6 +802,7 @@ 

  

  /* begin and end the task subsystem */

  void task_init(void);

+ void task_cancel_all(void);

  void task_shutdown(void);

  void task_cleanup(void);

  

file modified
+25 -15

@@ -26,7 +26,7 @@ 

   */

  static Slapi_Task *global_task_list = NULL;

  static PRLock *global_task_lock = NULL;

- static int shutting_down = 0;

+ static uint64_t shutting_down = 0;

  

  /***********************************

   * Private Defines

@@ -582,7 +582,7 @@ 

      Slapi_Task *task = NULL;

      char *dn = NULL;

  

-     if (rawdn == NULL) {

+     if (rawdn == NULL || shutting_down) {

          return NULL;

      }

  

@@ -611,9 +611,20 @@ 

      PR_Lock(task->task_log_lock);

  

      PR_Lock(global_task_lock);

+     if (shutting_down) {

+         /* Abort!  Free everything and return NULL */

+         PR_Unlock(task->task_log_lock);

+         PR_Unlock(global_task_lock);

+         PR_DestroyLock(task->task_log_lock);

+         slapi_ch_free((void **)&task);

+         slapi_ch_free_string(&dn);

+         slapi_log_err(SLAPI_LOG_ERR, "new_task", "Server is shutting down, aborting task: %s\n", rawdn);

+         return NULL;

+     }

      task->next = global_task_list;

      global_task_list = task;

      PR_Unlock(global_task_lock);

+ 

      task->task_dn = dn;

      task->task_state = SLAPI_TASK_SETUP;

      task->task_flags = SLAPI_TASK_RUNNING_AS_TASK;

@@ -3002,32 +3013,31 @@ 

  

  /* called when the server is shutting down -- abort all existing tasks */

  void

- task_shutdown(void)

- {

+ task_cancel_all(void) {

      Slapi_Task *task;

-     int found_any = 0;

  

-     /* first, cancel all tasks */

      PR_Lock(global_task_lock);

      shutting_down = 1;

      for (task = global_task_list; task; task = task->next) {

-         if ((task->task_state != SLAPI_TASK_CANCELLED) &&

-             (task->task_state != SLAPI_TASK_FINISHED)) {

+         if (task->task_state != SLAPI_TASK_CANCELLED &&

+             task->task_state != SLAPI_TASK_FINISHED)

+         {

              task->task_state = SLAPI_TASK_CANCELLED;

              if (task->cancel) {

-                 slapi_log_err(SLAPI_LOG_INFO, "task_shutdown", "Cancelling task '%s'\n",

+                 slapi_log_err(SLAPI_LOG_INFO, "task_cancel_all", "Canceling task '%s'\n",

                                task->task_dn);

                  (*task->cancel)(task);

-                 found_any = 1;

              }

          }

      }

+     PR_Unlock(global_task_lock);

+ }

  

-     if (found_any) {

-         /* give any tasks 1 second to say their last rites */

-         DS_Sleep(PR_SecondsToInterval(1));

-     }

- 

+ void

+ task_shutdown(void)

+ {

+     /* Now we can destroy the tasks... */

+     PR_Lock(global_task_lock);

      while (global_task_list) {

          destroy_task(0, global_task_list);

      }

Bug Description:

There is a race condition when stopping the server and a running import task that can cause a heap-use-after-free.

Fix Description:

For an import task, encapsulate the import thread with a global thread increment/decrement (just like the export task). Also improved how tasks are notified to abort by notifying them before we wait for active threads to finish. Then the tasks get destroyed after all threads are complete.

relates: https://pagure.io/389-ds-base/issue/50646

this is now in db-bdb/bdb_inmport.c.

do you prepare another version on master ?

this is now in db-bdb/bdb_inmport.c.
do you prepare another version on master ?

I will, but this needs to get into 1.3.10 first, so I started at 1.4.1 branch, and will front port it to master once ack'ed.

this is now in db-bdb/bdb_inmport.c.
do you prepare another version on master ?

I will, but this needs to get into 1.3.10 first, so I started at 1.4.1 branch, and will front port it to master once ack'ed.

ok, ack

rebased onto 4673148

2 months ago

Pull-Request has been merged by mreynolds

2 months ago