#50268 Ticket 49873: (cont) Contention on virtual attribute lookup
Closed 3 years ago by spichugi. Opened 5 years ago by tbordaz.
tbordaz/389-ds-base ticket_49873_cont  into  master

@@ -1509,7 +1509,6 @@ 

      long bypasspollcnt = 0;

  

      enable_nunc_stans = config_get_enable_nunc_stans();

-     vattr_global_lock_init();

  #if defined(hpux)

      /* Arrange to ignore SIGPIPE signals. */

      SIGNAL(SIGPIPE, SIG_IGN);

@@ -950,6 +950,10 @@ 

          return_value = 1;

          goto cleanup;

      }

+     /* The thread private counter needs to be allocated after the fork

+      * it is not inherited from parent process

+      */

+     vattr_global_lock_create();

  

      /*

       * Create our thread pool here for tasks to utilise.

@@ -987,7 +987,7 @@ 

          slapi_be_Unlock(be_single);

      }

      if (vattr_lock_acquired) {

-         vattr_unlock();

+         vattr_rd_unlock();

      }

  

  free_and_return_nolock:

@@ -1419,9 +1419,11 @@ 

   * vattr.c

   */

  void vattr_init(void);

- void vattr_global_lock_init(void);

+ void vattr_global_lock_create(void);

  void vattr_rdlock();

- void vattr_unlock();

+ void vattr_rd_unlock();

+ void vattr_wrlock();

+ void vattr_wr_unlock();

  void vattr_cleanup(void);

  

  /*

@@ -267,7 +267,6 @@ 

      Operation *pb_op = NULL;

  

      g_incr_active_threadcnt();

-     vattr_global_lock_init();

  

      slapi_pblock_get(ps->ps_pblock, SLAPI_CONNECTION, &pb_conn);

      slapi_pblock_get(ps->ps_pblock, SLAPI_OPERATION, &pb_op);

file modified
+93 -33
@@ -119,20 +119,45 @@ 

  vattr_init()

  {

      statechange_api = 0;

-     PR_NewThreadPrivateIndex(&thread_private_global_vattr_lock, NULL);

      vattr_map_create();

  

  #ifdef VATTR_TEST_CODE

      vattr_basic_sp_init();

  #endif

  }

- 

+ /* Create a private variable for each individual thread of the current process */

  void

- vattr_global_lock_init()

+ vattr_global_lock_create()

+ {

+     if (PR_NewThreadPrivateIndex(&thread_private_global_vattr_lock, NULL) != PR_SUCCESS) {

+         slapi_log_err(SLAPI_LOG_ALERT,

+               "vattr_global_lock_create", "Failure to create global lock for virtual attribute !\n");

+         PR_ASSERT(0);

+     }

+ }

+ static int

+ global_vattr_lock_get_acquired_count()

  {

-     if (thread_private_global_vattr_lock) {

-         PR_SetThreadPrivate(thread_private_global_vattr_lock, (void *) 0);

+     int *nb_acquired;

+     nb_acquired = (int *) PR_GetThreadPrivate(thread_private_global_vattr_lock);

+     if (nb_acquired == NULL) {

+         /* if it was not initialized set it to zero */

+         nb_acquired = (int *) slapi_ch_calloc(1, sizeof(int));

+         PR_SetThreadPrivate(thread_private_global_vattr_lock, (void *) nb_acquired);

      }

+     return *nb_acquired;

+ }

+ static void

+ global_vattr_lock_set_acquired_count(int nb_acquired)

+ {

+     int *val;

+     val = (int *) PR_GetThreadPrivate(thread_private_global_vattr_lock);

+     if (val == NULL) {

+         /* if it was not initialized set it to zero */

+         val = (int *) slapi_ch_calloc(1, sizeof(int));

+     }

+     *val = nb_acquired;

+     PR_SetThreadPrivate(thread_private_global_vattr_lock, (void *) val);

  }

  /* The map lock can be acquired recursively. So only the first rdlock

   * will acquire the lock.
@@ -142,18 +167,15 @@ 

  void

  vattr_rdlock()

  {

-     if (thread_private_global_vattr_lock) {

-         int nb_acquire = (int) PR_GetThreadPrivate(thread_private_global_vattr_lock);

+     int nb_acquire = global_vattr_lock_get_acquired_count();

  

-         if (nb_acquire == 0) {

-             /* The lock was not held just acquire it */

-             slapi_rwlock_rdlock(the_map->lock);

-         }

-         nb_acquire++;

-         PR_SetThreadPrivate(thread_private_global_vattr_lock, (void *) nb_acquire);

-     } else {

+     if (nb_acquire == 0) {

+         /* The lock was not held just acquire it */

          slapi_rwlock_rdlock(the_map->lock);

      }

+     nb_acquire++;

+     global_vattr_lock_set_acquired_count(nb_acquire);

+ 

  }

  /* The map lock can be acquired recursively. So only the last unlock

   * will release the lock.
@@ -161,25 +183,63 @@ 

   * later calls during the operation processing will just increase/decrease a counter.

   */

  void

- vattr_unlock()

+ vattr_rd_unlock()

  {

-     if (thread_private_global_vattr_lock) {

-         int nb_acquire = (int) PR_GetThreadPrivate(thread_private_global_vattr_lock);

+     int nb_acquire = global_vattr_lock_get_acquired_count();

  

-         if (nb_acquire >= 1) {

-             nb_acquire--;

-             if (nb_acquire == 0) {

-                 slapi_rwlock_unlock(the_map->lock);

-             }

-             PR_SetThreadPrivate(thread_private_global_vattr_lock, (void *) nb_acquire);

-         } else {

-             slapi_log_err(SLAPI_LOG_CRIT,

-               "vattr_unlock", "The lock was not acquire. We should not be here\n");

-             PR_ASSERT(nb_acquire >= 1);

+     if (nb_acquire >= 1) {

+         nb_acquire--;

+         if (nb_acquire == 0) {

+             slapi_rwlock_unlock(the_map->lock);

          }

+         global_vattr_lock_set_acquired_count(nb_acquire);

      } else {

+         /* this is likely the consequence of lock acquire in read during an internal search

+          * but the search callback updated the map and release the readlock and acquired

+          * it in write.

+          * So after the update the lock was no longer held but when completing the internal

+          * search we release the global read lock, that now has nothing to do

+          */

+         slapi_log_err(SLAPI_LOG_INFO,

+           "vattr_rd_unlock", "vattr lock no longer acquired in read.\n");

+     }

+ }

+ 

+ /* The map lock is acquired in write (updating the map)

+  * It exists a possibility that lock is acquired in write while it is already

+  * hold in read by this thread (internal search with updating callback)

+  * In such situation, the we must abandon the read global lock and acquire in write

+  */

+ void

+ vattr_wrlock()

+ {

+     int nb_read_acquire = global_vattr_lock_get_acquired_count();

+ 

+     if (nb_read_acquire) {

+         /* The lock was acquired in read but we need it in write

+          * release it and set the global vattr_lock counter to 0

+          */

          slapi_rwlock_unlock(the_map->lock);

+         global_vattr_lock_set_acquired_count(0);

      }

+     slapi_rwlock_wrlock(the_map->lock);

+ }

+ /* The map lock is release from a write write (updating the map)

+  */

+ void

+ vattr_wr_unlock()

+ {

+     int nb_read_acquire = global_vattr_lock_get_acquired_count();

+ 

+     if (nb_read_acquire) {

+         /* The lock being acquired in write, the private thread counter

+          * (that count the number of time it was acquired in read) should be 0

+          */

+         slapi_log_err(SLAPI_LOG_INFO,

+           "vattr_unlock", "The lock was acquired in write. We should not be here\n");

+         PR_ASSERT(nb_read_acquire == 0);

+     }

+     slapi_rwlock_unlock(the_map->lock);

  }

  /* Called on server shutdown, free all structures, inform service providers that we're going down etc */

  void
@@ -1999,7 +2059,7 @@ 

      *result = (vattr_map_entry *)PL_HashTableLookupConst(the_map->hashtable,

                                                           (void *)basetype);

      /* Release ze lock */

-     vattr_unlock();

+     vattr_rd_unlock();

  

      if (tmp) {

          slapi_ch_free_string(&tmp);
@@ -2018,13 +2078,13 @@ 

  {

      PR_ASSERT(the_map);

      /* Get the writer lock */

-     slapi_rwlock_wrlock(the_map->lock);

+     vattr_wrlock();

      /* Insert the thing */

      /* It's illegal to call this function if the entry is already there */

      PR_ASSERT(NULL == PL_HashTableLookupConst(the_map->hashtable, (void *)vae->type_name));

      PL_HashTableAdd(the_map->hashtable, (void *)vae->type_name, (void *)vae);

      /* Unlock and we're done */

-     slapi_rwlock_unlock(the_map->lock);

+     vattr_wr_unlock();

      return 0;

  }

  
@@ -2161,13 +2221,13 @@ 

                          void *caller_data __attribute__((unused)))

  {

      /* Get the writer lock */

-     slapi_rwlock_wrlock(the_map->lock);

+     vattr_wrlock();

  

      /* go through the list */

      PL_HashTableEnumerateEntries(the_map->hashtable, vattr_map_entry_rebuild_schema, 0);

  

      /* Unlock and we're done */

-     slapi_rwlock_unlock(the_map->lock);

+     vattr_wr_unlock();

  }

  

  
@@ -2204,7 +2264,7 @@ 

                          obj = obj->pNext;

                      }

  

-                     vattr_unlock();

+                     vattr_rd_unlock();

                  }

  

                  slapi_valueset_free(vs);

Bug Description:
The previous fix was incomplete.
First it created the thread private counter before the fork.
The deamon process was not inheriting it.

Second there is a possiblity that an callback of an internal search
tries to update the map. (cos thread monitoring cos definition)
In such case the RW lock was first acquired in read at the top level
of the internal search, then later the callback try to acquire it in write.
this created a deadlock

Fix Description:
The fix consists to create the thread private counter after the deamon creation.
In adding, when acquiring the lock in write, if the lock was already acquired
at the top level (in read), it release the lock and reset the counter. Then acquires
the lock in write.
In the opposite when releasing the lock in read, if the lock was not already acquired
it assumes it was acquired in write and do nothing

https://pagure.io/389-ds-base/issue/49873

Reviewed by: ?

Platforms tested: F30

Flag Day: no

Doc impact: no

There should be no case where there isn't a thread_private_global_vattr_lock though, so why do we check this condition? If we are missing this ,then this could hit a self deadlock case?

Certainly, this is a bug as I think of it. The condition is checking "do we have a thread private value present", not "what is the content of the thread private value. So I think perhaps this could be the cause of some confusion?

@tbordaz, you are still not using the integer pointers correctly. There are a bunch of compiler warnings like:

../389-ds-base/ldap/servers/slapd/vattr.c: In function ‘vattr_rdlock’:
../389-ds-base/ldap/servers/slapd/vattr.c:150:26: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
         int nb_acquire = (int) PR_GetThreadPrivate(thread_private_global_vattr_lock);

Please see this comment on how to fix it:

https://pagure.io/389-ds-base/issue/49873#comment-557494

Thanks,
Mark

At the moment the private index is defined but if it was not (decision to disable this mechanism or a bug), it will fallback to previous behavior.

@tbordaz I think I'd rather see PR_ASSERT(private index) and have the "old behaviour" in #ifdef instead of doing an actual if check every time if the intent is to "feature gate" and roll back.

rebased onto 1a9d414522c65b21baa0746d2e215fdc7a37ddfc

5 years ago

This version appears to have fixed the deadlock I was encountering, ack

rebased onto 6d0ba29

5 years ago

Pull-Request has been merged by tbordaz

5 years ago

389-ds-base is moving from Pagure to Github. This means that new issues and pull requests
will be accepted only in 389-ds-base's github repository.

This pull request has been cloned to Github as issue and is available here:
- https://github.com/389ds/389-ds-base/issues/3327

If you want to continue to work on the PR, please navigate to the github issue,
download the patch from the attachments and file a new pull request.

Thank you for understanding. We apologize for all inconvenience.

Pull-Request has been closed by spichugi

3 years ago