#50822 Ticket 49624 cont - DB Deadlock on modrdn appears to corrupt database and entry cache
Closed 3 years ago by spichugi. Opened 4 years ago by lkrispen.
lkrispen/389-ds-base t49624cont  into  master

@@ -723,7 +723,7 @@ 

      }

      cache->c_maxsize = size;

      if (cache->c_curentries > 0) {

-         slapi_log_err(SLAPI_LOG_WARNING,

+         slapi_log_err(SLAPI_LOG_CACHE,

                        "entrycache_clear_int", "There are still %" PRIu64 " entries "

                                                "in the entry cache.\n",

                        cache->c_curentries);

@@ -3064,7 +3064,7 @@ 

  

      txn_test_init_cfg(&cfg);

  

-     if(BDB_CONFIG(li)->bdb_enable_transactions) {

+     if(!BDB_CONFIG(li)->bdb_enable_transactions) {

          goto end;

      }

  

@@ -67,6 +67,7 @@ 

      Slapi_DN *dn_newsuperiordn = NULL;

      Slapi_DN dn_parentdn;

      Slapi_DN *orig_dn_newsuperiordn = NULL;

+     Slapi_DN *pb_dn_newsuperiordn = NULL; /* used to check what is currently in the pblock */

      Slapi_Entry *target_entry = NULL;

      Slapi_Entry *original_targetentry = NULL;

      int rc;
@@ -248,30 +249,45 @@ 

              slapi_sdn_set_dn_byref(&dn_newrdn, original_newrdn);

              original_newrdn = slapi_ch_strdup(original_newrdn);

  

-             slapi_pblock_get(pb, SLAPI_MODRDN_NEWSUPERIOR_SDN, &dn_newsuperiordn);

-             slapi_sdn_free(&dn_newsuperiordn);

-             slapi_pblock_set(pb, SLAPI_MODRDN_NEWSUPERIOR_SDN, orig_dn_newsuperiordn);

-             dn_newsuperiordn = slapi_sdn_dup(orig_dn_newsuperiordn);

+             /* we need to restart with the original newsuperiordn which could have

+              * been modified. So check what is in the pblock, if it was changed

+              * free it, reset orig dn in th epblock and recreate a working superior

+              */

+             slapi_pblock_get(pb, SLAPI_MODRDN_NEWSUPERIOR_SDN, &pb_dn_newsuperiordn);

+             if (pb_dn_newsuperiordn != orig_dn_newsuperiordn) {

+                 slapi_sdn_free(&pb_dn_newsuperiordn);

+                 slapi_pblock_set(pb, SLAPI_MODRDN_NEWSUPERIOR_SDN, orig_dn_newsuperiordn);

+                 dn_newsuperiordn = slapi_sdn_dup(orig_dn_newsuperiordn);

+             }

              /* must duplicate ec before returning it to cache,

               * which could free the entry. */

-             if ((tmpentry = backentry_dup(original_entry ? original_entry : ec)) == NULL) {

+             if (!original_entry) {

+                 slapi_log_err(SLAPI_LOG_ERR, "ldbm_back_modrdn",

+                     "retrying transaction, but no original entry found\n");

+                 ldap_result_code = LDAP_OPERATIONS_ERROR;

+                 goto error_return;

+             }

+             if ((tmpentry = backentry_dup(original_entry)) == NULL) {

                  ldap_result_code = LDAP_OPERATIONS_ERROR;

                  goto error_return;

              }

              slapi_pblock_get(pb, SLAPI_MODRDN_EXISTING_ENTRY, &ent);

              if (cache_is_in_cache(&inst->inst_cache, ec)) {

                  CACHE_REMOVE(&inst->inst_cache, ec);

-                 if (ent && (ent == ec->ep_entry)) {

-                     /*

-                      * On a retry, it's possible that ec is now stored in the

-                      * pblock as SLAPI_MODRDN_EXISTING_ENTRY.  "ec" will be freed

-                      * by CACHE_RETURN below, so set ent to NULL so don't free

-                      * it again.

-                      */

-                     ent = NULL;

-                 }

+             }

+             if (ent && (ent == ec->ep_entry)) {

+                 /*

+                  * On a retry, it's possible that ec is now stored in the

+                  * pblock as SLAPI_MODRDN_EXISTING_ENTRY.  "ec" will be freed

+                  * by CACHE_RETURN below, so set ent to NULL so don't free

+                  * it again.

+                  * And it needs to be checked always.

+                  */

+                 ent = NULL;

              }

              CACHE_RETURN(&inst->inst_cache, &ec);

+ 

+             /* LK why do we need this ????? */

              if (!cache_is_in_cache(&inst->inst_cache, e)) {

                  if (CACHE_ADD(&inst->inst_cache, e, NULL) < 0) {

                      slapi_log_err(SLAPI_LOG_CACHE,
@@ -1087,8 +1103,9 @@ 

          if (slapi_sdn_get_dn(dn_newsuperiordn) != NULL) {

              retval = ldbm_ancestorid_move_subtree(be, sdn, &dn_newdn, e->ep_id, children, &txn);

              if (retval != 0) {

-                 if (retval == DB_LOCK_DEADLOCK)

+                 if (retval == DB_LOCK_DEADLOCK) {

                      continue;

+                 }

                  if (retval == DB_RUNRECOVERY || LDBM_OS_ERR_IS_DISKFULL(retval))

                      disk_full = 1;

                  MOD_SET_ERROR(ldap_result_code,
@@ -1108,8 +1125,9 @@ 

                                               e->ep_id, &txn, is_tombstone);

              slapi_rdn_done(&newsrdn);

              if (retval != 0) {

-                 if (retval == DB_LOCK_DEADLOCK)

+                 if (retval == DB_LOCK_DEADLOCK) {

                      continue;

+                 }

                  if (retval == DB_RUNRECOVERY || LDBM_OS_ERR_IS_DISKFULL(retval))

                      disk_full = 1;

                  MOD_SET_ERROR(ldap_result_code, LDAP_OPERATIONS_ERROR, retry_count);
@@ -1500,7 +1518,12 @@ 

      done_with_pblock_entry(pb, SLAPI_MODRDN_NEWPARENT_ENTRY);

      done_with_pblock_entry(pb, SLAPI_MODRDN_TARGET_ENTRY);

      slapi_ch_free_string(&original_newrdn);

-     slapi_sdn_free(&orig_dn_newsuperiordn);

+     slapi_pblock_get(pb, SLAPI_MODRDN_NEWSUPERIOR_SDN, &pb_dn_newsuperiordn);

+     if (pb_dn_newsuperiordn != orig_dn_newsuperiordn) {

+         slapi_sdn_free(&orig_dn_newsuperiordn);

+     } else {

+         slapi_sdn_free(&dn_newsuperiordn);

+     }

      backentry_free(&original_entry);

      backentry_free(&tmpentry);

      slapi_entry_free(original_targetentry);
@@ -1561,6 +1584,9 @@ 

      /* Something bad happened so we should give back all the entries */

      if (*targetentry != NULL) {

          cache_unlock_entry(&inst->inst_cache, *targetentry);

+         if (cache_is_in_cache(&inst->inst_cache, *targetentry)) {

+             CACHE_REMOVE(&inst->inst_cache, *targetentry);

+         }

          CACHE_RETURN(&inst->inst_cache, targetentry);

          *targetentry = NULL;

      }

Bug: If there are deadlocks a transaction will be retried. In the case
of modrdn operation there is an error in handling the newsuperior
dn, which has to be reset when the txn is repeated.

Fix: check if the newsuperior in the pblock was changed before the retry and
only then free and reset it.
fix the txn_test_thread to run

Reviewed by: ?

Overall looks good.
Shouldn't be commented that is this case orig_dn_newsuperiordn did not go in the pblock (no retry) and need to be free.

In this case orig_dn_newsuperiordn did got into the pblock so dn_newsuperiordn was duplicated from orig_dn_newsuperiordn.

Except those possibles comments the fix looks good. Ack

rebased onto cd5b3fbc1e2e764a74d65e5bdb847c1f029cf26a

4 years ago

The patch looks good to me. Ack

My test with ASAN passed. Ack!

rebased onto 711b9de

4 years ago

Pull-Request has been merged by lkrispen

4 years ago

389-ds-base is moving from Pagure to Github. This means that new issues and pull requests
will be accepted only in 389-ds-base's github repository.

This pull request has been cloned to Github as issue and is available here:
- https://github.com/389ds/389-ds-base/issues/3876

If you want to continue to work on the PR, please navigate to the github issue,
download the patch from the attachments and file a new pull request.

Thank you for understanding. We apologize for all inconvenience.

Pull-Request has been closed by spichugi

3 years ago