From 13f202fba8962b8b0bc6e3dcd777110e758169fe Mon Sep 17 00:00:00 2001 From: William Brown Date: Apr 03 2017 23:58:28 +0000 Subject: Ticket 49153 - Remove vacuum lock on transaction cleanup Bug Description: Previously we held a vacuum lock to prevent conflicts during transaction clean up. This created a bottleneck. Fix Description: Remove the vacuum lock in favour of a pacman style reference count, where each parent holds a refcount to the child, and decrements it on free. https://pagure.io/389-ds-base/issue/49153 Author: wibrown Review by: mreynolds (Thanks!) --- diff --git a/src/libsds/include/sds.h b/src/libsds/include/sds.h index 1b0dede..d8d51d1 100644 --- a/src/libsds/include/sds.h +++ b/src/libsds/include/sds.h @@ -549,7 +549,6 @@ typedef struct _sds_bptree_instance { * - read_lock * - write_lock * - tail_txn - * - vacuum_lock * * ### Read Transaction Begin * @@ -589,6 +588,9 @@ typedef struct _sds_bptree_instance { * previous transaction is the last point where they are valid and needed - The last transaction now must * clean up after itself when it's ready to GC! * + * We set our reference count to 2, to indicate that a following node (the previous active transaction) + * relies on us, and that we are also active. + * * We now take the write variant of the rw read_lock. This waits for all in progress read transactions to * begin, and we take exclusive access of this lock. We then pivot the older transaction out with our * transaction. The read_lock is released, and read transactions may now resume. @@ -607,20 +609,20 @@ typedef struct _sds_bptree_instance { * * If the reference count remains positive, we complete, and continue. * - * If the reference count drops to 0, we take the vacuum lock. If our reference count is 0 *and* - * we are the oldest generation of transaction (ie tail_txn), then we free our transaction, and all - * associated owned nodes, values and keys. 
We update the tail_txn to be the next oldest transaction
- * in the chain and repeat the check.
+ * If the reference count drops to 0, we take a reference to the next transaction in the series,
+ * and we free our node. We then decrement the reference counter of the next transaction and check
+ * the result.
+ *
+ * Either, the reference count is > 0 because it is the active transaction *or* there is a current
+ * read transaction.
  *
- * While the tail transaction reference count is 0, and the oldest transaction, we continue to garbage
- * collect until we find a live transaction. At this point, we release the vacuum lock and are complete.
+ * OR, the reference count reaches 0 because there are no active transactions, so we can free this node.
+ * In the same operation, we then decrement the reference count to the next node in the series,
+ * and we repeat this until we reach a node with a positive reference count.
  *
- * NOTE: Due to this design, if a reference count is dropped to 0 of the tail_txn, while someone else
- * is vacuuming there are two states. Either, the other thread does not ees the reference count drop, and will
- * release the vacuum lock, allowing us to take it and contine the garbage collection. Alternately, the other
- * thread *does* see the reference count drop, and garbage collects the transaction. In this case, we are blocked
- * on the vacuum lock, but because the ref count is 0, and the tail_txn pointer is now a newer generation than
- * our transaction id, we do *not* attempt the double free.
+ * Due to the design of this system, we are guaranteed no thread races, as one and only one thread
+ * can ever set the reference count to 0: either the closing read, or the closing previous transaction.
+ * This gives us complete thread safety, but without the need for a mutex to ensure this.
  *
  * ### Usage of the memory safe properties.
  *
@@ -696,12 +698,6 @@ typedef struct _sds_bptree_cow_instance {
  * the duration to allow only a single writer. 
*/ pthread_mutex_t *write_lock; - /** - * The vacuum lock. When a transaction final hits a ref count of 0, we have - * to clean from the tail. To do this we need to update the tail_txn of the - * binst atomically. - */ - pthread_mutex_t *vacuum_lock; } sds_bptree_cow_instance; /** diff --git a/src/libsds/sds/bpt_cow/bpt_cow.c b/src/libsds/sds/bpt_cow/bpt_cow.c index 41e5bf6..7120efc 100644 --- a/src/libsds/sds/bpt_cow/bpt_cow.c +++ b/src/libsds/sds/bpt_cow/bpt_cow.c @@ -39,8 +39,6 @@ sds_result sds_bptree_cow_init(sds_bptree_cow_instance **binst_ptr, uint16_t che pthread_rwlock_init((*binst_ptr)->read_lock, NULL); (*binst_ptr)->write_lock = sds_calloc(sizeof(pthread_mutex_t)); pthread_mutex_init((*binst_ptr)->write_lock, NULL); - (*binst_ptr)->vacuum_lock = sds_calloc(sizeof(pthread_mutex_t)); - pthread_mutex_init((*binst_ptr)->vacuum_lock, NULL); /* Take both to be sure of barriers etc. */ pthread_mutex_lock( (*binst_ptr)->write_lock); @@ -98,8 +96,6 @@ sds_result sds_bptree_cow_destroy(sds_bptree_cow_instance *binst) { sds_free(binst->read_lock); pthread_mutex_destroy(binst->write_lock); sds_free(binst->write_lock); - pthread_mutex_destroy(binst->vacuum_lock); - sds_free(binst->vacuum_lock); sds_free(binst->bi); sds_free(binst); diff --git a/src/libsds/sds/bpt_cow/txn.c b/src/libsds/sds/bpt_cow/txn.c index 6dafa8b..11451fe 100644 --- a/src/libsds/sds/bpt_cow/txn.c +++ b/src/libsds/sds/bpt_cow/txn.c @@ -151,7 +151,6 @@ sds_bptree_txn_decrement(sds_bptree_transaction *btxn) { #endif // If the counter is 0 && we are the tail transaction. if (result == 0) { - pthread_mutex_lock(binst->vacuum_lock); while (result == 0 && btxn != NULL && btxn == binst->tail_txn) { #ifdef DEBUG sds_log("sds_bptree_txn_decrement", " txn_%p has reached 0, and is at the tail, vacuumming!", btxn); @@ -161,15 +160,14 @@ sds_bptree_txn_decrement(sds_bptree_transaction *btxn) { // Now, we need to check to see if the next txn is ready to free also .... 
// I'm not sure if this is okay, as we may not have barriered properly.
             btxn = binst->tail_txn;
-            // This isn't an atomic read, but the way that txns work is that
-            // this will always be >= 1 if this is the alive read, but never
-            // 0. So we may only "miss" freeing a now zerod txn in this case,
-            // but we never free "too much";
+            // we decrement this txn by 1 to say "there are no more parents behind this"
+            // as a result, 1 and ONLY ONE thread can be the one to cause this decrement, because
+            // * there are more parents left, so we are > 0
+            // * there are still active holders left, so we are > 0
             if (btxn != NULL) {
-                __atomic_load(&(btxn->reference_count), &result, __ATOMIC_SEQ_CST);
+                result = __atomic_sub_fetch(&(btxn->reference_count), 1, __ATOMIC_SEQ_CST);
             }
         }
-        pthread_mutex_unlock(binst->vacuum_lock);
     }
 #ifdef DEBUG
     if (btxn != NULL) {
@@ -387,8 +385,10 @@ sds_result sds_bptree_cow_wrtxn_commit(sds_bptree_transaction **btxn) {
     // Take the read lock now at the last possible moment.
     pthread_rwlock_wrlock((*btxn)->binst->read_lock);
-    // Say we are alive and commited.
-    __atomic_add_fetch(&((*btxn)->reference_count), 1, __ATOMIC_SEQ_CST);
+    // Say we are alive and committed - 2 means "our former transaction owns us"
+    // and "we are the active root".
+    uint32_t default_ref_count = 2;
+    __atomic_store(&((*btxn)->reference_count), &default_ref_count, __ATOMIC_SEQ_CST);
     // Set it.
     (*btxn)->binst->txn = *btxn;
     // Update our parent to reference us.