Commit b66f4e0c authored by Rich Prohaska's avatar Rich Prohaska Committed by Yoni Fogel
Browse files

#4443 try to speed up cursor create and close on the mainline. the read lock...

#4443 try to speed up cursor create and close on the mainline.  The read lock on the directory is a BIG bottleneck. refs[t:4443]

git-svn-id: file:///svn/toku/tokudb@39683 c7de825b-a66e-492c-adef-691d508d4ae1
parent 77775ddc
Loading
Loading
Loading
Loading
+51 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <pthread.h>
#include <time.h>
#include <stdint.h>
#include "toku_assert.h"

#if defined(__cplusplus) || defined(__cilkplusplus)
extern "C" {
@@ -119,6 +120,56 @@ toku_pthread_mutex_unlock(toku_pthread_mutex_t *mutex) {
    return pthread_mutex_unlock(mutex);
}

// Initialize *mutex with the given attributes.
// Any failure from pthread_mutex_init aborts the process.
static inline void
toku_mutex_init(toku_pthread_mutex_t *mutex, const toku_pthread_mutexattr_t *attr) {
    int rc = pthread_mutex_init(mutex, attr);
    assert_zero(rc);
}

// Destroy a mutex previously set up with toku_mutex_init.
// Any failure from pthread_mutex_destroy aborts the process.
static inline void
toku_mutex_destroy(toku_pthread_mutex_t *mutex) {
    int rc = pthread_mutex_destroy(mutex);
    assert_zero(rc);
}

// Acquire *mutex, blocking until it is available.
// Any failure from pthread_mutex_lock aborts the process.
static inline void
toku_mutex_lock(toku_pthread_mutex_t *mutex) {
    int rc = pthread_mutex_lock(mutex);
    assert_zero(rc);
}

// Release *mutex, which must be held by the calling thread.
// Any failure from pthread_mutex_unlock aborts the process.
static inline void
toku_mutex_unlock(toku_pthread_mutex_t *mutex) {
    int rc = pthread_mutex_unlock(mutex);
    assert_zero(rc);
}

// Thin wrappers over POSIX spinlocks; every wrapper aborts the
// process if the underlying pthread_spin_* call reports an error.
typedef pthread_spinlock_t toku_spinlock_t;

// Initialize *lock.  pshared selects process-shared versus
// process-private semantics (see pthread_spin_init).
static inline void
toku_spin_init(toku_spinlock_t *lock, int pshared) {
    int rc = pthread_spin_init(lock, pshared);
    assert_zero(rc);
}

// Destroy a spinlock previously set up with toku_spin_init.
static inline void
toku_spin_destroy(toku_spinlock_t *lock) {
    int rc = pthread_spin_destroy(lock);
    assert_zero(rc);
}

// Spin until *lock is acquired.
static inline void
toku_spin_lock(toku_spinlock_t *lock) {
    int rc = pthread_spin_lock(lock);
    assert_zero(rc);
}

// Release *lock, which must be held by the calling thread.
static inline void
toku_spin_unlock(toku_spinlock_t *lock) {
    int rc = pthread_spin_unlock(lock);
    assert_zero(rc);
}

static inline int 
toku_pthread_cond_init(toku_pthread_cond_t *cond, const toku_pthread_condattr_t *attr) {
    return pthread_cond_init(cond, attr);
+1 −0
Original line number Diff line number Diff line
@@ -413,6 +413,7 @@ struct brt {
    // The header is shared.  It is also ephemeral.
    struct brt_header *h;

    toku_spinlock_t cursors_lock;
    struct toku_list cursors;

    unsigned int nodesize;
+22 −6
Original line number Diff line number Diff line
@@ -3478,6 +3478,8 @@ static void (*callback_db_set_brt)(DB *db, BRT brt) = NULL;
static void
brt_redirect_cursors (BRT brt_to, BRT brt_from) {
    assert(brt_to->db == brt_from->db);
    toku_spin_lock(&brt_to->cursors_lock);
    toku_spin_lock(&brt_from->cursors_lock);
    while (!toku_list_empty(&brt_from->cursors)) {
	struct toku_list * c_list = toku_list_head(&brt_from->cursors);
	BRT_CURSOR c = toku_list_struct(c_list, struct brt_cursor, cursors_link);
@@ -3488,6 +3490,8 @@ brt_redirect_cursors (BRT brt_to, BRT brt_from) {

	c->brt = brt_to;
    }
    toku_spin_unlock(&brt_to->cursors_lock);
    toku_spin_unlock(&brt_from->cursors_lock);
}

static void
@@ -4095,6 +4099,7 @@ int toku_close_brt_lsn (BRT brt, char **error_string, BOOL oplsn_valid, LSN opls
	r = toku_brt_cursor_close(c);
	if (r!=0) return r;
    }
    toku_spin_destroy(&brt->cursors_lock);

    // Must do this work before closing the cf
    r=toku_txn_note_close_brt(brt);
@@ -4133,6 +4138,10 @@ int toku_brt_create(BRT *brt_ptr) {
    brt->update_fun = NULL;
    int r = toku_omt_create(&brt->txns);
    if (r!=0) { toku_free(brt); return r; }
    pthread_mutexattr_t attr;
    r = pthread_mutexattr_init(&attr); assert_zero(r);
    r = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); assert_zero(r);
    toku_spin_init(&brt->cursors_lock, 0); 
    *brt_ptr = brt;
    return 0;
}
@@ -4236,7 +4245,11 @@ int toku_brt_cursor (
    cursor->is_leaf_mode = FALSE;
    cursor->ttxn = ttxn;
    cursor->disable_prefetching = disable_prefetching;
    if (1) {
    toku_spin_lock(&brt->cursors_lock);
    toku_list_push(&brt->cursors, &cursor->cursors_link);
    toku_spin_unlock(&brt->cursors_lock);
    }
    *cursorptr = cursor;
    return 0;
}
@@ -4278,8 +4291,6 @@ toku_brt_cursor_set_range_lock(BRT_CURSOR cursor, const DBT *left, const DBT *ri
    }
}

//TODO: #1378 When we split the ydb lock, touching cursor->cursors_link
//is not thread safe.
int toku_brt_cursor_close(BRT_CURSOR cursor) {
    brt_cursor_cleanup_dbts(cursor);
    if (cursor->range_lock_left_key.data) {
@@ -4290,8 +4301,12 @@ int toku_brt_cursor_close(BRT_CURSOR cursor) {
        toku_free(cursor->range_lock_right_key.data);
        toku_destroy_dbt(&cursor->range_lock_right_key);
    }
    if (1) {
    toku_spin_lock(&cursor->brt->cursors_lock);
    toku_list_remove(&cursor->cursors_link);
    toku_free_n(cursor, sizeof *cursor);
    toku_spin_unlock(&cursor->brt->cursors_lock);
    }
    toku_free(cursor);
    return 0;
}

@@ -5738,9 +5753,10 @@ BOOL toku_brt_cursor_uninitialized(BRT_CURSOR c) {

// Count the cursors currently open on this brt by walking its
// intrusive cursor list.  The brt's cursors spinlock is held for the
// walk so concurrent cursor create/close cannot corrupt the traversal.
// NOTE(review): the rendered diff had left the pre-change loop header
// and `list` declaration in place alongside the new locked loop,
// which made toku_spin_lock the body of an unguarded loop and
// redeclared `list`; this is the reconstructed post-change function.
int toku_brt_get_cursor_count (BRT brt) {
    int n = 0;
    toku_spin_lock(&brt->cursors_lock);
    for (struct toku_list *list = brt->cursors.next; list != &brt->cursors; list = list->next)
        n += 1;
    toku_spin_unlock(&brt->cursors_lock);
    return n;
}

+2 −22
Original line number Diff line number Diff line
@@ -14,28 +14,8 @@

#include "threaded_stress_test_helpers.h"

//
// This test is a form of stress that does operations on a single dictionary:
// We create a dictionary bigger than the cachetable (around 4x greater).
// Then, we spawn a bunch of pthreads that do the following:
//  - scan dictionary forward with bulk fetch
//  - scan dictionary forward slowly
//  - scan dictionary backward with bulk fetch
//  - scan dictionary backward slowly
//  - Grow the dictionary with insertions
//  - do random point queries into the dictionary
// With the small cachetable, this should produce quite a bit of churn in reading in and evicting nodes.
// If the test runs to completion without crashing, we consider it a success. It also tests that snapshots
// work correctly by verifying that table scans sum their vals to 0.
//
// This does NOT test:
//  - splits and merges
//  - multiple DBs
//
// Variables that are interesting to tweak and run:
//  - small cachetable
//  - number of elements
//
// The intent of this test is to measure the throughput of cursor create and close
// with multiple threads.

static void
stress_table(DB_ENV* env, DB** dbp, struct cli_args *cli_args) {
+2 −30
Original line number Diff line number Diff line
@@ -15,40 +15,12 @@

#include "threaded_stress_test_helpers.h"

//
// This test is a form of stress that does operations on a single dictionary:
// We create a dictionary bigger than the cachetable (around 4x greater).
// Then, we spawn a bunch of pthreads that do the following:
//  - scan dictionary forward with bulk fetch
//  - scan dictionary forward slowly
//  - scan dictionary backward with bulk fetch
//  - scan dictionary backward slowly
//  - Grow the dictionary with insertions
//  - do random point queries into the dictionary
// With the small cachetable, this should produce quite a bit of churn in reading in and evicting nodes.
// If the test runs to completion without crashing, we consider it a success. It also tests that snapshots
// work correctly by verifying that table scans sum their vals to 0.
//
// This does NOT test:
//  - splits and merges
//  - multiple DBs
//
// Variables that are interesting to tweak and run:
//  - small cachetable
//  - number of elements
//
// The intent of this test is to measure the throughput of malloc and free
// with multiple threads.

static void
stress_table(DB_ENV* env, DB** dbp, struct cli_args *cli_args) {
    int n = cli_args->num_elements;
    //
    // the threads that we want:
    //   - some threads constantly updating random values
    //   - one thread doing table scan with bulk fetch
    //   - one thread doing table scan without bulk fetch
    //   - some threads doing random point queries
    //

    if (verbose) printf("starting creation of pthreads\n");
    const int num_threads = cli_args->num_ptquery_threads;
    struct arg myargs[num_threads];
Loading