Commit ca9e8dc6 authored by John Esmet's avatar John Esmet Committed by Yoni Fogel
Browse files

close[t:4574] merging 4574 to main. fixes the hcad deadlock found by Tim's...

close[t:4574] merging 4574 to main. fixes the hcad deadlock found by Tim's stress test, which adds and drops indexes concurrent with queries and insertions. transactions no longer keep rollback nodes pinned after an operation, but instead always unpin them.

this merge also introduces a lot of improvements to our rollback code, in terms of clarity and consistency. to that end, variable names and function names were improved, and more documentation of the rollback logic was added to rollback.h and log-internal.h

roll.h is removed because it is a dead file.


git-svn-id: file:///svn/toku/tokudb@41576 c7de825b-a66e-492c-adef-691d508d4ae1
parent ac7abc7f
Loading
Loading
Loading
Loading
+11 −11
Original line number Diff line number Diff line
@@ -2046,7 +2046,7 @@ toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF
static void
deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, int layout_version) {
    if (layout_version == BRT_LAYOUT_VERSION_13) {
	// in older versions of TokuDB the Descriptor had a 4 byte version, which we must skip over
	// in previous versions of TokuDB the Descriptor had a 4 byte version, which we must skip over
	u_int32_t dummy_version __attribute__((__unused__)) = rbuf_int(rb);
    }
    u_int32_t size;
@@ -2501,8 +2501,8 @@ serialize_rollback_log_size(ROLLBACK_LOG_NODE log) {
    size_t size = node_header_overhead //8 "tokuroll", 4 version, 4 version_original, 4 build_id
                 +8 //TXNID
                 +8 //sequence
                 +8 //thislogname
                 +8 //older (blocknum)
                 +8 //blocknum
                 +8 //previous (blocknum)
                 +8 //resident_bytecount
                 +8 //memarena_size_needed_to_load
                 +log->rollentry_resident_bytecount;
@@ -2521,8 +2521,8 @@ serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calc
        wbuf_nocrc_uint(&wb, BUILD_ID);
        wbuf_nocrc_TXNID(&wb, log->txnid);
        wbuf_nocrc_ulonglong(&wb, log->sequence);
        wbuf_nocrc_BLOCKNUM(&wb, log->thislogname);
        wbuf_nocrc_BLOCKNUM(&wb, log->older);
        wbuf_nocrc_BLOCKNUM(&wb, log->blocknum);
        wbuf_nocrc_BLOCKNUM(&wb, log->previous);
        wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount);
        //Write down memarena size needed to restore
        wbuf_nocrc_ulonglong(&wb, memarena_total_size_in_use(log->rollentry_arena));
@@ -2677,18 +2677,18 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, u_int32_t fullhash, ROLLB
    //TODO: This is hard.. everything is shared in a single dictionary.
    rbuf_TXNID(rb, &result->txnid);
    result->sequence = rbuf_ulonglong(rb);
    result->thislogname = rbuf_blocknum(rb);
    if (result->thislogname.b != blocknum.b) {
    result->blocknum = rbuf_blocknum(rb);
    if (result->blocknum.b != blocknum.b) {
        r = toku_db_badformat();
        goto died0;
    }
    result->thishash    = toku_cachetable_hash(h->cf, result->thislogname);
    if (result->thishash != fullhash) {
    result->hash    = toku_cachetable_hash(h->cf, result->blocknum);
    if (result->hash != fullhash) {
        r = toku_db_badformat();
        goto died0;
    }
    result->older       = rbuf_blocknum(rb);
    result->older_hash  = toku_cachetable_hash(h->cf, result->older);
    result->previous       = rbuf_blocknum(rb);
    result->previous_hash  = toku_cachetable_hash(h->cf, result->previous);
    result->rollentry_resident_bytecount = rbuf_ulonglong(rb);

    size_t arena_initial_size = rbuf_ulonglong(rb);
+0 −1
Original line number Diff line number Diff line
@@ -120,7 +120,6 @@ basement nodes, bulk fetch, and partial fetch:
// Access to nested transaction logic
#include "ule.h"
#include "xids.h"
#include "roll.h"
#include "sub_block.h"
#include "sort.h"
#include <brt-cachetable-wrappers.h>
+0 −17
Original line number Diff line number Diff line
@@ -3571,17 +3571,6 @@ log_open_txn (OMTVALUE txnv, u_int32_t UU(index), void *UU(extra)) {
    assert(0);
}

// Callback with the (value, index, extra) signature handed to
// toku_omt_iterate. txnv is treated as a TOKUTXN; index and extra are unused.
// If the transaction has a pinned in-progress rollback log, unpin it and
// assert that the unpin succeeded. Returns 0 when there was nothing to
// unpin, otherwise the result of toku_rollback_log_unpin.
static int
unpin_rollback_log_for_checkpoint (OMTVALUE txnv, u_int32_t UU(index), void *UU(extra)) {
    TOKUTXN txn = txnv;
    if (!txn->pinned_inprogress_rollback_log) {
        // nothing pinned by this transaction
        return 0;
    }
    int r = toku_rollback_log_unpin(txn, txn->pinned_inprogress_rollback_log);
    assert(r==0);
    return r;
}

// TODO: #1510 locking of cachetable is suspect
//             verify correct algorithm overall

@@ -3596,12 +3585,6 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
    {
        brt_begin_checkpoint();
        unsigned i;
	if (logger) { // Unpin all 'inprogress rollback log nodes' pinned by transactions
            int r = toku_omt_iterate(logger->live_txns,
                                     unpin_rollback_log_for_checkpoint,
                                     NULL);
            assert(r==0);
        }
	cachetable_lock(ct);
	//Initialize accountability counters
	ct->checkpoint_num_files = 0;
+22 −2
Original line number Diff line number Diff line
@@ -145,17 +145,37 @@ struct tokutxn {
    BOOL       force_fsync_on_commit;  //This transaction NEEDS an fsync once (if) it commits.  (commit means root txn)
    TXN_PROGRESS_POLL_FUNCTION progress_poll_fun;
    void *                     progress_poll_fun_extra;

    // these are the numbers of rollback nodes and rollback entries for this txn.
    //
    // the current rollback node below has sequence number num_rollback_nodes - 1
    // (because they are numbered 0...num-1). often, the current rollback is
    // already set to this block num, which means it exists and is available to
    // log some entries. if the current rollback is NONE and the number of
    // rollback nodes for this transaction is non-zero, then we will use
    // the number of rollback nodes to know which sequence number to assign
    // to a new one we create
    uint64_t   num_rollback_nodes;
    uint64_t   num_rollentries;
    uint64_t   num_rollentries_processed;

    // spilled rollback nodes are rollback nodes that were gorged by this
    // transaction, retired, and saved in a list.

    // the spilled rollback head is the block number of the first rollback node
    // that makes up the rollback log chain
    BLOCKNUM   spilled_rollback_head;
    uint32_t   spilled_rollback_head_hash;
    // the spilled rollback tail is the block number of the last rollback node
    // that makes up the rollback log chain.
    BLOCKNUM   spilled_rollback_tail;
    uint32_t   spilled_rollback_tail_hash;
    // the current rollback node block number we may use. if this is ROLLBACK_NONE,
    // then we need to create one and set it here before using it.
    BLOCKNUM   current_rollback; 
    uint32_t   current_rollback_hash;

    BOOL       recovered_from_checkpoint;
    ROLLBACK_LOG_NODE pinned_inprogress_rollback_log;
    struct toku_list checkpoint_before_commit;
    TXN_IGNORE_S ignore_errors; // 2954
    TOKUTXN_STATE state;
+5 −4
Original line number Diff line number Diff line
@@ -580,10 +580,8 @@ generate_rollbacks (void) {

		    fprintf(hf, ");\n");
		    fprintf(cf, ") {\n");
                    fprintf(cf, "  int r;\n");
                    fprintf(cf, "  ROLLBACK_LOG_NODE log;\n");
                    fprintf(cf, "  r = toku_get_and_pin_rollback_log_for_new_entry(txn, &log);\n");
                    fprintf(cf, "  assert(r==0);\n");
                    fprintf(cf, "  toku_get_and_pin_rollback_log_for_new_entry(txn, &log);\n");
		    // 'memdup' all BYTESTRINGS here
		    DO_FIELDS(ft, lt, {
                        if ( strcmp(ft->type, "BYTESTRING") == 0 ) {
@@ -620,7 +618,10 @@ generate_rollbacks (void) {
		    fprintf(cf, "  txn->rollentry_raw_count          += rollback_fsize;\n");
                    fprintf(cf, "  txn->num_rollentries++;\n");
                    fprintf(cf, "  log->dirty = TRUE;\n");
		    fprintf(cf, "  return toku_maybe_spill_rollbacks(txn, log);\n}\n");
		    fprintf(cf, "  // spill and unpin assert success internally\n");
		    fprintf(cf, "  toku_maybe_spill_rollbacks(txn, log);\n");
		    fprintf(cf, "  toku_rollback_log_unpin(txn, log);\n");
		    fprintf(cf, "  return 0;\n}\n");
	    });

    DO_ROLLBACKS(lt, {
Loading