// Page-level capture of one change touching five files; each file section
// starts with its path and a "+N −M" summary.
// NOTE(review): no +/- line markers are present, so added, removed and
// context lines cannot be told apart from this text alone.
Loading storage/ndb/src/kernel/blocks/ERROR_codes.txt +3 −1 Original line number Diff line number Diff line Loading
@@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4024 Next DBLQH 5045 Next DBDICT 6007 Next DBDIH 7178 Next DBDIH 7181 Next DBTC 8039 Next CMVMI 9000 Next BACKUP 10038 Loading Loading
@@ -73,6 +73,8 @@ Delay GCP_SAVEREQ by 10 secs 7177: Delay copying of sysfileData in execCOPY_GCIREQ 7180: Crash master during master-take-over in execMASTER_LCPCONF ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- Loading
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +26 −1 Original line number Diff line number Diff line Loading
@@ -4892,6 +4892,8 @@ void Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
  jam();

  // Capture the failedNodeId currently held in the take-over state before it
  // is overwritten with nodeId a few lines below.
  Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;

  c_lcpMasterTakeOverState.minTableId = ~0;
  c_lcpMasterTakeOverState.minFragId = ~0;
  c_lcpMasterTakeOverState.failedNodeId = nodeId;
Loading
@@ -4910,7 +4912,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
    /** * Node failure during master take over... */
    // NOTE(review): two info() calls appear back to back; given the "−1" in
    // the file summary, the first is presumably the removed line and the
    // second its replacement - confirm against the real diff.
    g_eventLogger.info("Nodefail during master take over");
    g_eventLogger.info("Nodefail during master take over (old: %d)", oldNode);
  }

  // If the previously recorded node id lies strictly between 0 and
  // MAX_NDB_NODES and that node has NF_LCP_TAKE_OVER set in m_nodefailSteps,
  // call checkLocalNodefailComplete for it before registering nodeId.
  NodeRecordPtr nodePtr;
  nodePtr.i = oldNode;
  if (oldNode > 0 && oldNode < MAX_NDB_NODES)
  {
    jam();
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
    if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
    {
      jam();
      checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
    }
  }

  setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
Loading Loading
@@ -5927,6 +5942,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal)
  jamEntry();
  const BlockReference newMasterBlockref = req->masterRef;

  // A request whose masterRef differs from cmasterdihref is not processed
  // now: the same signal is re-sent to reference() with a delay argument of
  // 50 and the handler returns.
  if (newMasterBlockref != cmasterdihref)
  {
    jam();
    ndbout_c("resending GSN_MASTER_LCPREQ");
    sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
                        signal->getLength(), 50);
    return;
  }
  Uint32 failedNodeId = req->failedNodeId;
  /** Loading Loading
@@ -6223,6 +6246,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
  nodePtr.p->lcpStateAtTakeOver = lcpState;

  // Error-insert point 7180, listed in the ERROR_codes.txt section above.
  CRASH_INSERTION(7180);

#ifdef VM_TRACE
  g_eventLogger.info("MASTER_LCPCONF");
  printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
Loading
storage/ndb/test/include/NdbRestarter.hpp +2 −0 Original line number Diff line number Diff line Loading
@@ -61,6 +61,8 @@ public:
  int dumpStateAllNodes(const int * _args, int _num_args);

  int getMasterNodeId();
  int getNextMasterNodeId(int nodeId);
  int getNodeGroup(int nodeId);
  int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
  int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
  int getRandomNotMasterNodeId(int randomNumber);
Loading
storage/ndb/test/ndbapi/testNodeRestart.cpp +42 −0 Original line number Diff line number Diff line Loading
@@ -1273,6 +1273,45 @@ int runBug25984(NDBT_Context* ctx, NDBT_Step* step){
  return NDBT_OK;
}

// Test step for Bug26457. Returns NDBT_OK at once when there are fewer than
// 4 DB nodes. Otherwise, for each of ctx->getNumLoops() rounds, it arms error
// insert 7180 on the node returned by getNextMasterNodeId(master) and then
// restarts the master. Returns NDBT_FAILED as soon as a checked NdbRestarter
// call returns non-zero. The step parameter is not used.
int runBug26457(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;
  if (res.getNumDbNodes() < 4)
    return NDBT_OK;

  int loops = ctx->getNumLoops();
  while (loops --)
  {
    // Jump target: re-reads master/next without decrementing loops.
retry:
    int master = res.getMasterNodeId();
    int next = res.getNextMasterNodeId(master);

    ndbout_c("master: %d next: %d", master, next);

    // Both nodes in the same node group: restart next and try again.
    // NOTE(review): the restartOneDbNode return value is not checked.
    if (res.getNodeGroup(master) == res.getNodeGroup(next))
    {
      res.restartOneDbNode(next, false, false, true);
      if (res.waitClusterStarted())
        return NDBT_FAILED;
      goto retry;
    }

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 };

    if (res.dumpStateOneNode(next, val2, 2))
      return NDBT_FAILED;

    if (res.insertErrorInNode(next, 7180))
      return NDBT_FAILED;

    res.restartOneDbNode(master, false, false, true);
    if (res.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}

NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
Loading Loading
@@ -1612,6 +1651,9 @@ TESTCASE("Bug25554", ""){
TESTCASE("Bug25984", ""){
  INITIALIZER(runBug25984);
}
// Registers runBug26457 as the INITIALIZER of test case "Bug26457".
TESTCASE("Bug26457", ""){
  INITIALIZER(runBug26457);
}
NDBT_TESTSUITE_END(testNodeRestart);

int main(int argc, const char** argv){
Loading
storage/ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading
@@ -529,6 +529,10 @@ max-time: 3000 cmd: testNodeRestart args: -n Bug25984 max-time: 1000 cmd: testNodeRestart args: -n Bug26457 T1 # # DICT TESTS max-time: 1500 Loading Loading
storage/ndb/src/kernel/blocks/ERROR_codes.txt +3 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4024 Next DBLQH 5045 Next DBDICT 6007 Next DBDIH 7178 Next DBDIH 7181 Next DBTC 8039 Next CMVMI 9000 Next BACKUP 10038 Loading Loading @@ -73,6 +73,8 @@ Delay GCP_SAVEREQ by 10 secs 7177: Delay copying of sysfileData in execCOPY_GCIREQ 7180: Crash master during master-take-over in execMASTER_LCPCONF ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- Loading
// Captured diff view of DbdihMain.cpp: four hunks, each a fragment of a
// larger Dbdih member function.
// NOTE(review): no +/- line markers are present, so added, removed and
// context lines cannot be told apart from this text alone.
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +26 −1 Original line number Diff line number Diff line Loading
@@ -4892,6 +4892,8 @@ void Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
  jam();

  // Capture the failedNodeId currently held in the take-over state before it
  // is overwritten with nodeId a few lines below.
  Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;

  c_lcpMasterTakeOverState.minTableId = ~0;
  c_lcpMasterTakeOverState.minFragId = ~0;
  c_lcpMasterTakeOverState.failedNodeId = nodeId;
Loading
@@ -4910,7 +4912,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
    /** * Node failure during master take over... */
    // NOTE(review): two info() calls appear back to back; given the "−1" in
    // the file summary, the first is presumably the removed line and the
    // second its replacement - confirm against the real diff.
    // NOTE(review): oldNode is a Uint32 but is formatted with %d.
    g_eventLogger.info("Nodefail during master take over");
    g_eventLogger.info("Nodefail during master take over (old: %d)", oldNode);
  }

  // If the previously recorded node id lies strictly between 0 and
  // MAX_NDB_NODES and that node has NF_LCP_TAKE_OVER set in m_nodefailSteps,
  // call checkLocalNodefailComplete for the old node and that step. This
  // happens before setLocalNodefailHandling is called for the new nodeId.
  NodeRecordPtr nodePtr;
  nodePtr.i = oldNode;
  if (oldNode > 0 && oldNode < MAX_NDB_NODES)
  {
    jam();
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
    if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
    {
      jam();
      checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
    }
  }

  setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
Loading Loading
@@ -5927,6 +5942,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal)
  jamEntry();
  const BlockReference newMasterBlockref = req->masterRef;

  // A request whose masterRef differs from cmasterdihref is not processed
  // now: the same signal (same length) is re-sent to reference() with a delay
  // argument of 50 and the handler returns.
  // NOTE(review): reference() is presumably this block itself and the delay
  // unit presumably milliseconds - confirm.
  if (newMasterBlockref != cmasterdihref)
  {
    jam();
    ndbout_c("resending GSN_MASTER_LCPREQ");
    sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
                        signal->getLength(), 50);
    return;
  }
  Uint32 failedNodeId = req->failedNodeId;
  /** Loading Loading
@@ -6223,6 +6246,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
  nodePtr.p->lcpStateAtTakeOver = lcpState;

  // Error-insert point 7180. The ERROR_codes.txt part of this change lists it
  // as "Crash master during master-take-over in execMASTER_LCPCONF".
  CRASH_INSERTION(7180);

#ifdef VM_TRACE
  g_eventLogger.info("MASTER_LCPCONF");
  printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
Loading
// Captured diff view of NdbRestarter.hpp: one hunk of public member
// declarations; the enclosing class header is outside this view.
storage/ndb/test/include/NdbRestarter.hpp +2 −0 Original line number Diff line number Diff line Loading
@@ -61,6 +61,8 @@ public:
  int dumpStateAllNodes(const int * _args, int _num_args);

  int getMasterNodeId();
  // Called by runBug26457 as getNextMasterNodeId(master) to choose the node
  // on which error 7180 is inserted.
  // NOTE(review): presumably the node that takes over as master when nodeId
  // fails - confirm in the implementation.
  int getNextMasterNodeId(int nodeId);
  // runBug26457 compares this value for two nodes to see whether they share a
  // node group.
  // NOTE(review): behaviour for an unknown nodeId is not visible here.
  int getNodeGroup(int nodeId);
  int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
  int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
  int getRandomNotMasterNodeId(int randomNumber);
Loading
// Captured diff view of testNodeRestart.cpp: the first hunk holds the whole
// of runBug26457, the second registers it as a test case.
storage/ndb/test/ndbapi/testNodeRestart.cpp +42 −0 Original line number Diff line number Diff line Loading
@@ -1273,6 +1273,45 @@ int runBug25984(NDBT_Context* ctx, NDBT_Step* step){
  return NDBT_OK;
}

// Test step for Bug26457.
//
// Returns NDBT_OK at once when the cluster has fewer than 4 DB nodes.
// Otherwise, for each of ctx->getNumLoops() rounds:
//   1. look up the current master and the node returned by
//      getNextMasterNodeId(master);
//   2. if both are in the same node group, restart the second node, wait for
//      the cluster and look the pair up again (this does not use up a round);
//   3. otherwise arm error insert 7180 on the second node, restart the
//      master and wait for the cluster to be started again.
// Returns NDBT_FAILED as soon as a checked NdbRestarter call returns
// non-zero, NDBT_OK after all rounds. The step parameter is not used.
int runBug26457(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;
  if (res.getNumDbNodes() < 4)
    return NDBT_OK;

  int loops = ctx->getNumLoops();
  while (loops --)
  {
    // Jump target: re-reads master/next without decrementing loops.
retry:
    int master = res.getMasterNodeId();
    int next = res.getNextMasterNodeId(master);

    ndbout_c("master: %d next: %d", master, next);

    if (res.getNodeGroup(master) == res.getNodeGroup(next))
    {
      // NOTE(review): the restartOneDbNode return value is not checked,
      // unlike the other NdbRestarter calls in this function. The meaning of
      // the (false, false, true) flags is not visible in this diff.
      res.restartOneDbNode(next, false, false, true);
      if (res.waitClusterStarted())
        return NDBT_FAILED;
      goto retry;
    }

    // Two-word dump-state command sent to next only.
    // NOTE(review): the meaning of the argument 2 is not shown here - confirm
    // against the CmvmiSetRestartOnErrorInsert handling.
    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 };

    if (res.dumpStateOneNode(next, val2, 2))
      return NDBT_FAILED;

    // 7180 is listed in the ERROR_codes.txt part of this change as
    // "Crash master during master-take-over in execMASTER_LCPCONF".
    if (res.insertErrorInNode(next, 7180))
      return NDBT_FAILED;

    // NOTE(review): return value not checked here either.
    res.restartOneDbNode(master, false, false, true);
    if (res.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}

NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
Loading Loading
@@ -1612,6 +1651,9 @@ TESTCASE("Bug25554", ""){
TESTCASE("Bug25984", ""){
  INITIALIZER(runBug25984);
}
// Registers runBug26457 as the INITIALIZER of test case "Bug26457", the name
// passed with -n in the daily-basic-tests.txt part of this change.
TESTCASE("Bug26457", ""){
  INITIALIZER(runBug26457);
}
NDBT_TESTSUITE_END(testNodeRestart);

int main(int argc, const char** argv){
Loading
storage/ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading @@ -529,6 +529,10 @@ max-time: 3000 cmd: testNodeRestart args: -n Bug25984 max-time: 1000 cmd: testNodeRestart args: -n Bug26457 T1 # # DICT TESTS max-time: 1500 Loading