Loading ndb/src/kernel/blocks/ERROR_codes.txt +2 −1 Original line number Diff line number Diff line Next QMGR 1 Next NDBCNTR 1000 Next NDBCNTR 1002 Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4014 Loading Loading @@ -501,3 +501,4 @@ TUP: NDBCNTR: 1000: Crash insertion on SystemError::CopyFragRef 1001: Delay sending NODE_FAILREP (to own node), until error is cleared ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +7 −1 Original line number Diff line number Diff line Loading @@ -4459,12 +4459,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) jam(); const Uint32 nodeId = failedNodePtr.i; if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){ if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i)) { /*----------------------------------------------------*/ /* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */ /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */ /* THE NODE HAVE MISSED A LOCAL CHECKPOINT. */ /*----------------------------------------------------*/ /** * Bug#28717, Only master should do this, as this status is copied * to other nodes */ switch (failedNodePtr.p->activeStatus) { case Sysfile::NS_Active: jam(); Loading ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +7 −0 Original line number Diff line number Diff line Loading @@ -1375,6 +1375,13 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) { jamEntry(); if (ERROR_INSERTED(1001)) { sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100, signal->getLength()); return; } const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0]; NdbNodeBitmask allFailed; allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes); Loading ndb/test/ndbapi/testNodeRestart.cpp +81 −0 Original line number Diff line number Diff line Loading @@ -1219,6 +1219,84 @@ runBug27283(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } int runBug28717(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 4) { return NDBT_OK; } int master = res.getMasterNodeId(); int node0 = res.getRandomNodeOtherNodeGroup(master, rand()); int node1 = res.getRandomNodeSameNodeGroup(node0, rand()); ndbout_c("master: %d node0: %d node1: %d", master, node0, node1); if (res.restartOneDbNode(node0, false, true, true)) { return NDBT_FAILED; } { int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; NdbLogEventHandle handle = ndb_mgm_create_logevent_handle(res.handle, filter); int dump[] = { DumpStateOrd::DihStartLcpImmediately }; struct ndb_logevent event; for (Uint32 i = 0; i<3; i++) { res.dumpStateOneNode(master, dump, 1); while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointStarted); while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointCompleted); } } if (res.waitNodesNoStart(&node0, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(node0, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(node0, 5010)) return NDBT_FAILED; if (res.insertErrorInNode(node1, 1001)) return NDBT_FAILED; if (res.startNodes(&node0, 1)) return NDBT_FAILED; NdbSleep_SecSleep(3); if (res.insertErrorInNode(node1, 0)) return NDBT_FAILED; if (res.waitNodesNoStart(&node0, 1)) return NDBT_FAILED; if (res.startNodes(&node0, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ Loading Loading @@ -1552,6 +1630,9 @@ TESTCASE("Bug27003", ""){ TESTCASE("Bug27283", ""){ INITIALIZER(runBug27283); } TESTCASE("Bug28717", ""){ INITIALIZER(runBug28717); } NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ Loading ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading @@ -508,6 +508,10 @@ max-time: 1500 cmd: testDict args: -n CreateAndDrop max-time: 1000 cmd: testNodeRestart args: -n Bug28717 T1 max-time: 1500 cmd: testDict args: -n CreateAndDropAtRandom -l 200 T1 Loading Loading
ndb/src/kernel/blocks/ERROR_codes.txt +2 −1 Original line number Diff line number Diff line Next QMGR 1 Next NDBCNTR 1000 Next NDBCNTR 1002 Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4014 Loading Loading @@ -501,3 +501,4 @@ TUP: NDBCNTR: 1000: Crash insertion on SystemError::CopyFragRef 1001: Delay sending NODE_FAILREP (to own node), until error is cleared
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +7 −1 Original line number Diff line number Diff line Loading @@ -4459,12 +4459,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) jam(); const Uint32 nodeId = failedNodePtr.i; if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){ if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i)) { /*----------------------------------------------------*/ /* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */ /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */ /* THE NODE HAVE MISSED A LOCAL CHECKPOINT. */ /*----------------------------------------------------*/ /** * Bug#28717, Only master should do this, as this status is copied * to other nodes */ switch (failedNodePtr.p->activeStatus) { case Sysfile::NS_Active: jam(); Loading
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +7 −0 Original line number Diff line number Diff line Loading @@ -1375,6 +1375,13 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) { jamEntry(); if (ERROR_INSERTED(1001)) { sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100, signal->getLength()); return; } const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0]; NdbNodeBitmask allFailed; allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes); Loading
ndb/test/ndbapi/testNodeRestart.cpp +81 −0 Original line number Diff line number Diff line Loading @@ -1219,6 +1219,84 @@ runBug27283(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } int runBug28717(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 4) { return NDBT_OK; } int master = res.getMasterNodeId(); int node0 = res.getRandomNodeOtherNodeGroup(master, rand()); int node1 = res.getRandomNodeSameNodeGroup(node0, rand()); ndbout_c("master: %d node0: %d node1: %d", master, node0, node1); if (res.restartOneDbNode(node0, false, true, true)) { return NDBT_FAILED; } { int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; NdbLogEventHandle handle = ndb_mgm_create_logevent_handle(res.handle, filter); int dump[] = { DumpStateOrd::DihStartLcpImmediately }; struct ndb_logevent event; for (Uint32 i = 0; i<3; i++) { res.dumpStateOneNode(master, dump, 1); while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointStarted); while(ndb_logevent_get_next(handle, &event, 0) >= 0 && event.type != NDB_LE_LocalCheckpointCompleted); } } if (res.waitNodesNoStart(&node0, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(node0, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(node0, 5010)) return NDBT_FAILED; if (res.insertErrorInNode(node1, 1001)) return NDBT_FAILED; if (res.startNodes(&node0, 1)) return NDBT_FAILED; NdbSleep_SecSleep(3); if (res.insertErrorInNode(node1, 0)) return NDBT_FAILED; if (res.waitNodesNoStart(&node0, 1)) return NDBT_FAILED; if (res.startNodes(&node0, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ Loading Loading @@ -1552,6 +1630,9 @@ TESTCASE("Bug27003", ""){ TESTCASE("Bug27283", ""){ INITIALIZER(runBug27283); } TESTCASE("Bug28717", ""){ INITIALIZER(runBug28717); } NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ Loading
ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading @@ -508,6 +508,10 @@ max-time: 1500 cmd: testDict args: -n CreateAndDrop max-time: 1000 cmd: testNodeRestart args: -n Bug28717 T1 max-time: 1500 cmd: testDict args: -n CreateAndDropAtRandom -l 200 T1 Loading