Loading storage/ndb/src/kernel/blocks/ERROR_codes.txt +6 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4029 Next DBLQH 5047 Next DBDICT 6008 Next DBDIH 7193 Next DBDIH 7195 Next DBTC 8054 Next CMVMI 9000 Next BACKUP 10038 Loading Loading @@ -81,6 +81,11 @@ Delay GCP_SAVEREQ by 10 secs 7185: Dont reply to COPY_GCI_REQ where reason == GCP 7193: Dont send LCP_FRAG_ORD to self, and crash when sending first LCP_FRAG_ORD(last) 7194: Force removeNodeFromStored to complete in the middle of MASTER_LCPCONF ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- Loading storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +10 −0 Original line number Diff line number Diff line Loading @@ -1310,7 +1310,17 @@ private: LcpStatus lcpStatus; Uint32 lcpStatusUpdatedPlace; struct Save { LcpStatus m_status; Uint32 m_place; } m_saveState[10]; void setLcpStatus(LcpStatus status, Uint32 line){ for (Uint32 i = 9; i > 0; i--) m_saveState[i] = m_saveState[i-1]; m_saveState[0].m_status = lcpStatus; m_saveState[0].m_place = lcpStatusUpdatedPlace; lcpStatus = status; lcpStatusUpdatedPlace = line; } Loading storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +54 −6 Original line number Diff line number Diff line Loading @@ -5181,10 +5181,18 @@ void Dbdih::startRemoveFailedNode(Signal* signal, NodeRecordPtr failedNodePtr) } jam(); if (!ERROR_INSERTED(7194)) { signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE; signal->theData[1] = failedNodePtr.i; signal->theData[2] = 0; // Tab id sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB); } else { ndbout_c("7194 Not starting ZREMOVE_NODE_FROM_TABLE"); } setLocalNodefailHandling(signal, failedNodePtr.i, NF_REMOVE_NODE_FROM_TABLE); }//Dbdih::startRemoveFailedNode() Loading Loading @@ -6115,11 +6123,21 @@ Dbdih::checkEmptyLcpComplete(Signal *signal){ signal->theData[0] = 7012; execDUMP_STATE_ORD(signal); if 
(ERROR_INSERTED(7194)) { ndbout_c("7194 starting ZREMOVE_NODE_FROM_TABLE"); signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE; signal->theData[1] = c_lcpMasterTakeOverState.failedNodeId; signal->theData[2] = 0; // Tab id sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB); } c_lcpMasterTakeOverState.set(LMTOS_INITIAL, __LINE__); MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0]; req->masterRef = reference(); req->failedNodeId = c_lcpMasterTakeOverState.failedNodeId; sendLoopMacro(MASTER_LCPREQ, sendMASTER_LCPREQ); } else { sendMASTER_LCPCONF(signal); } Loading Loading @@ -6432,6 +6450,15 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) { const MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0]; jamEntry(); if (ERROR_INSERTED(7194)) { ndbout_c("delaying MASTER_LCPCONF due to error 7194"); sendSignalWithDelay(reference(), GSN_MASTER_LCPCONF, signal, 300, signal->getLength()); return; } Uint32 senderNodeId = conf->senderNodeId; MasterLCPConf::State lcpState = (MasterLCPConf::State)conf->lcpState; const Uint32 failedNodeId = conf->failedNodeId; Loading Loading @@ -6566,7 +6593,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) #endif c_lcpState.keepGci = SYSFILE->keepGCI; c_lcpState.setLcpStatus(LCP_START_LCP_ROUND, __LINE__); startLcpRoundLoopLab(signal, 0, 0); break; } Loading Loading @@ -10538,6 +10564,8 @@ void Dbdih::sendLastLCP_FRAG_ORD(Signal* signal) if(ERROR_INSERTED(7075)){ continue; } CRASH_INSERTION(7193); BlockReference ref = calcLqhBlockRef(nodePtr.i); sendSignal(ref, GSN_LCP_FRAG_ORD, signal,LcpFragOrd::SignalLength, JBB); } Loading Loading @@ -10765,6 +10793,13 @@ Dbdih::checkLcpAllTablesDoneInLqh(){ CRASH_INSERTION2(7017, !isMaster()); c_lcpState.setLcpStatus(LCP_TAB_COMPLETED, __LINE__); if (ERROR_INSERTED(7194)) { ndbout_c("CLEARING 7194"); CLEAR_ERROR_INSERT_VALUE; } return true; } Loading Loading @@ -10954,6 +10989,11 @@ Dbdih::sendLCP_FRAG_ORD(Signal* signal, BlockReference ref = 
calcLqhBlockRef(replicaPtr.p->procNode); if (ERROR_INSERTED(7193) && replicaPtr.p->procNode == getOwnNodeId()) { return; } LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0]; lcpFragOrd->tableId = info.tableId; lcpFragOrd->fragmentId = info.fragId; Loading Loading @@ -14500,6 +14540,14 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) ("immediateLcpStart = %d masterLcpNodeId = %d", c_lcpState.immediateLcpStart, refToNode(c_lcpState.m_masterLcpDihRef)); for (Uint32 i = 0; i<10; i++) { infoEvent("%u : status: %u place: %u", i, c_lcpState.m_saveState[i].m_status, c_lcpState.m_saveState[i].m_place); } infoEvent("-- Node %d LCP STATE --", getOwnNodeId()); } Loading storage/ndb/test/ndbapi/testNodeRestart.cpp +48 −0 Original line number Diff line number Diff line Loading @@ -1836,6 +1836,51 @@ runBug31525(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } int runBug32160(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } int master = res.getMasterNodeId(); int next = res.getNextMasterNodeId(master); if (res.insertErrorInNode(next, 7194)) { return NDBT_FAILED; } int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(master, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(master, 7193)) return NDBT_FAILED; int val3[] = { 7099 }; if (res.dumpStateOneNode(master, val3, 1)) return NDBT_FAILED; if (res.waitNodesNoStart(&master, 1)) return NDBT_FAILED; if (res.startNodes(&master, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ Loading Loading @@ -2205,6 +2250,9 @@ TESTCASE("Bug28717", ""){ TESTCASE("Bug29364", ""){ INITIALIZER(runBug29364); } TESTCASE("Bug32160", ""){ INITIALIZER(runBug32160); } 
NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ Loading storage/ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading @@ -581,6 +581,10 @@ max-time: 1000 cmd: testNodeRestart args: -n Bug29364 T1 max-time: 300 cmd: testNodeRestart args: -n Bug32160 T1 # # DICT TESTS max-time: 500 Loading Loading
storage/ndb/src/kernel/blocks/ERROR_codes.txt +6 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4029 Next DBLQH 5047 Next DBDICT 6008 Next DBDIH 7193 Next DBDIH 7195 Next DBTC 8054 Next CMVMI 9000 Next BACKUP 10038 Loading Loading @@ -81,6 +81,11 @@ Delay GCP_SAVEREQ by 10 secs 7185: Don't reply to COPY_GCI_REQ where reason == GCP 7193: Don't send LCP_FRAG_ORD to self, and crash when sending first LCP_FRAG_ORD(last) 7194: Force removeNodeFromStored to complete in the middle of MASTER_LCPCONF ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- Loading
storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +10 −0 Original line number Diff line number Diff line Loading @@ -1310,7 +1310,17 @@ private: LcpStatus lcpStatus; Uint32 lcpStatusUpdatedPlace; struct Save { LcpStatus m_status; Uint32 m_place; } m_saveState[10]; void setLcpStatus(LcpStatus status, Uint32 line){ for (Uint32 i = 9; i > 0; i--) m_saveState[i] = m_saveState[i-1]; m_saveState[0].m_status = lcpStatus; m_saveState[0].m_place = lcpStatusUpdatedPlace; lcpStatus = status; lcpStatusUpdatedPlace = line; } Loading
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +54 −6 Original line number Diff line number Diff line Loading @@ -5181,10 +5181,18 @@ void Dbdih::startRemoveFailedNode(Signal* signal, NodeRecordPtr failedNodePtr) } jam(); if (!ERROR_INSERTED(7194)) { signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE; signal->theData[1] = failedNodePtr.i; signal->theData[2] = 0; // Tab id sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB); } else { ndbout_c("7194 Not starting ZREMOVE_NODE_FROM_TABLE"); } setLocalNodefailHandling(signal, failedNodePtr.i, NF_REMOVE_NODE_FROM_TABLE); }//Dbdih::startRemoveFailedNode() Loading Loading @@ -6115,11 +6123,21 @@ Dbdih::checkEmptyLcpComplete(Signal *signal){ signal->theData[0] = 7012; execDUMP_STATE_ORD(signal); if (ERROR_INSERTED(7194)) { ndbout_c("7194 starting ZREMOVE_NODE_FROM_TABLE"); signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE; signal->theData[1] = c_lcpMasterTakeOverState.failedNodeId; signal->theData[2] = 0; // Tab id sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB); } c_lcpMasterTakeOverState.set(LMTOS_INITIAL, __LINE__); MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0]; req->masterRef = reference(); req->failedNodeId = c_lcpMasterTakeOverState.failedNodeId; sendLoopMacro(MASTER_LCPREQ, sendMASTER_LCPREQ); } else { sendMASTER_LCPCONF(signal); } Loading Loading @@ -6432,6 +6450,15 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) { const MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0]; jamEntry(); if (ERROR_INSERTED(7194)) { ndbout_c("delaying MASTER_LCPCONF due to error 7194"); sendSignalWithDelay(reference(), GSN_MASTER_LCPCONF, signal, 300, signal->getLength()); return; } Uint32 senderNodeId = conf->senderNodeId; MasterLCPConf::State lcpState = (MasterLCPConf::State)conf->lcpState; const Uint32 failedNodeId = conf->failedNodeId; Loading Loading @@ -6566,7 +6593,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) #endif c_lcpState.keepGci = 
SYSFILE->keepGCI; c_lcpState.setLcpStatus(LCP_START_LCP_ROUND, __LINE__); startLcpRoundLoopLab(signal, 0, 0); break; } Loading Loading @@ -10538,6 +10564,8 @@ void Dbdih::sendLastLCP_FRAG_ORD(Signal* signal) if(ERROR_INSERTED(7075)){ continue; } CRASH_INSERTION(7193); BlockReference ref = calcLqhBlockRef(nodePtr.i); sendSignal(ref, GSN_LCP_FRAG_ORD, signal,LcpFragOrd::SignalLength, JBB); } Loading Loading @@ -10765,6 +10793,13 @@ Dbdih::checkLcpAllTablesDoneInLqh(){ CRASH_INSERTION2(7017, !isMaster()); c_lcpState.setLcpStatus(LCP_TAB_COMPLETED, __LINE__); if (ERROR_INSERTED(7194)) { ndbout_c("CLEARING 7194"); CLEAR_ERROR_INSERT_VALUE; } return true; } Loading Loading @@ -10954,6 +10989,11 @@ Dbdih::sendLCP_FRAG_ORD(Signal* signal, BlockReference ref = calcLqhBlockRef(replicaPtr.p->procNode); if (ERROR_INSERTED(7193) && replicaPtr.p->procNode == getOwnNodeId()) { return; } LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0]; lcpFragOrd->tableId = info.tableId; lcpFragOrd->fragmentId = info.fragId; Loading Loading @@ -14500,6 +14540,14 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) ("immediateLcpStart = %d masterLcpNodeId = %d", c_lcpState.immediateLcpStart, refToNode(c_lcpState.m_masterLcpDihRef)); for (Uint32 i = 0; i<10; i++) { infoEvent("%u : status: %u place: %u", i, c_lcpState.m_saveState[i].m_status, c_lcpState.m_saveState[i].m_place); } infoEvent("-- Node %d LCP STATE --", getOwnNodeId()); } Loading
storage/ndb/test/ndbapi/testNodeRestart.cpp +48 −0 Original line number Diff line number Diff line Loading @@ -1836,6 +1836,51 @@ runBug31525(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } int runBug32160(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } int master = res.getMasterNodeId(); int next = res.getNextMasterNodeId(master); if (res.insertErrorInNode(next, 7194)) { return NDBT_FAILED; } int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateOneNode(master, val2, 2)) return NDBT_FAILED; if (res.insertErrorInNode(master, 7193)) return NDBT_FAILED; int val3[] = { 7099 }; if (res.dumpStateOneNode(master, val3, 1)) return NDBT_FAILED; if (res.waitNodesNoStart(&master, 1)) return NDBT_FAILED; if (res.startNodes(&master, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ Loading Loading @@ -2205,6 +2250,9 @@ TESTCASE("Bug28717", ""){ TESTCASE("Bug29364", ""){ INITIALIZER(runBug29364); } TESTCASE("Bug32160", ""){ INITIALIZER(runBug32160); } NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ Loading
storage/ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading @@ -581,6 +581,10 @@ max-time: 1000 cmd: testNodeRestart args: -n Bug29364 T1 max-time: 300 cmd: testNodeRestart args: -n Bug32160 T1 # # DICT TESTS max-time: 500 Loading