Loading storage/ndb/src/kernel/blocks/ERROR_codes.txt +4 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4029 Next DBLQH 5045 Next DBDICT 6007 Next DBDIH 7186 Next DBDIH 7193 Next DBTC 8054 Next CMVMI 9000 Next BACKUP 10038 Loading Loading @@ -155,6 +155,9 @@ And crash when all have "not" been sent 7027: Crash in master when changing state to LCP_TAB_SAVED 7018: Crash in master when changing state to LCP_TAB_SAVED 7191: Crash when receiving LCP_COMPLETE_REP 7192: Crash in setLcpActiveStatusStart - when dead node missed to LCP's ERROR CODES FOR TESTING NODE FAILURE, FAILURE IN COPY FRAGMENT PROCESS: ----------------------------------------------------------------------- Loading storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +4 −0 Original line number Diff line number Diff line Loading @@ -10853,6 +10853,8 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal) { jamEntry(); CRASH_INSERTION(7191); #if 0 g_eventLogger.info("LCP_COMPLETE_REP"); printLCP_COMPLETE_REP(stdout, Loading Loading @@ -13603,6 +13605,7 @@ void Dbdih::setLcpActiveStatusStart(Signal* signal) // It must be taken over with the copy fragment process after a system // crash. We indicate this by setting the active status to TAKE_OVER. 
/*-------------------------------------------------------------------*/ c_lcpState.m_participatingLQH.set(nodePtr.i); nodePtr.p->activeStatus = Sysfile::NS_TakeOver; //break; // Fall through case Sysfile::NS_TakeOver:{ Loading Loading @@ -13645,6 +13648,7 @@ void Dbdih::setLcpActiveStatusStart(Signal* signal) break; case Sysfile::NS_ActiveMissed_2: jam(); CRASH_INSERTION(7192); if ((nodePtr.p->nodeStatus == NodeRecord::ALIVE) && (!nodePtr.p->copyCompleted)) { jam(); Loading storage/ndb/test/ndbapi/testNodeRestart.cpp +77 −0 Original line number Diff line number Diff line Loading @@ -1668,6 +1668,80 @@ runBug28717(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } int runBug31525(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } int nodes[2]; nodes[0] = res.getMasterNodeId(); nodes[1] = res.getNextMasterNodeId(nodes[0]); while (res.getNodeGroup(nodes[0]) != res.getNodeGroup(nodes[1])) { ndbout_c("Restarting %u as it not in same node group as %u", nodes[1], nodes[0]); if (res.restartOneDbNode(nodes[1], false, true, true)) return NDBT_FAILED; if (res.waitNodesNoStart(nodes+1, 1)) return NDBT_FAILED; if (res.startNodes(nodes+1, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; nodes[1] = res.getNextMasterNodeId(nodes[0]); } ndbout_c("nodes[0]: %u nodes[1]: %u", nodes[0], nodes[1]); int val = DumpStateOrd::DihMinTimeBetweenLCP; if (res.dumpStateAllNodes(&val, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateAllNodes(val2, 2)) return NDBT_FAILED; if (res.insertErrorInAllNodes(932)) return NDBT_FAILED; if (res.insertErrorInNode(nodes[1], 7192)) return NDBT_FAILED; if (res.insertErrorInNode(nodes[0], 7191)) return NDBT_FAILED; if (res.waitClusterNoStart()) return NDBT_FAILED; if (res.startAll()) return NDBT_FAILED; if 
(res.waitClusterStarted()) return NDBT_FAILED; if (res.restartOneDbNode(nodes[1], false, false, true)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ Loading Loading @@ -1991,6 +2065,9 @@ TESTCASE("Bug21271", STEP(runPkUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("Bug31525", ""){ INITIALIZER(runBug31525); } TESTCASE("Bug24717", ""){ INITIALIZER(runBug24717); } Loading storage/ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading @@ -934,3 +934,7 @@ max-time: 1500 cmd: testSystemRestart args: -n SR_DD_2b_LCP D2 max-time: 600 cmd: testNodeRestart args: -n Bug31525 T1 Loading
storage/ndb/src/kernel/blocks/ERROR_codes.txt +4 −1 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4029 Next DBLQH 5045 Next DBDICT 6007 Next DBDIH 7186 Next DBDIH 7193 Next DBTC 8054 Next CMVMI 9000 Next BACKUP 10038 Loading Loading @@ -155,6 +155,9 @@ And crash when all have "not" been sent 7027: Crash in master when changing state to LCP_TAB_SAVED 7018: Crash in master when changing state to LCP_TAB_SAVED 7191: Crash when receiving LCP_COMPLETE_REP 7192: Crash in setLcpActiveStatusStart - when dead node missed two LCPs ERROR CODES FOR TESTING NODE FAILURE, FAILURE IN COPY FRAGMENT PROCESS: ----------------------------------------------------------------------- Loading
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +4 −0 Original line number Diff line number Diff line Loading @@ -10853,6 +10853,8 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal) { jamEntry(); CRASH_INSERTION(7191); #if 0 g_eventLogger.info("LCP_COMPLETE_REP"); printLCP_COMPLETE_REP(stdout, Loading Loading @@ -13603,6 +13605,7 @@ void Dbdih::setLcpActiveStatusStart(Signal* signal) // It must be taken over with the copy fragment process after a system // crash. We indicate this by setting the active status to TAKE_OVER. /*-------------------------------------------------------------------*/ c_lcpState.m_participatingLQH.set(nodePtr.i); nodePtr.p->activeStatus = Sysfile::NS_TakeOver; //break; // Fall through case Sysfile::NS_TakeOver:{ Loading Loading @@ -13645,6 +13648,7 @@ void Dbdih::setLcpActiveStatusStart(Signal* signal) break; case Sysfile::NS_ActiveMissed_2: jam(); CRASH_INSERTION(7192); if ((nodePtr.p->nodeStatus == NodeRecord::ALIVE) && (!nodePtr.p->copyCompleted)) { jam(); Loading
storage/ndb/test/ndbapi/testNodeRestart.cpp +77 −0 Original line number Diff line number Diff line Loading @@ -1668,6 +1668,80 @@ runBug28717(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } int runBug31525(NDBT_Context* ctx, NDBT_Step* step) { int result = NDBT_OK; int loops = ctx->getNumLoops(); int records = ctx->getNumRecords(); Ndb* pNdb = GETNDB(step); NdbRestarter res; if (res.getNumDbNodes() < 2) { return NDBT_OK; } int nodes[2]; nodes[0] = res.getMasterNodeId(); nodes[1] = res.getNextMasterNodeId(nodes[0]); while (res.getNodeGroup(nodes[0]) != res.getNodeGroup(nodes[1])) { ndbout_c("Restarting %u as it not in same node group as %u", nodes[1], nodes[0]); if (res.restartOneDbNode(nodes[1], false, true, true)) return NDBT_FAILED; if (res.waitNodesNoStart(nodes+1, 1)) return NDBT_FAILED; if (res.startNodes(nodes+1, 1)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; nodes[1] = res.getNextMasterNodeId(nodes[0]); } ndbout_c("nodes[0]: %u nodes[1]: %u", nodes[0], nodes[1]); int val = DumpStateOrd::DihMinTimeBetweenLCP; if (res.dumpStateAllNodes(&val, 1)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (res.dumpStateAllNodes(val2, 2)) return NDBT_FAILED; if (res.insertErrorInAllNodes(932)) return NDBT_FAILED; if (res.insertErrorInNode(nodes[1], 7192)) return NDBT_FAILED; if (res.insertErrorInNode(nodes[0], 7191)) return NDBT_FAILED; if (res.waitClusterNoStart()) return NDBT_FAILED; if (res.startAll()) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; if (res.restartOneDbNode(nodes[1], false, false, true)) return NDBT_FAILED; if (res.waitClusterStarted()) return NDBT_FAILED; return NDBT_OK; } NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ Loading Loading @@ -1991,6 +2065,9 @@ TESTCASE("Bug21271", STEP(runPkUpdateUntilStopped); FINALIZER(runClearTable); } TESTCASE("Bug31525", ""){ INITIALIZER(runBug31525); } 
TESTCASE("Bug24717", ""){ INITIALIZER(runBug24717); } Loading
storage/ndb/test/run-test/daily-basic-tests.txt +4 −0 Original line number Diff line number Diff line Loading @@ -934,3 +934,7 @@ max-time: 1500 cmd: testSystemRestart args: -n SR_DD_2b_LCP D2 max-time: 600 cmd: testNodeRestart args: -n Bug31525 T1