Loading ndb/include/kernel/signaldata/DumpStateOrd.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -78,6 +78,8 @@ public: LqhDumpAllScanRec = 2301, LqhDumpAllActiveScanRec = 2302, LqhDumpLcpState = 2303, LqhErrorInsert5042 = 2315, AccDumpOneScanRec = 2400, AccDumpAllScanRec = 2401, AccDumpAllActiveScanRec = 2402, Loading ndb/include/ndb_version.h.in +3 −0 Original line number Diff line number Diff line Loading @@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; */ /*#define NDB_VERSION_ID 0*/ #define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) #define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) #endif ndb/src/kernel/blocks/ERROR_codes.txt +9 −0 Original line number Diff line number Diff line Loading @@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ. 5007: Delay GCP_SAVEREQ by 10 secs 7165: Delay INCL_NODE_REQ in starting node yielding error in GCP_PREPARE ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- Loading Loading @@ -155,11 +157,15 @@ Insert node failure handling when receiving COMPLETEREQ. 5006: Insert node failure handling when receiving ABORTREQ. 5042: As 5002, but with specified table (see DumpStateOrd) These error codes can be combined with error codes for testing time-out handling in DBTC to ensure that node failures are also well handled in time-out handling. They can also be used to test multiple node failure handling. ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH ------------------------------------------------- 5011: Loading Loading @@ -196,6 +202,9 @@ Delay execution of ABORTREQ signal 2 seconds to generate time-out. 
8048: Make TC not choose own node for simple/dirty read 5041: Crash if receiving simple read from other TC on different node 5100,5101: Drop ABORT req in primary replica Crash on "next" ABORT ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC ------------------------------------------------- 8040: Loading ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +47 −5 Original line number Diff line number Diff line Loading @@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId) signal->theData[2] = c_nodeStartMaster.failNr; signal->theData[3] = 0; signal->theData[4] = currentgcp; sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB); sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA); }//Dbdih::sendINCL_NODEREQ() void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId) Loading Loading @@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal) // global checkpoint id and the correct state. We do not wait for any reply // since the starting node will not send any. 
/*-------------------------------------------------------------------------*/ Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version; if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) || (getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5)) { c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode); } sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode); }//Dbdih::gcpBlockedLab() Loading Loading @@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) jamEntry(); Uint32 retRef = signal->theData[0]; Uint32 nodeId = signal->theData[1]; if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165)) { CLEAR_ERROR_INSERT_VALUE; sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength()); return; } Uint32 tnodeStartFailNr = signal->theData[2]; currentgcp = signal->theData[4]; CRASH_INSERTION(7127); Loading Loading @@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) // id's and the lcp status. /*-----------------------------------------------------------------------*/ CRASH_INSERTION(7171); Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version; if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) || (NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5)) { signal->theData[0] = getOwnNodeId(); signal->theData[1] = getOwnNodeId(); sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB); } return; }//if if (getNodeStatus(nodeId) != NodeRecord::STARTING) { Loading Loading @@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal) /*------------------------------------------------------------------------*/ // Verify that a starting node has also crashed. Reset the node start record. 
/*-------------------------------------------------------------------------*/ if (c_nodeStartMaster.startNode != RNIL) { ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE); if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE) { BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode); SystemError * const sysErr = (SystemError*)&signal->theData[0]; sysErr->errorCode = SystemError::StartInProgressError; sysErr->errorRef = reference(); sysErr->data1= 0; sysErr->data2= __LINE__; sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA); nodeResetStart(); }//if /*--------------------------------------------------*/ Loading Loading @@ -5187,15 +5219,16 @@ void Dbdih::removeNodeFromTable(Signal* signal, /** * For each of replica record */ Uint32 replicaNo = 0; bool found = false; ReplicaRecordPtr replicaPtr; for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) { replicaPtr.i = replicaPtr.p->nextReplica) { jam(); ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); if(replicaPtr.p->procNode == nodeId){ jam(); found = true; noOfRemovedReplicas++; removeNodeFromStored(nodeId, fragPtr, replicaPtr); if(replicaPtr.p->lcpOngoingFlag){ Loading @@ -5211,6 +5244,15 @@ void Dbdih::removeNodeFromTable(Signal* signal, } } } if (!found) { jam(); /** * Run updateNodeInfo to remove any dead nodes from list of activeNodes * see bug#15587 */ updateNodeInfo(fragPtr); } noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas; } Loading ndb/src/kernel/blocks/dblqh/Dblqh.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -2881,6 +2881,7 @@ private: UintR ctransidHash[1024]; Uint32 c_diskless; Uint32 c_error_insert_table_id; public: /** Loading Loading
ndb/include/kernel/signaldata/DumpStateOrd.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -78,6 +78,8 @@ public: LqhDumpAllScanRec = 2301, LqhDumpAllActiveScanRec = 2302, LqhDumpLcpState = 2303, LqhErrorInsert5042 = 2315, AccDumpOneScanRec = 2400, AccDumpAllScanRec = 2401, AccDumpAllActiveScanRec = 2402, Loading
ndb/include/ndb_version.h.in +3 −0 Original line number Diff line number Diff line Loading @@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; */ /*#define NDB_VERSION_ID 0*/ #define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) #define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) #endif
ndb/src/kernel/blocks/ERROR_codes.txt +9 −0 Original line number Diff line number Diff line Loading @@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ. 5007: Delay GCP_SAVEREQ by 10 secs 7165: Delay INCL_NODE_REQ in starting node yielding error in GCP_PREPARE ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- Loading Loading @@ -155,11 +157,15 @@ Insert node failure handling when receiving COMPLETEREQ. 5006: Insert node failure handling when receiving ABORTREQ. 5042: As 5002, but with specified table (see DumpStateOrd) These error codes can be combined with error codes for testing time-out handling in DBTC to ensure that node failures are also well handled in time-out handling. They can also be used to test multiple node failure handling. ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH ------------------------------------------------- 5011: Loading Loading @@ -196,6 +202,9 @@ Delay execution of ABORTREQ signal 2 seconds to generate time-out. 8048: Make TC not choose own node for simple/dirty read 5041: Crash if receiving simple read from other TC on different node 5100,5101: Drop ABORT req in primary replica Crash on "next" ABORT ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC ------------------------------------------------- 8040: Loading
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +47 −5 Original line number Diff line number Diff line Loading @@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId) signal->theData[2] = c_nodeStartMaster.failNr; signal->theData[3] = 0; signal->theData[4] = currentgcp; sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB); sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA); }//Dbdih::sendINCL_NODEREQ() void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId) Loading Loading @@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal) // global checkpoint id and the correct state. We do not wait for any reply // since the starting node will not send any. /*-------------------------------------------------------------------------*/ Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version; if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) || (getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5)) { c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode); } sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode); }//Dbdih::gcpBlockedLab() Loading Loading @@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) jamEntry(); Uint32 retRef = signal->theData[0]; Uint32 nodeId = signal->theData[1]; if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165)) { CLEAR_ERROR_INSERT_VALUE; sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength()); return; } Uint32 tnodeStartFailNr = signal->theData[2]; currentgcp = signal->theData[4]; CRASH_INSERTION(7127); Loading Loading @@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) // id's and the lcp status. 
/*-----------------------------------------------------------------------*/ CRASH_INSERTION(7171); Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version; if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) || (NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5)) { signal->theData[0] = getOwnNodeId(); signal->theData[1] = getOwnNodeId(); sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB); } return; }//if if (getNodeStatus(nodeId) != NodeRecord::STARTING) { Loading Loading @@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal) /*------------------------------------------------------------------------*/ // Verify that a starting node has also crashed. Reset the node start record. /*-------------------------------------------------------------------------*/ if (c_nodeStartMaster.startNode != RNIL) { ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE); if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE) { BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode); SystemError * const sysErr = (SystemError*)&signal->theData[0]; sysErr->errorCode = SystemError::StartInProgressError; sysErr->errorRef = reference(); sysErr->data1= 0; sysErr->data2= __LINE__; sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA); nodeResetStart(); }//if /*--------------------------------------------------*/ Loading Loading @@ -5187,15 +5219,16 @@ void Dbdih::removeNodeFromTable(Signal* signal, /** * For each of replica record */ Uint32 replicaNo = 0; bool found = false; ReplicaRecordPtr replicaPtr; for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) { replicaPtr.i = replicaPtr.p->nextReplica) { jam(); ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); if(replicaPtr.p->procNode == nodeId){ jam(); found = 
true; noOfRemovedReplicas++; removeNodeFromStored(nodeId, fragPtr, replicaPtr); if(replicaPtr.p->lcpOngoingFlag){ Loading @@ -5211,6 +5244,15 @@ void Dbdih::removeNodeFromTable(Signal* signal, } } } if (!found) { jam(); /** * Run updateNodeInfo to remove any dead nodes from list of activeNodes * see bug#15587 */ updateNodeInfo(fragPtr); } noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas; } Loading
ndb/src/kernel/blocks/dblqh/Dblqh.hpp +1 −0 Original line number Diff line number Diff line Loading @@ -2881,6 +2881,7 @@ private: UintR ctransidHash[1024]; Uint32 c_diskless; Uint32 c_error_insert_table_id; public: /** Loading