Loading ndb/include/kernel/signaldata/DumpStateOrd.hpp +4 −0 Original line number Diff line number Diff line Loading @@ -107,6 +107,10 @@ public: CmvmiDumpLongSignalMemory = 2601, CmvmiSetRestartOnErrorInsert = 2602, CmvmiTestLongSigWithDelay = 2603, CmvmiDumpSubscriptions = 2604, /* note: done to respective outfile to be able to debug if events for some reason does not end up in clusterlog */ // 7000 DIH // 7001 DIH // 7002 DIH Loading ndb/src/common/debugger/EventLogger.cpp +95 −4 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include <ndb_global.h> #include "EventLogger.hpp" #include <TransporterCallback.hpp> #include <NdbConfig.h> #include <kernel/BlockNumbers.h> Loading Loading @@ -528,8 +529,98 @@ void getTextUndoLogBlocked(QQQQ) { theData[2]); } void getTextTransporterError(QQQQ) { struct myTransporterError{ int errorNum; char errorString[256]; }; int i = 0; int lenth = 0; static const struct myTransporterError TransporterErrorString[]= { //TE_NO_ERROR = 0 {TE_NO_ERROR,"No error"}, //TE_ERROR_CLOSING_SOCKET = 0x1 {TE_ERROR_CLOSING_SOCKET,"Error found during closing of socket"}, //TE_ERROR_IN_SELECT_BEFORE_ACCEPT = 0x2 {TE_ERROR_IN_SELECT_BEFORE_ACCEPT,"Error found before accept. The transporter will retry"}, //TE_INVALID_MESSAGE_LENGTH = 0x3 | TE_DO_DISCONNECT {TE_INVALID_MESSAGE_LENGTH,"Error found in message (invalid message length)"}, //TE_INVALID_CHECKSUM = 0x4 | TE_DO_DISCONNECT {TE_INVALID_CHECKSUM,"Error found in message (checksum)"}, //TE_COULD_NOT_CREATE_SOCKET = 0x5 {TE_COULD_NOT_CREATE_SOCKET,"Error found while creating socket(can't create socket)"}, //TE_COULD_NOT_BIND_SOCKET = 0x6 {TE_COULD_NOT_BIND_SOCKET,"Error found while binding server socket"}, //TE_LISTEN_FAILED = 0x7 {TE_LISTEN_FAILED,"Error found while listening to server socket"}, //TE_ACCEPT_RETURN_ERROR = 0x8 {TE_ACCEPT_RETURN_ERROR,"Error found during accept(accept return error)"}, //TE_SHM_DISCONNECT = 0xb | TE_DO_DISCONNECT {TE_SHM_DISCONNECT,"The remote node has disconnected"}, //TE_SHM_IPC_STAT = 0xc | TE_DO_DISCONNECT {TE_SHM_IPC_STAT,"Unable to check shm segment"}, //TE_SHM_UNABLE_TO_CREATE_SEGMENT = 0xd {TE_SHM_UNABLE_TO_CREATE_SEGMENT,"Unable to create shm segment"}, //TE_SHM_UNABLE_TO_ATTACH_SEGMENT = 0xe {TE_SHM_UNABLE_TO_ATTACH_SEGMENT,"Unable to attach shm segment"}, //TE_SHM_UNABLE_TO_REMOVE_SEGMENT = 0xf {TE_SHM_UNABLE_TO_REMOVE_SEGMENT,"Unable to remove shm segment"}, //TE_TOO_SMALL_SIGID = 0x10 {TE_TOO_SMALL_SIGID,"Sig ID too small"}, //TE_TOO_LARGE_SIGID = 0x11 {TE_TOO_LARGE_SIGID,"Sig ID too large"}, //TE_WAIT_STACK_FULL = 0x12 | TE_DO_DISCONNECT {TE_WAIT_STACK_FULL,"Wait stack was full"}, //TE_RECEIVE_BUFFER_FULL = 0x13 | TE_DO_DISCONNECT {TE_RECEIVE_BUFFER_FULL,"Receive buffer was full"}, //TE_SIGNAL_LOST_SEND_BUFFER_FULL = 0x14 | TE_DO_DISCONNECT {TE_SIGNAL_LOST_SEND_BUFFER_FULL,"Send buffer was full,and trying to force send fails"}, //TE_SIGNAL_LOST = 0x15 {TE_SIGNAL_LOST,"Send failed for unknown reason(signal lost)"}, //TE_SEND_BUFFER_FULL = 0x16 {TE_SEND_BUFFER_FULL,"The send buffer was full, but sleeping for a while solved"}, //TE_SCI_LINK_ERROR = 0x0017 {TE_SCI_LINK_ERROR,"There is no link from this node to the switch"}, //TE_SCI_UNABLE_TO_START_SEQUENCE = 0x18 | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_START_SEQUENCE,"Could not start a sequence, because system resources are exumed or no sequence has been created"}, //TE_SCI_UNABLE_TO_REMOVE_SEQUENCE = 0x19 | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_REMOVE_SEQUENCE,"Could not remove a sequence"}, //TE_SCI_UNABLE_TO_CREATE_SEQUENCE = 0x1a | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_CREATE_SEQUENCE,"Could not create a sequence, because system resources are exempted. Must reboot"}, //TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR = 0x1b | TE_DO_DISCONNECT {TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR,"Tried to send data on redundant link but failed"}, //TE_SCI_CANNOT_INIT_LOCALSEGMENT = 0x1c | TE_DO_DISCONNECT {TE_SCI_CANNOT_INIT_LOCALSEGMENT,"Cannot initialize local segment"}, //TE_SCI_CANNOT_MAP_REMOTESEGMENT = 0x1d | TE_DO_DISCONNEC {TE_SCI_CANNOT_MAP_REMOTESEGMENT,"Cannot map remote segment"}, //TE_SCI_UNABLE_TO_UNMAP_SEGMENT = 0x1e | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_UNMAP_SEGMENT,"Cannot free the resources used by this segment (step 1)"}, //TE_SCI_UNABLE_TO_REMOVE_SEGMENT = 0x1f | TE_DO_DISCONNEC {TE_SCI_UNABLE_TO_REMOVE_SEGMENT,"Cannot free the resources used by this segment (step 2)"}, //TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT = 0x20 | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT,"Cannot disconnect from a remote segment"}, //TE_SHM_IPC_PERMANENT = 0x21 {TE_SHM_IPC_PERMANENT,"Shm ipc Permanent error"}, //TE_SCI_UNABLE_TO_CLOSE_CHANNEL = 0x22 {TE_SCI_UNABLE_TO_CLOSE_CHANNEL,"Unable to close the sci channel and the resources allocated"} }; lenth = sizeof(TransporterErrorString)/sizeof(struct myTransporterError); for(i=0; i<lenth; i++) { if(theData[2] == TransporterErrorString[i].errorNum) { BaseString::snprintf(m_text, m_text_len, "Transporter to node %d reported error 0x%x: %s", theData[1], theData[2], TransporterErrorString[i].errorString); break; } } if(i == lenth) BaseString::snprintf(m_text, m_text_len, "Transporter to node %d reported error 0x%x", "Transporter to node %d reported error 0x%x: unknown error", theData[1], theData[2]); } Loading ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +19 −1 Original line number Diff line number Diff line Loading @@ -897,7 +897,7 @@ void Cmvmi::execSET_VAR_REQ(Signal* signal) case TimeToWaitAlive: // QMGR case HeartbeatIntervalDbDb: // TODO ev till Ndbcnt ocks case HeartbeatIntervalDbDb: // TODO possibly Ndbcnt too case HeartbeatIntervalDbApi: case ArbitTimeout: sendSignal(QMGR_REF, GSN_SET_VAR_REQ, signal, 3, JBB); Loading Loading @@ -1105,6 +1105,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } if (arg == DumpStateOrd::CmvmiDumpSubscriptions) { SubscriberPtr ptr; subscribers.first(ptr); g_eventLogger.info("List subscriptions:"); while(ptr.i != RNIL) { g_eventLogger.info("Subscription: %u, nodeId: %u, ref: 0x%x", ptr.i, refToNode(ptr.p->blockRef), ptr.p->blockRef); for(Uint32 i = 0; i < LogLevel::LOGLEVEL_CATEGORIES; i++) { Uint32 level = ptr.p->logLevel.getLogLevel((LogLevel::EventCategory)i); g_eventLogger.info("Category %u Level %u", i, level); } subscribers.next(ptr); } } if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", g_sectionSegmentPool.getSize(), Loading ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +66 −64 Original line number Diff line number Diff line Loading @@ -1786,8 +1786,8 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) return; }//if if (getNodeStatus(nodeId) != NodeRecord::DEAD){ ndbout << "nodeStatus in START_PERMREQ = " << (Uint32) getNodeStatus(nodeId) << endl; g_eventLogger.error("nodeStatus in START_PERMREQ = %u", (Uint32) getNodeStatus(nodeId)); ndbrequire(false); }//if Loading Loading @@ -4029,7 +4029,7 @@ void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr) jam(); break; default: ndbout_c("outstanding gsn: %s(%d)", g_eventLogger.error("outstanding gsn: %s(%d)", getSignalName(c_nodeStartMaster.m_outstandingGsn), c_nodeStartMaster.m_outstandingGsn); ndbrequire(false); Loading Loading @@ -4472,9 +4472,10 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; break; default: ndbout << "activeStatus = " << (Uint32) failedNodePtr.p->activeStatus; ndbout << " at failure after NODE_FAILREP of node = "; ndbout << failedNodePtr.i << endl; g_eventLogger.error("activeStatus = %u " "at failure after NODE_FAILREP of node = %u", (Uint32) failedNodePtr.p->activeStatus, failedNodePtr.i); ndbrequire(false); break; }//switch Loading Loading @@ -4629,7 +4630,7 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ /** * Node failure during master take over... */ ndbout_c("Nodefail during master take over"); g_eventLogger.info("Nodefail during master take over"); } setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER); Loading Loading @@ -4869,7 +4870,8 @@ void Dbdih::execMASTER_GCPCONF(Signal* signal) if (latestLcpId > SYSFILE->latestLCP_ID) { jam(); #if 0 ndbout_c("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId); g_eventLogger.info("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId); SYSFILE->latestLCP_ID = latestLcpId; #endif SYSFILE->keepGCI = oldestKeepGci; Loading Loading @@ -5528,7 +5530,7 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId, if (ERROR_INSERTED(7030)) { ndbout_c("Reenable GCP_PREPARE"); g_eventLogger.info("Reenable GCP_PREPARE"); CLEAR_ERROR_INSERT_VALUE; } Loading Loading @@ -5701,7 +5703,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); #if 0 if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){ ndbout_c("Dbdih: Also resetting c_copyGCISlave"); g_eventLogger.info("Dbdih: Also resetting c_copyGCISlave"); c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE; c_copyGCISlave.m_expectedNextWord = 0; } Loading Loading @@ -5790,7 +5792,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ if(c_lcpState.lcpStatus == LCP_TAB_SAVED){ #ifdef VM_TRACE ndbout_c("Sending extra GSN_LCP_COMPLETE_REP to new master"); g_eventLogger.info("Sending extra GSN_LCP_COMPLETE_REP to new master"); #endif sendLCP_COMPLETE_REP(signal); } Loading Loading @@ -5946,7 +5948,7 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) nodePtr.p->lcpStateAtTakeOver = lcpState; #ifdef VM_TRACE ndbout_c("MASTER_LCPCONF"); g_eventLogger.info("MASTER_LCPCONF"); printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0); #endif Loading Loading @@ -6023,7 +6025,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) // protocol. /* --------------------------------------------------------------------- */ #ifdef VM_TRACE ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"); g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"); #endif checkLcpStart(signal, __LINE__); break; Loading @@ -6034,7 +6036,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) // protocol by calculating the keep gci and storing the new lcp id. /* --------------------------------------------------------------------- */ #ifdef VM_TRACE ndbout_c("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"); g_eventLogger.info("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"); #endif if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) { jam(); Loading @@ -6045,7 +6047,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) /*---------------------------------------------------------------------*/ Uint32 lcpId = SYSFILE->latestLCP_ID; #ifdef VM_TRACE ndbout_c("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1); g_eventLogger.info("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1); #endif SYSFILE->latestLCP_ID--; }//if Loading @@ -6062,7 +6064,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) * complete before finalising the LCP process. * ------------------------------------------------------------------ */ #ifdef VM_TRACE ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> " g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> " "startLcpRoundLoopLab(table=%u, fragment=%u)", c_lcpMasterTakeOverState.minTableId, c_lcpMasterTakeOverState.minFragId); Loading Loading @@ -7376,8 +7378,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) if (cgcpSameCounter == 1200) { jam(); #ifdef VM_TRACE ndbout << "System crash due to GCP Stop in state = "; ndbout << (Uint32) cgcpStatus << endl; g_eventLogger.error("System crash due to GCP Stop in state = %u", (Uint32) cgcpStatus); #endif crashSystemAtGcpStop(signal); return; Loading @@ -7390,8 +7392,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) if (cgcpSameCounter == 1200) { jam(); #ifdef VM_TRACE ndbout << "System crash due to GCP Stop in state = "; ndbout << (Uint32) cgcpStatus << endl; g_eventLogger.error("System crash due to GCP Stop in state = %u", (Uint32) cgcpStatus); #endif crashSystemAtGcpStop(signal); return; Loading Loading @@ -7582,7 +7584,7 @@ void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId) getNodeState().startLevel == NodeState::SL_STARTED){ jam(); #if 0 ndbout_c("Dbdih: Clearing initial start ongoing"); g_eventLogger.info("Dbdih: Clearing initial start ongoing"); #endif Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits); } Loading @@ -7601,7 +7603,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) if (ERROR_INSERTED(7030)) { cgckptflag = true; ndbout_c("Delayed GCP_PREPARE 5s"); g_eventLogger.info("Delayed GCP_PREPARE 5s"); sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000, signal->getLength()); return; Loading @@ -7621,7 +7623,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) if (ERROR_INSERTED(7031)) { ndbout_c("Crashing delayed in GCP_PREPARE 3s"); g_eventLogger.info("Crashing delayed in GCP_PREPARE 3s"); signal->theData[0] = 9999; sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1); return; Loading Loading @@ -8136,7 +8138,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) * This is LCP master takeover */ #ifdef VM_TRACE ndbout_c("initLcpLab aborted due to LCP master takeover - 1"); g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 1"); #endif c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); sendMASTER_LCPCONF(signal); Loading @@ -8149,7 +8151,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) * Master take over but has not yet received MASTER_LCPREQ */ #ifdef VM_TRACE ndbout_c("initLcpLab aborted due to LCP master takeover - 2"); g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 2"); #endif return; } Loading Loading @@ -9380,9 +9382,10 @@ void Dbdih::checkTcCounterLab(Signal* signal) { CRASH_INSERTION(7009); if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) { ndbout << "lcpStatus = " << (Uint32) c_lcpState.lcpStatus; ndbout << "lcpStatusUpdatedPlace = " << c_lcpState.lcpStatusUpdatedPlace << endl; g_eventLogger.error("lcpStatus = %u" "lcpStatusUpdatedPlace = %d", (Uint32) c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace); ndbrequire(false); return; }//if Loading Loading @@ -9935,9 +9938,8 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal) if(tabPtr.p->tabStatus == TabRecord::TS_DROPPING){ jam(); ndbout_c("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ", tableId, fragId); g_eventLogger.info("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ", tableId, fragId); } else { jam(); /** Loading Loading @@ -10065,7 +10067,7 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, }; #ifdef VM_TRACE ndbout_c("Fragment Replica(node=%d) not found", nodeId); g_eventLogger.info("Fragment Replica(node=%d) not found", nodeId); replicaPtr.i = fragPtrP->oldStoredReplicas; while(replicaPtr.i != RNIL){ ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); Loading @@ -10078,9 +10080,9 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, }//if }; if(replicaPtr.i != RNIL){ ndbout_c("...But was found in oldStoredReplicas"); g_eventLogger.info("...But was found in oldStoredReplicas"); } else { ndbout_c("...And wasn't found in oldStoredReplicas"); g_eventLogger.info("...And wasn't found in oldStoredReplicas"); } #endif ndbrequire(false); Loading Loading @@ -10114,7 +10116,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) ndbrequire(replicaPtr.p->lcpOngoingFlag == true); if(lcpNo != replicaPtr.p->nextLcp){ ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d", g_eventLogger.error("lcpNo = %d replicaPtr.p->nextLcp = %d", lcpNo, replicaPtr.p->nextLcp); ndbrequire(false); } Loading Loading @@ -10150,7 +10152,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) // Not all fragments in table have been checkpointed. /* ----------------------------------------------------------------- */ if(0) ndbout_c("reportLcpCompletion: fragment %d not ready", fid); g_eventLogger.info("reportLcpCompletion: fragment %d not ready", fid); return false; }//if }//for Loading Loading @@ -10267,7 +10269,7 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal) jamEntry(); #if 0 ndbout_c("LCP_COMPLETE_REP"); g_eventLogger.info("LCP_COMPLETE_REP"); printLCP_COMPLETE_REP(stdout, signal->getDataPtr(), signal->length(), number()); Loading Loading @@ -10353,7 +10355,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal) if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){ jam(); #ifdef VM_TRACE ndbout_c("Exiting from allNodesLcpCompletedLab"); g_eventLogger.info("Exiting from allNodesLcpCompletedLab"); #endif return; } Loading Loading @@ -10582,12 +10584,12 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) infoEvent("Detected GCP stop...sending kill to %s", c_GCP_SAVEREQ_Counter.getText()); ndbout_c("Detected GCP stop...sending kill to %s", g_eventLogger.error("Detected GCP stop...sending kill to %s", c_GCP_SAVEREQ_Counter.getText()); return; } case GCP_SAVE_LQH_FINISHED: ndbout_c("m_copyReason: %d m_waiting: %d", g_eventLogger.error("m_copyReason: %d m_waiting: %d", c_copyGCIMaster.m_copyReason, c_copyGCIMaster.m_waiting); break; Loading @@ -10597,7 +10599,7 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) break; } ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", g_eventLogger.error("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", c_copyGCISlave.m_senderData, c_copyGCISlave.m_senderRef, c_copyGCISlave.m_copyReason, Loading Loading @@ -12804,9 +12806,9 @@ void Dbdih::setLcpActiveStatusEnd() nodePtr.i = getOwnNodeId(); ptrAss(nodePtr, nodeRecord); ndbrequire(nodePtr.p->activeStatus == Sysfile::NS_Active); ndbout_c("NR: setLcpActiveStatusEnd - m_participatingLQH"); g_eventLogger.info("NR: setLcpActiveStatusEnd - m_participatingLQH"); } else { ndbout_c("NR: setLcpActiveStatusEnd - !m_participatingLQH"); g_eventLogger.info("NR: setLcpActiveStatusEnd - !m_participatingLQH"); } } Loading Loading @@ -13637,8 +13639,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) } if(arg == DumpStateOrd::EnableUndoDelayDataWrite){ ndbout << "Dbdih:: delay write of datapages for table = " << dumpState->args[1]<< endl; g_eventLogger.info("Dbdih:: delay write of datapages for table = %s", dumpState->args[1]); // Send this dump to ACC and TUP EXECUTE_DIRECT(DBACC, GSN_DUMP_STATE_ORD, signal, 2); EXECUTE_DIRECT(DBTUP, GSN_DUMP_STATE_ORD, signal, 2); Loading @@ -13655,13 +13657,13 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) }//if if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) { // Set time between LCP to min value ndbout << "Set time between LCP to min value" << endl; g_eventLogger.info("Set time between LCP to min value"); c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min return; } if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) { // Set time between LCP to max value ndbout << "Set time between LCP to max value" << endl; g_eventLogger.info("Set time between LCP to max value"); c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max return; } Loading Loading @@ -13697,7 +13699,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) { cgcpDelay = signal->theData[1]; } ndbout_c("Setting time between gcp : %d", cgcpDelay); g_eventLogger.info("Setting time between gcp : %d", cgcpDelay); } if (arg == 7021 && signal->getLength() == 2) Loading Loading @@ -13820,7 +13822,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ while(index < count){ if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){ jam(); // ndbout_c("Unqueuing %d", index); // g_eventLogger.info("Unqueuing %d", index); count--; for(Uint32 i = index; i<count; i++){ Loading Loading @@ -13860,7 +13862,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ if(checkLcpAllTablesDoneInLqh()){ jam(); ndbout_c("This is the last table"); g_eventLogger.info("This is the last table"); /** * Then check if saving of tab info is done for all tables Loading @@ -13869,7 +13871,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ checkLcpCompletedLab(signal); if(a != c_lcpState.lcpStatus){ ndbout_c("And all tables are written to already written disk"); g_eventLogger.info("And all tables are written to already written disk"); } } break; Loading ndb/src/kernel/vm/WatchDog.cpp +5 −2 Original line number Diff line number Diff line Loading @@ -22,6 +22,9 @@ #include <NdbOut.hpp> #include <NdbSleep.h> #include <ErrorHandlingMacros.hpp> #include <EventLogger.hpp> extern EventLogger g_eventLogger; extern "C" void* Loading Loading @@ -125,7 +128,7 @@ WatchDog::run(){ last_stuck_action = "Unknown place"; break; }//switch ndbout << "Ndb kernel is stuck in: " << last_stuck_action << endl; g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action); if(alerts == 3){ shutdownSystem(last_stuck_action); } Loading Loading
ndb/include/kernel/signaldata/DumpStateOrd.hpp +4 −0 Original line number Diff line number Diff line Loading @@ -107,6 +107,10 @@ public: CmvmiDumpLongSignalMemory = 2601, CmvmiSetRestartOnErrorInsert = 2602, CmvmiTestLongSigWithDelay = 2603, CmvmiDumpSubscriptions = 2604, /* note: done to respective outfile to be able to debug if events for some reason does not end up in clusterlog */ // 7000 DIH // 7001 DIH // 7002 DIH Loading
ndb/src/common/debugger/EventLogger.cpp +95 −4 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include <ndb_global.h> #include "EventLogger.hpp" #include <TransporterCallback.hpp> #include <NdbConfig.h> #include <kernel/BlockNumbers.h> Loading Loading @@ -528,8 +529,98 @@ void getTextUndoLogBlocked(QQQQ) { theData[2]); } void getTextTransporterError(QQQQ) { struct myTransporterError{ int errorNum; char errorString[256]; }; int i = 0; int lenth = 0; static const struct myTransporterError TransporterErrorString[]= { //TE_NO_ERROR = 0 {TE_NO_ERROR,"No error"}, //TE_ERROR_CLOSING_SOCKET = 0x1 {TE_ERROR_CLOSING_SOCKET,"Error found during closing of socket"}, //TE_ERROR_IN_SELECT_BEFORE_ACCEPT = 0x2 {TE_ERROR_IN_SELECT_BEFORE_ACCEPT,"Error found before accept. The transporter will retry"}, //TE_INVALID_MESSAGE_LENGTH = 0x3 | TE_DO_DISCONNECT {TE_INVALID_MESSAGE_LENGTH,"Error found in message (invalid message length)"}, //TE_INVALID_CHECKSUM = 0x4 | TE_DO_DISCONNECT {TE_INVALID_CHECKSUM,"Error found in message (checksum)"}, //TE_COULD_NOT_CREATE_SOCKET = 0x5 {TE_COULD_NOT_CREATE_SOCKET,"Error found while creating socket(can't create socket)"}, //TE_COULD_NOT_BIND_SOCKET = 0x6 {TE_COULD_NOT_BIND_SOCKET,"Error found while binding server socket"}, //TE_LISTEN_FAILED = 0x7 {TE_LISTEN_FAILED,"Error found while listening to server socket"}, //TE_ACCEPT_RETURN_ERROR = 0x8 {TE_ACCEPT_RETURN_ERROR,"Error found during accept(accept return error)"}, //TE_SHM_DISCONNECT = 0xb | TE_DO_DISCONNECT {TE_SHM_DISCONNECT,"The remote node has disconnected"}, //TE_SHM_IPC_STAT = 0xc | TE_DO_DISCONNECT {TE_SHM_IPC_STAT,"Unable to check shm segment"}, //TE_SHM_UNABLE_TO_CREATE_SEGMENT = 0xd {TE_SHM_UNABLE_TO_CREATE_SEGMENT,"Unable to create shm segment"}, //TE_SHM_UNABLE_TO_ATTACH_SEGMENT = 0xe {TE_SHM_UNABLE_TO_ATTACH_SEGMENT,"Unable to attach shm segment"}, //TE_SHM_UNABLE_TO_REMOVE_SEGMENT = 0xf {TE_SHM_UNABLE_TO_REMOVE_SEGMENT,"Unable to remove shm segment"}, //TE_TOO_SMALL_SIGID = 0x10 {TE_TOO_SMALL_SIGID,"Sig ID too small"}, //TE_TOO_LARGE_SIGID = 0x11 {TE_TOO_LARGE_SIGID,"Sig ID too large"}, //TE_WAIT_STACK_FULL = 0x12 | TE_DO_DISCONNECT {TE_WAIT_STACK_FULL,"Wait stack was full"}, //TE_RECEIVE_BUFFER_FULL = 0x13 | TE_DO_DISCONNECT {TE_RECEIVE_BUFFER_FULL,"Receive buffer was full"}, //TE_SIGNAL_LOST_SEND_BUFFER_FULL = 0x14 | TE_DO_DISCONNECT {TE_SIGNAL_LOST_SEND_BUFFER_FULL,"Send buffer was full,and trying to force send fails"}, //TE_SIGNAL_LOST = 0x15 {TE_SIGNAL_LOST,"Send failed for unknown reason(signal lost)"}, //TE_SEND_BUFFER_FULL = 0x16 {TE_SEND_BUFFER_FULL,"The send buffer was full, but sleeping for a while solved"}, //TE_SCI_LINK_ERROR = 0x0017 {TE_SCI_LINK_ERROR,"There is no link from this node to the switch"}, //TE_SCI_UNABLE_TO_START_SEQUENCE = 0x18 | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_START_SEQUENCE,"Could not start a sequence, because system resources are exumed or no sequence has been created"}, //TE_SCI_UNABLE_TO_REMOVE_SEQUENCE = 0x19 | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_REMOVE_SEQUENCE,"Could not remove a sequence"}, //TE_SCI_UNABLE_TO_CREATE_SEQUENCE = 0x1a | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_CREATE_SEQUENCE,"Could not create a sequence, because system resources are exempted. Must reboot"}, //TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR = 0x1b | TE_DO_DISCONNECT {TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR,"Tried to send data on redundant link but failed"}, //TE_SCI_CANNOT_INIT_LOCALSEGMENT = 0x1c | TE_DO_DISCONNECT {TE_SCI_CANNOT_INIT_LOCALSEGMENT,"Cannot initialize local segment"}, //TE_SCI_CANNOT_MAP_REMOTESEGMENT = 0x1d | TE_DO_DISCONNEC {TE_SCI_CANNOT_MAP_REMOTESEGMENT,"Cannot map remote segment"}, //TE_SCI_UNABLE_TO_UNMAP_SEGMENT = 0x1e | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_UNMAP_SEGMENT,"Cannot free the resources used by this segment (step 1)"}, //TE_SCI_UNABLE_TO_REMOVE_SEGMENT = 0x1f | TE_DO_DISCONNEC {TE_SCI_UNABLE_TO_REMOVE_SEGMENT,"Cannot free the resources used by this segment (step 2)"}, //TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT = 0x20 | TE_DO_DISCONNECT {TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT,"Cannot disconnect from a remote segment"}, //TE_SHM_IPC_PERMANENT = 0x21 {TE_SHM_IPC_PERMANENT,"Shm ipc Permanent error"}, //TE_SCI_UNABLE_TO_CLOSE_CHANNEL = 0x22 {TE_SCI_UNABLE_TO_CLOSE_CHANNEL,"Unable to close the sci channel and the resources allocated"} }; lenth = sizeof(TransporterErrorString)/sizeof(struct myTransporterError); for(i=0; i<lenth; i++) { if(theData[2] == TransporterErrorString[i].errorNum) { BaseString::snprintf(m_text, m_text_len, "Transporter to node %d reported error 0x%x: %s", theData[1], theData[2], TransporterErrorString[i].errorString); break; } } if(i == lenth) BaseString::snprintf(m_text, m_text_len, "Transporter to node %d reported error 0x%x", "Transporter to node %d reported error 0x%x: unknown error", theData[1], theData[2]); } Loading
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +19 −1 Original line number Diff line number Diff line Loading @@ -897,7 +897,7 @@ void Cmvmi::execSET_VAR_REQ(Signal* signal) case TimeToWaitAlive: // QMGR case HeartbeatIntervalDbDb: // TODO ev till Ndbcnt ocks case HeartbeatIntervalDbDb: // TODO possibly Ndbcnt too case HeartbeatIntervalDbApi: case ArbitTimeout: sendSignal(QMGR_REF, GSN_SET_VAR_REQ, signal, 3, JBB); Loading Loading @@ -1105,6 +1105,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } if (arg == DumpStateOrd::CmvmiDumpSubscriptions) { SubscriberPtr ptr; subscribers.first(ptr); g_eventLogger.info("List subscriptions:"); while(ptr.i != RNIL) { g_eventLogger.info("Subscription: %u, nodeId: %u, ref: 0x%x", ptr.i, refToNode(ptr.p->blockRef), ptr.p->blockRef); for(Uint32 i = 0; i < LogLevel::LOGLEVEL_CATEGORIES; i++) { Uint32 level = ptr.p->logLevel.getLogLevel((LogLevel::EventCategory)i); g_eventLogger.info("Category %u Level %u", i, level); } subscribers.next(ptr); } } if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", g_sectionSegmentPool.getSize(), Loading
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +66 −64 Original line number Diff line number Diff line Loading @@ -1786,8 +1786,8 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) return; }//if if (getNodeStatus(nodeId) != NodeRecord::DEAD){ ndbout << "nodeStatus in START_PERMREQ = " << (Uint32) getNodeStatus(nodeId) << endl; g_eventLogger.error("nodeStatus in START_PERMREQ = %u", (Uint32) getNodeStatus(nodeId)); ndbrequire(false); }//if Loading Loading @@ -4029,7 +4029,7 @@ void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr) jam(); break; default: ndbout_c("outstanding gsn: %s(%d)", g_eventLogger.error("outstanding gsn: %s(%d)", getSignalName(c_nodeStartMaster.m_outstandingGsn), c_nodeStartMaster.m_outstandingGsn); ndbrequire(false); Loading Loading @@ -4472,9 +4472,10 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; break; default: ndbout << "activeStatus = " << (Uint32) failedNodePtr.p->activeStatus; ndbout << " at failure after NODE_FAILREP of node = "; ndbout << failedNodePtr.i << endl; g_eventLogger.error("activeStatus = %u " "at failure after NODE_FAILREP of node = %u", (Uint32) failedNodePtr.p->activeStatus, failedNodePtr.i); ndbrequire(false); break; }//switch Loading Loading @@ -4629,7 +4630,7 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ /** * Node failure during master take over... */ ndbout_c("Nodefail during master take over"); g_eventLogger.info("Nodefail during master take over"); } setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER); Loading Loading @@ -4869,7 +4870,8 @@ void Dbdih::execMASTER_GCPCONF(Signal* signal) if (latestLcpId > SYSFILE->latestLCP_ID) { jam(); #if 0 ndbout_c("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId); g_eventLogger.info("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId); SYSFILE->latestLCP_ID = latestLcpId; #endif SYSFILE->keepGCI = oldestKeepGci; Loading Loading @@ -5528,7 +5530,7 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId, if (ERROR_INSERTED(7030)) { ndbout_c("Reenable GCP_PREPARE"); g_eventLogger.info("Reenable GCP_PREPARE"); CLEAR_ERROR_INSERT_VALUE; } Loading Loading @@ -5701,7 +5703,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); #if 0 if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){ ndbout_c("Dbdih: Also resetting c_copyGCISlave"); g_eventLogger.info("Dbdih: Also resetting c_copyGCISlave"); c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE; c_copyGCISlave.m_expectedNextWord = 0; } Loading Loading @@ -5790,7 +5792,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ if(c_lcpState.lcpStatus == LCP_TAB_SAVED){ #ifdef VM_TRACE ndbout_c("Sending extra GSN_LCP_COMPLETE_REP to new master"); g_eventLogger.info("Sending extra GSN_LCP_COMPLETE_REP to new master"); #endif sendLCP_COMPLETE_REP(signal); } Loading Loading @@ -5946,7 +5948,7 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) nodePtr.p->lcpStateAtTakeOver = lcpState; #ifdef VM_TRACE ndbout_c("MASTER_LCPCONF"); g_eventLogger.info("MASTER_LCPCONF"); printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0); #endif Loading Loading @@ -6023,7 +6025,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) // protocol. /* --------------------------------------------------------------------- */ #ifdef VM_TRACE ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"); g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"); #endif checkLcpStart(signal, __LINE__); break; Loading @@ -6034,7 +6036,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) // protocol by calculating the keep gci and storing the new lcp id. /* --------------------------------------------------------------------- */ #ifdef VM_TRACE ndbout_c("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"); g_eventLogger.info("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"); #endif if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) { jam(); Loading @@ -6045,7 +6047,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) /*---------------------------------------------------------------------*/ Uint32 lcpId = SYSFILE->latestLCP_ID; #ifdef VM_TRACE ndbout_c("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1); g_eventLogger.info("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1); #endif SYSFILE->latestLCP_ID--; }//if Loading @@ -6062,7 +6064,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) * complete before finalising the LCP process. * ------------------------------------------------------------------ */ #ifdef VM_TRACE ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> " g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> " "startLcpRoundLoopLab(table=%u, fragment=%u)", c_lcpMasterTakeOverState.minTableId, c_lcpMasterTakeOverState.minFragId); Loading Loading @@ -7376,8 +7378,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) if (cgcpSameCounter == 1200) { jam(); #ifdef VM_TRACE ndbout << "System crash due to GCP Stop in state = "; ndbout << (Uint32) cgcpStatus << endl; g_eventLogger.error("System crash due to GCP Stop in state = %u", (Uint32) cgcpStatus); #endif crashSystemAtGcpStop(signal); return; Loading @@ -7390,8 +7392,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) if (cgcpSameCounter == 1200) { jam(); #ifdef VM_TRACE ndbout << "System crash due to GCP Stop in state = "; ndbout << (Uint32) cgcpStatus << endl; g_eventLogger.error("System crash due to GCP Stop in state = %u", (Uint32) cgcpStatus); #endif crashSystemAtGcpStop(signal); return; Loading Loading @@ -7582,7 +7584,7 @@ void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId) getNodeState().startLevel == NodeState::SL_STARTED){ jam(); #if 0 ndbout_c("Dbdih: Clearing initial start ongoing"); g_eventLogger.info("Dbdih: Clearing initial start ongoing"); #endif Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits); } Loading @@ -7601,7 +7603,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) if (ERROR_INSERTED(7030)) { cgckptflag = true; ndbout_c("Delayed GCP_PREPARE 5s"); g_eventLogger.info("Delayed GCP_PREPARE 5s"); sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000, signal->getLength()); return; Loading @@ -7621,7 +7623,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) if (ERROR_INSERTED(7031)) { ndbout_c("Crashing delayed in GCP_PREPARE 3s"); g_eventLogger.info("Crashing delayed in GCP_PREPARE 3s"); signal->theData[0] = 9999; sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1); return; Loading Loading @@ -8136,7 +8138,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) * This is LCP master takeover */ #ifdef VM_TRACE ndbout_c("initLcpLab aborted due to LCP master takeover - 1"); g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 1"); #endif c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); sendMASTER_LCPCONF(signal); Loading @@ -8149,7 +8151,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) * Master take over but has not yet received MASTER_LCPREQ */ #ifdef VM_TRACE ndbout_c("initLcpLab aborted due to LCP master takeover - 2"); g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 2"); #endif return; } Loading Loading @@ -9380,9 +9382,10 @@ void Dbdih::checkTcCounterLab(Signal* signal) { CRASH_INSERTION(7009); if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) { ndbout << "lcpStatus = " << (Uint32) c_lcpState.lcpStatus; ndbout << "lcpStatusUpdatedPlace = " << c_lcpState.lcpStatusUpdatedPlace << endl; g_eventLogger.error("lcpStatus = %u" "lcpStatusUpdatedPlace = %d", (Uint32) c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace); ndbrequire(false); return; }//if Loading Loading @@ -9935,9 +9938,8 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal) if(tabPtr.p->tabStatus == TabRecord::TS_DROPPING){ jam(); ndbout_c("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ", tableId, fragId); g_eventLogger.info("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ", tableId, fragId); } else { jam(); /** Loading Loading @@ -10065,7 +10067,7 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, }; #ifdef VM_TRACE ndbout_c("Fragment Replica(node=%d) not found", nodeId); g_eventLogger.info("Fragment Replica(node=%d) not found", nodeId); replicaPtr.i = fragPtrP->oldStoredReplicas; while(replicaPtr.i != RNIL){ ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); Loading @@ -10078,9 +10080,9 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, }//if }; if(replicaPtr.i != RNIL){ ndbout_c("...But was found in oldStoredReplicas"); g_eventLogger.info("...But was found in oldStoredReplicas"); } else { ndbout_c("...And wasn't found in oldStoredReplicas"); g_eventLogger.info("...And wasn't found in oldStoredReplicas"); } #endif ndbrequire(false); Loading Loading @@ -10114,7 +10116,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) ndbrequire(replicaPtr.p->lcpOngoingFlag == true); if(lcpNo != replicaPtr.p->nextLcp){ ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d", g_eventLogger.error("lcpNo = %d replicaPtr.p->nextLcp = %d", lcpNo, replicaPtr.p->nextLcp); ndbrequire(false); } Loading Loading @@ -10150,7 +10152,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) // Not all fragments in table have been checkpointed. /* ----------------------------------------------------------------- */ if(0) ndbout_c("reportLcpCompletion: fragment %d not ready", fid); g_eventLogger.info("reportLcpCompletion: fragment %d not ready", fid); return false; }//if }//for Loading Loading @@ -10267,7 +10269,7 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal) jamEntry(); #if 0 ndbout_c("LCP_COMPLETE_REP"); g_eventLogger.info("LCP_COMPLETE_REP"); printLCP_COMPLETE_REP(stdout, signal->getDataPtr(), signal->length(), number()); Loading Loading @@ -10353,7 +10355,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal) if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){ jam(); #ifdef VM_TRACE ndbout_c("Exiting from allNodesLcpCompletedLab"); g_eventLogger.info("Exiting from allNodesLcpCompletedLab"); #endif return; } Loading Loading @@ -10582,12 +10584,12 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) infoEvent("Detected GCP stop...sending kill to %s", c_GCP_SAVEREQ_Counter.getText()); ndbout_c("Detected GCP stop...sending kill to %s", g_eventLogger.error("Detected GCP stop...sending kill to %s", c_GCP_SAVEREQ_Counter.getText()); return; } case GCP_SAVE_LQH_FINISHED: ndbout_c("m_copyReason: %d m_waiting: %d", g_eventLogger.error("m_copyReason: %d m_waiting: %d", c_copyGCIMaster.m_copyReason, c_copyGCIMaster.m_waiting); break; Loading @@ -10597,7 +10599,7 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) break; } ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", g_eventLogger.error("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", c_copyGCISlave.m_senderData, c_copyGCISlave.m_senderRef, c_copyGCISlave.m_copyReason, Loading Loading @@ -12804,9 +12806,9 @@ void Dbdih::setLcpActiveStatusEnd() nodePtr.i = getOwnNodeId(); ptrAss(nodePtr, nodeRecord); ndbrequire(nodePtr.p->activeStatus == Sysfile::NS_Active); ndbout_c("NR: setLcpActiveStatusEnd - m_participatingLQH"); g_eventLogger.info("NR: setLcpActiveStatusEnd - m_participatingLQH"); } else { ndbout_c("NR: setLcpActiveStatusEnd - !m_participatingLQH"); g_eventLogger.info("NR: setLcpActiveStatusEnd - !m_participatingLQH"); } } Loading Loading @@ -13637,8 +13639,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) } if(arg == DumpStateOrd::EnableUndoDelayDataWrite){ ndbout << "Dbdih:: delay write of datapages for table = " << dumpState->args[1]<< endl; g_eventLogger.info("Dbdih:: delay write of datapages for table = %s", dumpState->args[1]); // Send this dump to ACC and TUP EXECUTE_DIRECT(DBACC, GSN_DUMP_STATE_ORD, signal, 2); EXECUTE_DIRECT(DBTUP, GSN_DUMP_STATE_ORD, signal, 2); Loading @@ -13655,13 +13657,13 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) }//if if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) { // Set time between LCP to min value ndbout << "Set time between LCP to min value" << endl; g_eventLogger.info("Set time between LCP to min value"); c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min return; } if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) { // Set time between LCP to max value ndbout << "Set time between LCP to max value" << endl; g_eventLogger.info("Set time between LCP to max value"); c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max return; } Loading Loading @@ -13697,7 +13699,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) { cgcpDelay = signal->theData[1]; } ndbout_c("Setting time between gcp : %d", cgcpDelay); g_eventLogger.info("Setting time between gcp : %d", cgcpDelay); } if (arg == 7021 && signal->getLength() == 2) Loading Loading @@ -13820,7 +13822,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ while(index < count){ if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){ jam(); // ndbout_c("Unqueuing %d", index); // g_eventLogger.info("Unqueuing %d", index); count--; for(Uint32 i = index; i<count; i++){ Loading Loading @@ -13860,7 +13862,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ if(checkLcpAllTablesDoneInLqh()){ jam(); ndbout_c("This is the last table"); g_eventLogger.info("This is the last table"); /** * Then check if saving of tab info is done for all tables Loading @@ -13869,7 +13871,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ checkLcpCompletedLab(signal); if(a != c_lcpState.lcpStatus){ ndbout_c("And all tables are written to already written disk"); g_eventLogger.info("And all tables are written to already written disk"); } } break; Loading
ndb/src/kernel/vm/WatchDog.cpp +5 −2 Original line number Diff line number Diff line Loading @@ -22,6 +22,9 @@ #include <NdbOut.hpp> #include <NdbSleep.h> #include <ErrorHandlingMacros.hpp> #include <EventLogger.hpp> extern EventLogger g_eventLogger; extern "C" void* Loading Loading @@ -125,7 +128,7 @@ WatchDog::run(){ last_stuck_action = "Unknown place"; break; }//switch ndbout << "Ndb kernel is stuck in: " << last_stuck_action << endl; g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action); if(alerts == 3){ shutdownSystem(last_stuck_action); } Loading