Commit be0d6c94 authored by unknown's avatar unknown
Browse files

ndb - wl1760/bug#18216

  add two new start options that will decrease likelihood of bug#18612
  push cntr-sp2 logic down into qmgr-sp1 to decrease likelihood of bug#18612


ndb/include/kernel/signaldata/CmRegSignalData.hpp:
  Expand CmRegReq with lots of stuff
ndb/include/mgmapi/ndb_logevent.h:
  Add Start report during sp1
ndb/src/common/debugger/EventLogger.cpp:
  Add Start report during sp1
ndb/src/kernel/blocks/dbdih/DbdihInit.cpp:
  Init cntrref
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Fix small bugs related to partial initial start
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp:
  Remove timeouts as they are handled in QMGR nowadays
ndb/src/kernel/blocks/qmgr/Qmgr.hpp:
  Push cntr sp2 logic down to QMGR to prevent the forming of multiple QMGR cluster
    that will lead to start problems (crashes...)
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp:
  Push cntr sp2 logic down to QMGR to prevent the forming of multiple QMGR cluster
    that will lead to start problems (crashes...)
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  Push cntr sp2 logic down to QMGR to prevent the forming of multiple QMGR cluster
    that will lead to start problems (crashes...)
ndb/src/kernel/vm/Configuration.cpp:
  Add new flags
    --initial-start
    --nowait-nodes
parent e40bdb5b
Loading
Loading
Loading
Loading
+23 −6
Original line number Diff line number Diff line
@@ -30,12 +30,17 @@ class CmRegReq {
  friend class Qmgr;
  
public:
  STATIC_CONST( SignalLength = 3 );
  STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size );
private:
  
  Uint32 blockRef;
  Uint32 nodeId;
  Uint32 version;    // See ndb_version.h

  Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType
  Uint32 latest_gci; // 0 means no fs
  Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ 
                                           // to be part of restart
};

/**
@@ -59,7 +64,6 @@ private:
   * The dynamic id that the node receiving this signal has
   */
  Uint32 dynamicId;
  
  Uint32 allNdbNodes[NdbNodeBitmask::Size];  
};

@@ -73,7 +77,7 @@ class CmRegRef {
  friend class Qmgr;
  
public:
  STATIC_CONST( SignalLength = 4 );
  STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size );
  
  enum ErrorCode {
    ZBUSY = 0,          /* Only the president can send this */
@@ -85,14 +89,27 @@ public:
                         * as president. */
    ZNOT_PRESIDENT = 5, /* We are not president */
    ZNOT_DEAD = 6,       /* We are not dead when we are starting  */
    ZINCOMPATIBLE_VERSION = 7
    ZINCOMPATIBLE_VERSION = 7,
    ZINCOMPATIBLE_START_TYPE = 8
  };
private:
  
  Uint32 blockRef;
  Uint32 nodeId;
  Uint32 errorCode;
  /**
   * Applicable if ZELECTION
   */
  Uint32 presidentCandidate;
  Uint32 candidate_latest_gci; // 0 means non

  /**
   * Data for the sending node
   */
  Uint32 latest_gci; 
  Uint32 start_type; 
  Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ 
                                           // to be part of restart
};

class CmAdd {
+12 −1
Original line number Diff line number Diff line
@@ -166,10 +166,14 @@ extern "C" {
    /** NDB_MGM_EVENT_CATEGORY_BACKUP */
    NDB_LE_BackupCompleted = 56,
    /** NDB_MGM_EVENT_CATEGORY_BACKUP */
    NDB_LE_BackupAborted = 57
    NDB_LE_BackupAborted = 57,

    /* 58 used in 5.1  */
    /* 59 used */

    /** NDB_MGM_EVENT_CATEGORY_STARTUP */
    NDB_LE_StartReport = 60

    /* 60 used by NDB_LE_StartReport */
    /* 61 unused */
    /* 62 unused */
@@ -625,6 +629,13 @@ extern "C" {
        unsigned type;
        unsigned node_id;
      } SingleUser;
      /** Log even data @ref NDB_LE_StartReport */
      struct {
	unsigned report_type;
	unsigned remaining_time;
	unsigned bitmask_size;
	unsigned bitmask_data[1];
      } StartReport;
#ifndef DOXYGEN_FIX
    };
#else
+85 −0
Original line number Diff line number Diff line
@@ -707,6 +707,90 @@ void getTextSingleUser(QQQQ) {
  }
}

/**
 * Format an NDB_LE_StartReport event into m_text (at most m_text_len bytes).
 *
 * Words read from theData:
 *   theData[1]            report type, selects the message below
 *   theData[2]            time in seconds (printed for types 3 and 5 only)
 *   theData[3]            sz, number of 32-bit words per node bitmask
 *   theData[4 + k * sz]   k = 0..3, four consecutive bitmasks of sz words,
 *                         printed as "all", "connected", "no-wait" and
 *                         "waiting for"/"missing" respectively
 *
 * Each bitmask is rendered into a fixed 100-byte buffer.  sz comes straight
 * from the event payload, so the number of words rendered is clamped to what
 * the buffer can hold; without the clamp a large sz overruns the stack
 * buffers.  The payload offsets still use the unclamped sz so that each mask
 * is read from where the sender placed it.
 */
void getTextStartReport(QQQQ) {
  const Uint32 reportType = theData[1];
  const Uint32 seconds = theData[2];
  const Uint32 sz = theData[3];

  char mask1[100]; // all nodes
  char mask2[100]; // connected nodes
  char mask3[100]; // no-wait nodes
  char mask4[100]; // nodes waited for / missing

  // NOTE(review): assumes BitmaskImpl::getText() writes 8 hex characters per
  // word followed by a terminating NUL - confirm against Bitmask.hpp.
  const Uint32 maxWords = static_cast<Uint32>((sizeof(mask1) - 1) / 8);
  const Uint32 textWords = (sz < maxWords) ? sz : maxWords;

  BitmaskImpl::getText(textWords, theData + 4 + (0 * sz), mask1);
  BitmaskImpl::getText(textWords, theData + 4 + (1 * sz), mask2);
  BitmaskImpl::getText(textWords, theData + 4 + (2 * sz), mask3);
  BitmaskImpl::getText(textWords, theData + 4 + (3 * sz), mask4);

  switch(reportType){
  case 1: // Wait initial
    BaseString::snprintf
      (m_text, m_text_len,
       "Initial start, waiting for %s to connect, "
       " nodes [ all: %s connected: %s no-wait: %s ]",
       mask4, mask1, mask2, mask3);
    break;
  case 2: // Wait partial
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting until nodes: %s connects, "
       "nodes [ all: %s connected: %s no-wait: %s ]",
       mask4, mask1, mask2, mask3);
    break;
  case 3: // Wait partial timeout
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting %u sec for nodes %s to connect, "
       "nodes [ all: %s connected: %s no-wait: %s ]",
       seconds, mask4, mask1, mask2, mask3);
    break;
  case 4: // Wait partitioned
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting for non partitioned start, "
       "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
       mask1, mask2, mask4, mask3);
    break;
  case 5: // Wait partitioned timeout
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting %u sec for non partitioned start, "
       "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
       seconds, mask1, mask2, mask4, mask3);
    break;
  case 0x8000: // Do initial
    BaseString::snprintf
      (m_text, m_text_len,
       "Initial start with nodes %s [ missing: %s no-wait: %s ]",
       mask2, mask4, mask3);
    break;
  case 0x8001: // Do start
    BaseString::snprintf
      (m_text, m_text_len,
       "Start with all nodes %s",
       mask2);
    break;
  case 0x8002: // Do partial
    BaseString::snprintf
      (m_text, m_text_len,
       "Start with nodes %s [ missing: %s no-wait: %s ]",
       mask2, mask4, mask3);
    break;
  case 0x8003: // Do partitioned
    BaseString::snprintf
      (m_text, m_text_len,
       "Start potentially partitioned with nodes %s "
       " [ missing: %s no-wait: %s ]",
       mask2, mask4, mask3);
    break;
  default: // Unrecognised report type: dump the raw type and all four masks
    BaseString::snprintf
      (m_text, m_text_len,
       "Unknown startreport: 0x%x [ %s %s %s %s ]", 
       reportType,
       mask1, mask2, mask3, mask4);
    break;
  }
}

#if 0
BaseString::snprintf(m_text, 
		     m_text_len, 
@@ -755,6 +839,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = {
  ROW(StartREDOLog,            LogLevel::llStartUp,    10, Logger::LL_INFO ),
  ROW(StartLog,                LogLevel::llStartUp,    10, Logger::LL_INFO ),
  ROW(UNDORecordsExecuted,     LogLevel::llStartUp,    15, Logger::LL_INFO ),
  ROW(StartReport,             LogLevel::llStartUp,     4, Logger::LL_INFO ),
  
  // NODERESTART
  ROW(NR_CopyDict,             LogLevel::llNodeRestart, 8, Logger::LL_INFO ),
+1 −0
Original line number Diff line number Diff line
@@ -71,6 +71,7 @@ void Dbdih::initData()
  cwaitLcpSr       = false;
  c_blockCommit    = false;
  c_blockCommitNo  = 1;
  cntrlblockref    = RNIL;
}//Dbdih::initData()

void Dbdih::initRecords() 
+52 −26
Original line number Diff line number Diff line
@@ -11659,7 +11659,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
  Uint32 tmngNode;
  Uint32 tmngNodeGroup;
  Uint32 tmngLimit;
  Uint32 i;
  Uint32 i, j;

  /**-----------------------------------------------------------------------
   * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED 
@@ -11705,6 +11705,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
      Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup);
    }//if
  }//for

  for (i = 0; i<cnoOfNodeGroups; i++)
  {
    jam();
    bool alive = false;
    NodeGroupRecordPtr NGPtr;
    NGPtr.i = i;
    ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
    for (j = 0; j<NGPtr.p->nodeCount; j++)
    {
      jam();
      mngNodeptr.i = NGPtr.p->nodesInGroup[j];
      ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
      if (checkNodeAlive(NGPtr.p->nodesInGroup[j]))
      {
	alive = true;
	break;
      }
    }

    if (!alive)
    {
      char buf[255];
      BaseString::snprintf
	(buf, sizeof(buf), 
	 "Illegal initial start, no alive node in nodegroup %u", i);
      progError(__LINE__, 
		NDBD_EXIT_SR_RESTARTCONFLICT,
		buf);
      
    }
  }
}//Dbdih::makeNodeGroups()

/**
@@ -12512,7 +12544,6 @@ void Dbdih::sendStartFragreq(Signal* signal,
void Dbdih::setInitialActiveStatus()
{
  NodeRecordPtr siaNodeptr;
  Uint32 tsiaNodeActiveStatus;
  Uint32 tsiaNoActiveNodes;

  tsiaNoActiveNodes = csystemnodes - cnoHotSpare;
@@ -12520,39 +12551,34 @@ void Dbdih::setInitialActiveStatus()
    SYSFILE->nodeStatus[i] = 0;
  for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) {
    ptrAss(siaNodeptr, nodeRecord);
    if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) {
    switch(siaNodeptr.p->nodeStatus){
    case NodeRecord::ALIVE:
    case NodeRecord::DEAD:
      if (tsiaNoActiveNodes == 0) {
        jam();
        siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare;
      } else {
        jam();
        tsiaNoActiveNodes = tsiaNoActiveNodes - 1;
        siaNodeptr.p->activeStatus = Sysfile::NS_Active;
      }//if
    } else {
      jam();
      siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
    }//if
    switch (siaNodeptr.p->activeStatus) {
    case Sysfile::NS_Active:
      jam();
      tsiaNodeActiveStatus = Sysfile::NS_Active;
      break;
    case Sysfile::NS_HotSpare:
      jam();
      tsiaNodeActiveStatus = Sysfile::NS_HotSpare;
      break;
    case Sysfile::NS_NotDefined:
        if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE)
	{
	  jam();
      tsiaNodeActiveStatus = Sysfile::NS_NotDefined;
	  siaNodeptr.p->activeStatus = Sysfile::NS_Active;
	} 
	else
	{
	  siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
	}
      }
      break;
    default:
      ndbrequire(false);
      return;
      jam();
      siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
      break;
    }//switch
    Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus,
                           tsiaNodeActiveStatus);
    }//if
    Sysfile::setNodeStatus(siaNodeptr.i, 
			   SYSFILE->nodeStatus,
                           siaNodeptr.p->activeStatus);
  }//for
}//Dbdih::setInitialActiveStatus()

Loading