Commit 165d5390 authored by unknown's avatar unknown
Browse files

bug#10987 - ndb - unable to find restorable replica

  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
    
  This variable is used when cutting redo (calcKeepGci)
  
  Also make sure a complete GCI is run in between LCPs


ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
  
  This variable is used when cutting redo (calcKeepGci)
  Also make sure a complete GCI is run in between LCPs
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
  
  This variable is used when cutting redo (calcKeepGci)
  Also make sure a complete GCI is run in between LCPs
parent bbcb4a56
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -774,7 +774,7 @@ private:
//------------------------------------
// Methods for LCP functionality
//------------------------------------
  void checkKeepGci(Uint32 replicaStartIndex);
  void checkKeepGci(TabRecordPtr, Uint32, Fragmentstore*, Uint32);
  void checkLcpStart(Signal *, Uint32 lineNo);
  void checkStartMoreLcp(Signal *, Uint32 nodeId);
  bool reportLcpCompletion(const class LcpFragRep *);
@@ -1292,7 +1292,7 @@ private:
    }

    Uint32 lcpStart;
    Uint32 lcpStartGcp; 
    Uint32 lcpStopGcp; 
    Uint32 keepGci;      /* USED TO CALCULATE THE GCI TO KEEP AFTER A LCP  */
    Uint32 oldestRestorableGci;
    
@@ -1361,6 +1361,7 @@ private:
  Uint32 cstarttype;
  Uint32 csystemnodes;
  Uint32 currentgcp;
  Uint32 c_newest_restorable_gci;

  enum GcpMasterTakeOverState {
    GMTOS_IDLE = 0,
+17 −11
Original line number Diff line number Diff line
@@ -674,6 +674,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
    jam();
    coldgcp = SYSFILE->newestRestorableGCI;
    crestartGci = SYSFILE->newestRestorableGCI;
    c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
    Sysfile::setRestartOngoing(SYSFILE->systemRestartBits);
    currentgcp = coldgcp + 1;
    cnewgcp = coldgcp + 1;
@@ -692,6 +693,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
    ok = true;
    jam();
    cgcpParticipantState = GCP_PARTICIPANT_COPY_GCI_RECEIVED;
    c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
    setNodeInfo(signal);
    break;
  }//if
@@ -7749,6 +7751,8 @@ void Dbdih::execCOPY_GCICONF(Signal* signal)
    signal->theData[1] = coldgcp;
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);    

    c_newest_restorable_gci = coldgcp;

    CRASH_INSERTION(7004);
    emptyWaitGCPMasterQueue(signal);    
    cgcpStatus = GCP_READY;
@@ -9155,7 +9159,7 @@ void Dbdih::checkTcCounterLab(Signal* signal)
  }//if
  c_lcpState.ctimer += 32;
  if ((c_nodeStartMaster.blockLcp == true) ||
      ((c_lcpState.lcpStartGcp + 1) > currentgcp)) {
      (c_lcpState.lcpStopGcp >= c_newest_restorable_gci)) {
    jam();
    /* --------------------------------------------------------------------- */
    // No reason to start juggling the states and checking for start of LCP if
@@ -9238,7 +9242,6 @@ void Dbdih::execTCGETOPSIZECONF(Signal* signal)
  /* ----------------------------------------------------------------------- */
  c_lcpState.ctimer = 0;
  c_lcpState.keepGci = coldgcp;
  c_lcpState.lcpStartGcp = currentgcp;
  /* ----------------------------------------------------------------------- */
  /*       UPDATE THE NEW LATEST LOCAL CHECKPOINT ID.                        */
  /* ----------------------------------------------------------------------- */
@@ -9310,7 +9313,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
  cnoOfActiveTables++;
  FragmentstorePtr fragPtr;
  getFragstore(tabPtr.p, fragId, fragPtr);
  checkKeepGci(fragPtr.p->storedReplicas);
  checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
  fragId++;
  if (fragId >= tabPtr.p->totalfragments) {
    jam();
@@ -10168,6 +10171,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal)
  signal->theData[0] = EventReport::LocalCheckpointCompleted; //Event type
  signal->theData[1] = SYSFILE->latestLCP_ID;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
  c_lcpState.lcpStopGcp = c_newest_restorable_gci;
  
  /**
   * Start checking for next LCP
@@ -10522,7 +10526,8 @@ void Dbdih::checkEscalation()
/*       DESCRIPTION: CHECK FOR MINIMUM GCI RESTORABLE WITH NEW LOCAL    */
/*                    CHECKPOINT.                                        */
/*************************************************************************/
void Dbdih::checkKeepGci(Uint32 replicaStartIndex) 
void Dbdih::checkKeepGci(TabRecordPtr tabPtr, Uint32 fragId, Fragmentstore*, 
			 Uint32 replicaStartIndex) 
{
  ReplicaRecordPtr ckgReplicaPtr;
  ckgReplicaPtr.i = replicaStartIndex;
@@ -10544,7 +10549,6 @@ void Dbdih::checkKeepGci(Uint32 replicaStartIndex)
    if (oldestRestorableGci > c_lcpState.oldestRestorableGci) {
      jam();
      c_lcpState.oldestRestorableGci = oldestRestorableGci;
      ndbrequire(((int)c_lcpState.oldestRestorableGci) >= 0);
    }//if
    ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica;
  }//while
@@ -10838,7 +10842,7 @@ void Dbdih::findMinGci(ReplicaRecordPtr fmgReplicaPtr,
  do {
    ndbrequire(lcpNo < MAX_LCP_STORED);
    if (fmgReplicaPtr.p->lcpStatus[lcpNo] == ZVALID &&
	fmgReplicaPtr.p->maxGciStarted[lcpNo] <= coldgcp)
	fmgReplicaPtr.p->maxGciStarted[lcpNo] < c_newest_restorable_gci)
    {
      jam();
      keepGci = fmgReplicaPtr.p->maxGciCompleted[lcpNo];
@@ -10960,7 +10964,7 @@ void Dbdih::initCommonData()

  c_lcpState.clcpDelay = 0;
  c_lcpState.lcpStart = ZIDLE;
  c_lcpState.lcpStartGcp = 0;
  c_lcpState.lcpStopGcp = 0;
  c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
  c_lcpState.currentFragment.tableId = 0;
  c_lcpState.currentFragment.fragmentId = 0;
@@ -10996,6 +11000,7 @@ void Dbdih::initCommonData()
  csystemnodes = 0;
  c_updateToLock = RNIL;
  currentgcp = 0;
  c_newest_restorable_gci = 0;
  cverifyQueueCounter = 0;
  cwaitLcpSr = false;

@@ -11067,6 +11072,7 @@ void Dbdih::initRestartInfo()
  currentgcp = 2;
  cnewgcp = 2;
  crestartGci = 1;
  c_newest_restorable_gci = 1;

  SYSFILE->keepGCI             = 1;
  SYSFILE->oldestRestorableGCI = 1;
@@ -13038,9 +13044,9 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
  if (signal->theData[0] == 7001) {
    infoEvent("c_lcpState.keepGci = %d",
              c_lcpState.keepGci);
    infoEvent("c_lcpState.lcpStatus = %d, clcpStartGcp = %d",
    infoEvent("c_lcpState.lcpStatus = %d, clcpStopGcp = %d",
              c_lcpState.lcpStatus, 
	      c_lcpState.lcpStartGcp);
	      c_lcpState.lcpStopGcp);
    infoEvent("cgcpStartCounter = %d, cimmediateLcpStart = %d",
              cgcpStartCounter, c_lcpState.immediateLcpStart);
  }//if  
@@ -13221,8 +13227,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
    infoEvent("lcpStatus = %d (update place = %d) ",
	      c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
    infoEvent
      ("lcpStart = %d lcpStartGcp = %d keepGci = %d oldestRestorable = %d",
       c_lcpState.lcpStart, c_lcpState.lcpStartGcp, 
      ("lcpStart = %d lcpStopGcp = %d keepGci = %d oldestRestorable = %d",
       c_lcpState.lcpStart, c_lcpState.lcpStopGcp, 
       c_lcpState.keepGci, c_lcpState.oldestRestorableGci);
    
    infoEvent