Commit e5eee3fa authored by unknown's avatar unknown
Browse files

ndb - bug#25468

  handle partially transferred LCP_FRAG_REP after node failure
  recommit to 51-work


storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  handle partially transferred LCP_FRAG_REP after node failure
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  handle partially transferred LCP_FRAG_REP after node failure
storage/ndb/test/ndbapi/testNodeRestart.cpp:
  testcase
storage/ndb/test/run-test/daily-basic-tests.txt:
  testcase
parent f0b50825
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -637,6 +637,7 @@ private:
  void execTCGETOPSIZECONF(Signal *);
  void execTC_CLOPSIZECONF(Signal *);
  
  /**
   * Called from reportLcpCompletion() when the lcpNo carried by an
   * LCP_FRAG_REP differs from the replica's nextLcp.  A non-zero return
   * value is treated as a fatal inconsistency by that caller.
   */
  int handle_invalid_lcp_no(const class LcpFragRep*, ReplicaRecordPtr);
  void execLCP_FRAG_REP(Signal *);
  void execLCP_COMPLETE_REP(Signal *);
  void execSTART_LCP_REQ(Signal *);
+74 −5
Original line number Diff line number Diff line
@@ -4046,6 +4046,11 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
  Uint32 newMasterId = nodeFail->masterNodeId;
  const Uint32 noOfFailedNodes = nodeFail->noOfNodes;
  if (ERROR_INSERTED(7179))
  {
    CLEAR_ERROR_INSERT_VALUE;
  }
  /*-------------------------------------------------------------------------*/
  // The first step is to convert from a bit mask to an array of failed nodes.
  /*-------------------------------------------------------------------------*/
@@ -10257,6 +10262,36 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal)
  
  jamEntry();
  if (ERROR_INSERTED(7178) && nodeId != getOwnNodeId())
  {
    jam();
    Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
    Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
    if (owng == nodeg)
    {
      jam();
      ndbout_c("throwing away LCP_FRAG_REP from  (and killing) %d", nodeId);
      SET_ERROR_INSERT_VALUE(7179);
      signal->theData[0] = 9999;
      sendSignal(numberToRef(CMVMI, nodeId), 
		 GSN_NDB_TAMPER, signal, 1, JBA);  
      return;
    }
  }
 
  if (ERROR_INSERTED(7179) && nodeId != getOwnNodeId())
  {
    jam();
    Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
    Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
    if (owng == nodeg)
    {
      jam();
      ndbout_c("throwing away LCP_FRAG_REP from %d", nodeId);
      return;
    }
  }    
  CRASH_INSERTION2(7025, isMaster());
  CRASH_INSERTION2(7016, !isMaster());
  
@@ -10463,6 +10498,37 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
  ndbrequire(false);
}//Dbdih::findReplica()
int
Dbdih::handle_invalid_lcp_no(const LcpFragRep* rep, 
			     ReplicaRecordPtr replicaPtr)
{
  ndbrequire(!isMaster());
  Uint32 lcpNo = rep->lcpNo;
  Uint32 lcpId = rep->lcpId;
  Uint32 replicaLcpNo = replicaPtr.p->nextLcp;
  Uint32 prevReplicaLcpNo = prevLcpNo(replicaLcpNo);
  warningEvent("Detected previous node failure of %d during lcp",
	       rep->nodeId);
  replicaPtr.p->nextLcp = lcpNo;
  replicaPtr.p->lcpId[lcpNo] = 0;
  replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
  
  for (Uint32 i = lcpNo; i != lcpNo; i = nextLcpNo(i))
  {
    jam();
    if (replicaPtr.p->lcpStatus[i] == ZVALID &&
	replicaPtr.p->lcpId[i] >= lcpId)
    {
      ndbout_c("i: %d lcpId: %d", i, replicaPtr.p->lcpId[i]);
      ndbrequire(false);
    }
  }
  return 0;
}
/**
 * Return true if all fragment replicas of the table have been checkpointed
 *                 to disk (in all LQHs)
@@ -10491,10 +10557,13 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
  
  ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
  if(lcpNo != replicaPtr.p->nextLcp){
    if (handle_invalid_lcp_no(lcpReport, replicaPtr))
    {
      ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d", 
	       lcpNo, replicaPtr.p->nextLcp);
      ndbrequire(false);
    }
  }
  ndbrequire(lcpNo == replicaPtr.p->nextLcp);
  ndbrequire(lcpNo < MAX_LCP_STORED);
  ndbrequire(replicaPtr.p->lcpId[lcpNo] != lcpId);
+60 −0
Original line number Diff line number Diff line
@@ -1073,6 +1073,63 @@ int runBug25364(NDBT_Context* ctx, NDBT_Step* step){
  return NDBT_OK;
}

/**
 * Test case for bug#25468.
 *
 * Each loop iteration picks two nodes, node1 and node2, where node2 is
 * chosen from node1's node group, rotating which of them (if any) is the
 * master.  It then:
 *   1. enables CmvmiSetRestartOnErrorInsert on node2,
 *   2. inserts error 7178 in node1,
 *   3. sends dump code 7099 to the master
 *      (NOTE(review): presumably forces an LCP to start - confirm),
 *   4. waits for node2 to reach the no-start state,
 *   5. restarts all nodes and waits for the cluster to be started.
 *
 * @return NDBT_OK when every iteration completes, NDBT_FAILED as soon as a
 *         restarter call fails or no usable node pair can be selected
 */
int runBug25468(NDBT_Context* ctx, NDBT_Step* step){
  
  const int loops = ctx->getNumLoops();
  NdbRestarter restarter;
  
  for (int i = 0; i<loops; i++)
  {
    const int master = restarter.getMasterNodeId();
    // Start from an invalid id so a missed assignment cannot go unnoticed.
    int node1 = -1;
    int node2 = -1;
    switch(i % 5){
    case 0:
      node1 = master;
      node2 = restarter.getRandomNodeSameNodeGroup(master, rand());
      break;
    case 1:
      node1 = restarter.getRandomNodeSameNodeGroup(master, rand());
      node2 = master;
      break;
    case 2:
    case 3:
    case 4:
      node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
      // No other node group available: fall back to the master.
      if (node1 == -1)
        node1 = master;
      node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
      break;
    }

    ndbout_c("node1: %d node2: %d master: %d", node1, node2, master);

    // A negative id means no suitable node was found (e.g. node1 has no
    // peer in its node group); fail with a clear message instead of handing
    // an invalid id to the restarter calls below.
    if (node1 < 0 || node2 < 0)
    {
      ndbout_c("runBug25468: unable to select two nodes in the same node group");
      return NDBT_FAILED;
    }

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  
    if (restarter.dumpStateOneNode(node2, val2, 2))
      return NDBT_FAILED;

    if (restarter.insertErrorInNode(node1, 7178))
      return NDBT_FAILED;

    int val1 = 7099;
    if (restarter.dumpStateOneNode(master, &val1, 1))
      return NDBT_FAILED;

    if (restarter.waitNodesNoStart(&node2, 1))
      return NDBT_FAILED;

    if (restarter.startAll())
      return NDBT_FAILED;

    if (restarter.waitClusterStarted())
      return NDBT_FAILED;
  }    

  return NDBT_OK;
}


NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", 
@@ -1403,6 +1460,9 @@ TESTCASE("Bug24717", ""){
TESTCASE("Bug25364", ""){
  INITIALIZER(runBug25364);
}
TESTCASE("Bug25468", ""){
  // Registers runBug25468 as the initializer step of this test case.
  INITIALIZER(runBug25468);
}
NDBT_TESTSUITE_END(testNodeRestart);

int main(int argc, const char** argv){
+4 −0
Original line number Diff line number Diff line
@@ -768,6 +768,10 @@ max-time: 1500
cmd: testSystemRestart
args: -n Bug24664

max-time: 1000
cmd: testNodeRestart
args: -n Bug25468 T1

# OLD FLEX
max-time: 500
cmd: flexBench