Commit 19340f22 authored by unknown's avatar unknown
Browse files

ndb - bug#18414

  Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding


ndb/src/kernel/blocks/ERROR_codes.txt:
  New error code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  remove dumping of LCP info during NF
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase for bug18414
ndb/test/ndbapi/testTimeout.cpp:
  Fix error code checking
ndb/test/run-test/daily-basic-tests.txt:
  Add testcase for bug18414
parent 058019f6
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -226,6 +226,8 @@ Delay execution of COMPLETECONF signal 2 seconds to generate time-out.
8045: (ABORTCONF only as part of take-over)
Delay execution of ABORTCONF signal 2 seconds to generate time-out.

8050: Send ZABORT_TIMEOUT_BREAK delayed 2 seconds

ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------

+0 −4
Original line number Diff line number Diff line
@@ -5982,10 +5982,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
  signal->theData[0] = 7012;
  execDUMP_STATE_ORD(signal);

  signal->theData[0] = 7015;
  signal->theData[1] = 0;
  execDUMP_STATE_ORD(signal);

  c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);

  checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
+44 −8
Original line number Diff line number Diff line
@@ -6386,6 +6386,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
    return;
  }
  
  bool found = false;
  OperationState tmp[16];
  
  Uint32 TloopCount = 0;
@@ -6393,7 +6394,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
    jam();
    if (tcConnectptr.i == RNIL) {
      jam();
      if (Tcheck == 0) {

#ifdef VM_TRACE
      ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
	       found, Tcheck, apiConnectptr.p->counter);
#endif
      if (found || apiConnectptr.p->counter)
      {
	jam();
	/**
	 * We sent at least one ABORT/ABORTED
	 *   or ZABORT_TIMEOUT_BREAK is in job buffer
	 *   wait for reception...
	 */
	return;
      }
      
      if (Tcheck == 1)
      {
	jam();
	releaseAbortResources(signal);
	return;
      }
      
      if (Tcheck == 0)
      {
        jam();
	/*------------------------------------------------------------------
	 * All nodes had already reported ABORTED for all tcConnect records.
@@ -6402,9 +6427,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
	 *------------------------------------------------------------------*/
	char buf[96]; buf[0] = 0;
	char buf2[96];
	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
		 __LINE__, apiConnectptr.i);
	for(Uint32 i = 0; i<TloopCount; i++){
	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
			     __LINE__, apiConnectptr.i,
			     apiConnectptr.p->counter);
	for(Uint32 i = 0; i<TloopCount; i++)
	{
	  BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
	  BaseString::snprintf(buf, sizeof(buf), buf2);
	}
@@ -6412,7 +6439,9 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
	ndbout_c(buf);
	ndbrequire(false);
	releaseAbortResources(signal);
	return;
      }
      
      return;
    }//if
    TloopCount++;
@@ -6427,7 +6456,16 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
      signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
      signal->theData[1] = tcConnectptr.i;
      signal->theData[2] = apiConnectptr.i;      
      if (ERROR_INSERTED(8050))
      {
	ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)", 
		 Tcheck, apiConnectptr.p->counter);
	sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
      }
      else
      {
	sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
      }
      return;
    }//if
    ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6450,7 +6488,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
        jam();
        if (tcConnectptr.p->tcNodedata[Ti] != 0) {
          TloopCount += 31;
          Tcheck = 1;
	  found = true;
          hostptr.i = tcConnectptr.p->tcNodedata[Ti];
          ptrCheckGuard(hostptr, chostFilesize, hostRecord);
          if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -7007,8 +7045,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
  hostptr.i = tfailedNodeId;
  ptrCheckGuard(hostptr, chostFilesize, hostRecord);

  ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
	   tfailedNodeId, signal->getSendersBlockRef(), reference());
  if (signal->getSendersBlockRef() != reference())
  {
    jam();
+73 −0
Original line number Diff line number Diff line
@@ -581,6 +581,73 @@ runBug16772(NDBT_Context* ctx, NDBT_Step* step){
  return ret ? NDBT_OK : NDBT_FAILED;
}

/**
 * Test step for bug#18414.
 *
 * Repeats the following sequence five times:
 *   1. start a transaction, run pkUpdateRecord(pNdb, 0, 128, rand()) and
 *      execute it without committing,
 *   2. pick node1 (the node the transaction is connected to) and node2
 *      (a random node from the same node group as node1),
 *   3. on odd iterations insert error 8050 in node1,
 *   4. insert error 5003 in node2 and roll the transaction back,
 *   5. wait for node2 to reach the "no start" state, clear all inserted
 *      errors, start node2 again and wait for the cluster to be started,
 *   6. run scanUpdateRecords over 128 records and close the transaction.
 *
 * NOTE(review): error 5003 is presumably a crash insert for node2 (the test
 * waits for it to enter "no start" afterwards) - confirm in ERROR_codes.txt.
 *
 * @param ctx   test context; supplies the table and is told to stop the
 *              test when fewer than two data nodes are available
 * @param step  test step; supplies the Ndb object
 * @return NDBT_OK on success, or when the test cannot be run (fewer than two
 *         data nodes, or no suitable node pair found); NDBT_FAILED otherwise
 */
int 
runBug18414(NDBT_Context* ctx, NDBT_Step* step){

  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  Ndb* pNdb = GETNDB(step);
  HugoOperations hugoOps(*ctx->getTab());
  HugoTransactions hugoTrans(*ctx->getTab());
  int loop = 0;
  do 
  {
    if(hugoOps.startTransaction(pNdb) != 0)
      goto err;
    
    if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0)
      goto err;
    
    if(hugoOps.execute_NoCommit(pNdb) != 0)
      goto err;

    int node1 = hugoOps.getTransaction()->getConnectedNodeId();
    int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
    
    if (node1 == -1 || node2 == -1)
    {
      // No usable node pair: nothing to test. The transaction started above
      // is still open here, so close it before leaving the loop, exactly as
      // every other exit path does.
      hugoOps.closeTransaction(pNdb);
      break;
    }
    
    if (loop & 1)
    {
      // Odd iterations only: insert error 8050 in the node the transaction
      // is connected to.
      if (restarter.insertErrorInNode(node1, 8050))
        goto err;
    }
    
    if (restarter.insertErrorInNode(node2, 5003))
      goto err;
    
    // The outcome of the rollback is deliberately not checked; the test
    // verdict is based on the restart and scan-update steps that follow.
    (void)hugoOps.execute_Rollback(pNdb);
  
    if (restarter.waitNodesNoStart(&node2, 1) != 0)
      goto err;
    
    // Clear the inserted errors before bringing node2 back.
    if (restarter.insertErrorInAllNodes(0))
      goto err;
    
    if (restarter.startNodes(&node2, 1) != 0)
      goto err;
    
    if (restarter.waitClusterStarted() != 0)
      goto err;
    
    if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0)
      goto err;

    hugoOps.closeTransaction(pNdb);
    
  } while(++loop < 5);
  
  return NDBT_OK;
  
err:
  hugoOps.closeTransaction(pNdb);
  return NDBT_FAILED;    
}

NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", 
@@ -870,6 +937,12 @@ TESTCASE("Bug16772",
	 "Test bug with restarting before NF handling is complete"){
  STEP(runBug16772);
}
// Registers test case "Bug18414": runLoadTable is the initializer,
// runBug18414 is the test step and runClearTable is the finalizer.
// NOTE(review): the description string does not mention the rollback that
// runBug18414 performs while a node has an error inserted - consider
// revising the wording.
TESTCASE("Bug18414",
	 "Test bug with NF during NR"){
  INITIALIZER(runLoadTable);
  STEP(runBug18414);
  FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);

int main(int argc, const char** argv){
+5 −2
Original line number Diff line number Diff line
@@ -173,7 +173,10 @@ int runTimeoutTrans(NDBT_Context* ctx, NDBT_Step* step){
      NdbSleep_MilliSleep(sleep);
      
      // Expect that transaction has timed-out
      CHECK(hugoOps.execute_Commit(pNdb) == 237); 
      int ret = hugoOps.execute_Commit(pNdb);
      CHECK(ret != 0);
      NdbError err = pNdb->getNdbError(ret);
      CHECK(err.classification == NdbError::TimeoutExpired);
      
    } while(false);

Loading