Commit 32c1b70e authored by tomas@whalegate.ndb.mysql.com's avatar tomas@whalegate.ndb.mysql.com
Browse files

bug#28717, make sure only master updates activeStatus

  so that other nodes don't get confused after having received status from master
  and then try to update it themselves
parent bb1fa5e3
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
Next QMGR 1
Next NDBCNTR 1000
Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4014
@@ -487,3 +487,8 @@ Dbdict:
6003 Crash in participant @ CreateTabReq::Prepare
6004 Crash in participant @ CreateTabReq::Commit
6005 Crash in participant @ CreateTabReq::CreateDrop

Ndbcntr:
--------

1001: Delay sending NODE_FAILREP (to own node), until error is cleared
+7 −1
Original line number Diff line number Diff line
@@ -4448,12 +4448,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
  jam();
  const Uint32 nodeId = failedNodePtr.i;

  if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){
  if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
  {
    /*----------------------------------------------------*/
    /*  THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE   */
    /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT     */
    /* THE NODE HAS MISSED A LOCAL CHECKPOINT.            */
    /*----------------------------------------------------*/

    /**
     * Bug#28717, Only master should do this, as this status is copied
     *   to other nodes
     */
    switch (failedNodePtr.p->activeStatus) {
    case Sysfile::NS_Active:
      jam();
+7 −0
Original line number Diff line number Diff line
@@ -1375,6 +1375,13 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
{
  jamEntry();

  if (ERROR_INSERTED(1001))
  {
    sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100, 
                        signal->getLength());
    return;
  }
  
  const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0];
  NdbNodeBitmask allFailed; 
  allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes);
+81 −0
Original line number Diff line number Diff line
@@ -1045,6 +1045,84 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
}


/**
 * Regression test for Bug#28717.
 *
 * Scenario, as driven through NdbRestarter:
 *  1. Pick the master, a node (node0) in another node group than the master,
 *     and a node (node1) in the same node group as node0.
 *  2. Restart node0 (presumably into the "no start" state, given the
 *     waitNodesNoStart() call further down -- confirm against
 *     NdbRestarter::restartOneDbNode).
 *  3. Ask the master to start a local checkpoint three times, each time
 *     draining the checkpoint log events until "started" and then
 *     "completed" have been seen.
 *  4. Insert error 5010 in node0 and error 1001 in node1, start node0,
 *     clear the error in node1 after three seconds, and finally restart
 *     node0 again and wait for the whole cluster to come up.
 *
 * The test is skipped (reported as NDBT_OK) when the cluster has fewer than
 * four data nodes.
 *
 * @param ctx   test context (not used by this test)
 * @param step  test step (not used by this test)
 * @return NDBT_OK on success or when skipped, NDBT_FAILED if any restarter
 *         operation fails or the log event handle cannot be created
 */
int
runBug28717(NDBT_Context* ctx, NDBT_Step* step)
{
  (void)ctx;
  (void)step;

  NdbRestarter res;

  if (res.getNumDbNodes() < 4)
  {
    return NDBT_OK;
  }

  int master = res.getMasterNodeId();
  int node0 = res.getRandomNodeOtherNodeGroup(master, rand());
  int node1 = res.getRandomNodeSameNodeGroup(node0, rand());
  
  ndbout_c("master: %d node0: %d node1: %d", master, node0, node1);
  
  if (res.restartOneDbNode(node0, false, true, true))
  {
    return NDBT_FAILED;
  }

  {
    int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
    NdbLogEventHandle handle = 
      ndb_mgm_create_logevent_handle(res.handle, filter);

    // Without a valid handle the event loops below would dereference null.
    if (handle == 0)
    {
      return NDBT_FAILED;
    }

    int dump[] = { DumpStateOrd::DihStartLcpImmediately };
    struct ndb_logevent event;
    
    for (Uint32 i = 0; i<3; i++)
    {
      res.dumpStateOneNode(master, dump, 1);
      // Drain events until this checkpoint is reported started, then until
      // it is reported completed.  A negative return from
      // ndb_logevent_get_next() ends the wait early (best effort).
      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
            event.type != NDB_LE_LocalCheckpointStarted);
      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
            event.type != NDB_LE_LocalCheckpointCompleted);
    } 

    // Release the event subscription; it is not needed past this point.
    ndb_mgm_destroy_logevent_handle(&handle);
  }
  
  if (res.waitNodesNoStart(&node0, 1))
    return NDBT_FAILED;
  
  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  
  if (res.dumpStateOneNode(node0, val2, 2))
    return NDBT_FAILED;
  
  // NOTE(review): the meaning of error 5010 is not visible here -- confirm
  // against the error code list.
  if (res.insertErrorInNode(node0, 5010))
    return NDBT_FAILED;
  
  // Error 1001: delay NODE_FAILREP (to own node) in Ndbcntr until cleared.
  if (res.insertErrorInNode(node1, 1001))
    return NDBT_FAILED;
  
  if (res.startNodes(&node0, 1))
    return NDBT_FAILED;
  
  NdbSleep_SecSleep(3);

  // Clear error 1001 so node1 processes the delayed NODE_FAILREP.
  if (res.insertErrorInNode(node1, 0))
    return NDBT_FAILED;

  if (res.waitNodesNoStart(&node0, 1))
    return NDBT_FAILED;

  if (res.startNodes(&node0, 1))
    return NDBT_FAILED;

  if (res.waitClusterStarted())
    return NDBT_FAILED;
  
  return NDBT_OK;
}

NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", 
	 "Test that one node at a time can be stopped and then restarted "\
@@ -1366,6 +1444,9 @@ TESTCASE("Bug25364", ""){
TESTCASE("Bug25554", ""){
  INITIALIZER(runBug25554);
}
/**
 * Test case "Bug28717" (empty description string); runBug28717 is
 * registered as its initializer.
 */
TESTCASE("Bug28717", ""){
  INITIALIZER(runBug28717);
}
NDBT_TESTSUITE_END(testNodeRestart);

int main(int argc, const char** argv){
+4 −0
Original line number Diff line number Diff line
@@ -492,6 +492,10 @@ max-time: 1500
cmd: testDict
args: -n CreateAndDrop 

max-time: 1000
cmd: testNodeRestart
args: -n Bug28717 T1

max-time: 1500
cmd: testDict
args: -n CreateAndDropAtRandom -l 200 T1