Commit acdd08f0 authored by unknown's avatar unknown
Browse files

fixed startup suma bug when ndb api connects during node restart

parent 68c1d22b
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -43,10 +43,11 @@ public:
  enum ErrorCode {
    GCPStopDetected = 3,
    CopyFragRefError = 5,
    TestStopOnError = 6
    TestStopOnError = 6,
    CopySubscriptionRef = 7,
    CopySubscriberRef = 8
  };
  
private:
  Uint32 errorRef;
  Uint32 errorCode;
  Uint32 data1;
+14 −0
Original line number Diff line number Diff line
@@ -203,6 +203,20 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal)
	     killingNode, data1);
    break;

  case SystemError::CopySubscriptionRef:
    BaseString::snprintf(buf, sizeof(buf), 
	     "Node %d killed this node because "
	     "it could not copy a subscription during node restart. "
	     "Copy subscription error code: %u.",
	     killingNode, data1);
    break;
  case SystemError::CopySubscriberRef:
    BaseString::snprintf(buf, sizeof(buf), 
	     "Node %d killed this node because "
	     "it could not start a subscriber during node restart. "
	     "Copy subscription error code: %u.",
	     killingNode, data1);
    break;
  default:
    BaseString::snprintf(buf, sizeof(buf), "System error %d, "
	     " this node was killed by node %d", 
+54 −3
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@
#include <signaldata/DropTab.hpp>
#include <signaldata/AlterTab.hpp>
#include <signaldata/DihFragCount.hpp>
#include <signaldata/SystemError.hpp>

#include <ndbapi/NdbDictionary.hpp>

@@ -690,7 +691,7 @@ Suma::execNODE_FAILREP(Signal* signal){
  
  if(failed.get(Restart.nodeId))
  {
    Restart.nodeId = 0;
    Restart.resetRestart(signal);
  }

  signal->theData[0] = SumaContinueB::RESEND_BUCKET;
@@ -3756,7 +3757,33 @@ Suma::execSUB_CREATE_REF(Signal* signal) {
  jamEntry();
  DBUG_ENTER("Suma::execSUB_CREATE_REF");
  ndbassert(signal->getNoOfSections() == 0);
  ndbrequire(false);
  SubCreateRef *const ref= (SubCreateRef *)signal->getDataPtr();
  Uint32 error= ref->errorCode;
  if (error != 1415)
  {
    /*
     * This will happen if an api node connects during while other node
     * is restarting, and in this case the subscription will already
     * have been created.
     * ToDo: more complete handling of api nodes joining during
     * node restart
     */
    Uint32 senderRef = signal->getSendersBlockRef();
    BlockReference cntrRef = calcNdbCntrBlockRef(refToNode(senderRef));
    // for some reason we did not manage to create a subscription
    // on the starting node
    SystemError * const sysErr = (SystemError*)&signal->theData[0];
    sysErr->errorCode = SystemError::CopySubscriptionRef;
    sysErr->errorRef = reference();
    sysErr->data1 = error;
    sysErr->data2 = 0;
    sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
               SystemError::SignalLength, JBB);
    Restart.resetRestart(signal);
    DBUG_VOID_RETURN;
  }
  // SubCreateConf has same signaldata as SubCreateRef
  Restart.runSUB_CREATE_CONF(signal);
  DBUG_VOID_RETURN;
}

@@ -3785,7 +3812,22 @@ Suma::execSUB_START_REF(Signal* signal) {
  jamEntry();
  DBUG_ENTER("Suma::execSUB_START_REF");
  ndbassert(signal->getNoOfSections() == 0);
  ndbrequire(false);
  SubStartRef *const ref= (SubStartRef *)signal->getDataPtr();
  Uint32 error= ref->errorCode;
  {
    Uint32 senderRef = signal->getSendersBlockRef();
    BlockReference cntrRef = calcNdbCntrBlockRef(refToNode(senderRef));
    // for some reason we did not manage to start a subscriber
    // on the starting node
    SystemError * const sysErr = (SystemError*)&signal->theData[0];
    sysErr->errorCode = SystemError::CopySubscriberRef;
    sysErr->errorRef = reference();
    sysErr->data1 = error;
    sysErr->data2 = 0;
    sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
               SystemError::SignalLength, JBB);
    Restart.resetRestart(signal);
  }
  DBUG_VOID_RETURN;
}

@@ -4080,6 +4122,15 @@ Suma::Restart::completeRestartingNode(Signal* signal, Uint32 sumaRef)
  //SumaStartMeConf *conf= (SumaStartMeConf*)signal->getDataPtrSend();
  suma.sendSignal(sumaRef, GSN_SUMA_START_ME_CONF, signal,
		  SumaStartMeConf::SignalLength, JBB);
  resetRestart(signal);
  DBUG_VOID_RETURN;
}

void
Suma::Restart::resetRestart(Signal* signal)
{
  jam();
  DBUG_ENTER("Suma::Restart::resetRestart");
  nodeId = 0;
  DBUG_VOID_RETURN;
}
+1 −0
Original line number Diff line number Diff line
@@ -489,6 +489,7 @@ public:
    void completeSubscriber(Signal* signal, Uint32 sumaRef);

    void completeRestartingNode(Signal* signal, Uint32 sumaRef);
    void resetRestart(Signal* signal);
  } Restart;

private: