Commit 354fa102 authored by unknown's avatar unknown
Browse files

Bug #12992 Cluster StopOnError = Y restarts ndbd indefinitely

parent 4345853e
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -2493,6 +2493,14 @@ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){
    
    const Uint32 start = currentBlockIndex;
    
    if (currentStartPhase == ZSTART_PHASE_6)
    {
      // Ndbd has passed the critical startphases.
      // Change error handler from "startup" state
      // to normal state.
      ErrorReporter::setErrorHandlerShutdownType();
    }

    for(; currentBlockIndex < ALL_BLOCKS_SZ; currentBlockIndex++){
      jam();
      if(ALL_BLOCKS[currentBlockIndex].NextSP == currentStartPhase){
+12 −2
Original line number Diff line number Diff line
@@ -152,6 +152,14 @@ ErrorReporter::formatMessage(ErrorCategory type,
  return;
}

NdbShutdownType ErrorReporter::s_errorHandlerShutdownType = NST_ErrorHandler;

/**
 * Select the shutdown type used by the error handlers in this file.
 *
 * The stored value is what handleAssert() and handleThreadAssert() pass
 * to NdbShutdown(), and what handleError() substitutes when it is asked
 * to shut down with NST_ErrorHandler.
 *
 * @param nst  shutdown type to remember for subsequent error handling
 */
void
ErrorReporter::setErrorHandlerShutdownType(NdbShutdownType nst)
{
  ErrorReporter::s_errorHandlerShutdownType = nst;
}

void
ErrorReporter::handleAssert(const char* message, const char* file, int line)
{
@@ -170,7 +178,7 @@ ErrorReporter::handleAssert(const char* message, const char* file, int line)
  WriteMessage(assert, ERR_ERROR_PRGERR, message, refMessage,
	       theEmulatedJamIndex, theEmulatedJam);

  NdbShutdown(NST_ErrorHandler);
  NdbShutdown(s_errorHandlerShutdownType);
}

void
@@ -182,7 +190,7 @@ ErrorReporter::handleThreadAssert(const char* message,
  BaseString::snprintf(refMessage, 100, "file: %s lineNo: %d - %s",
	   file, line, message);
  
  NdbShutdown(NST_ErrorHandler);
  NdbShutdown(s_errorHandlerShutdownType);
}//ErrorReporter::handleThreadAssert()


@@ -201,6 +209,8 @@ ErrorReporter::handleError(ErrorCategory type, int messageID,
  if(messageID == ERR_ERROR_INSERT){
    NdbShutdown(NST_ErrorInsert);
  } else {
    if (nst == NST_ErrorHandler)
      nst = s_errorHandlerShutdownType;
    NdbShutdown(nst);
  }
}
+2 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
class ErrorReporter
{
public:
  static void setErrorHandlerShutdownType(NdbShutdownType nst = NST_ErrorHandler);
  static void handleAssert(const char* message, 
			   const char* file, 
			   int line);
@@ -57,6 +58,7 @@ public:
  static const char* formatTimeStampString();
  
private:
  static enum NdbShutdownType s_errorHandlerShutdownType;
};

#endif
+37 −0
Original line number Diff line number Diff line
@@ -45,8 +45,14 @@ extern NdbMutex * theShutdownMutex;

void catchsigs(bool ignore); // for process signal handling

// Number of consecutive failed startups after which the parent stops
// restarting the child (compared against failed_startups).
#define MAX_FAILED_STARTUPS 3
// Flag set by child through SIGUSR1 to signal a failed startup.
// It is written from handler_sigusr1() and read/cleared by the parent
// loop, so it must be a volatile sig_atomic_t rather than a plain bool.
static volatile sig_atomic_t failed_startup_flag = 0;
// Counter for consecutive failed startups. Incremented from the signal
// handler and read/reset by the parent loop, hence volatile.
static volatile Uint32 failed_startups = 0;
extern "C" void handler_shutdown(int signum);  // for process signal handling
extern "C" void handler_error(int signum);  // for process signal handling
extern "C" void handler_sigusr1(int signum);  // child signalling failed restart

// Shows system information
void systemInfo(const Configuration & conf,
@@ -92,6 +98,8 @@ int main(int argc, char** argv)
  }
  
#ifndef NDB_WIN32
  signal(SIGUSR1, handler_sigusr1);

  for(pid_t child = fork(); child != 0; child = fork()){
    /**
     * Parent
@@ -137,6 +145,20 @@ int main(int argc, char** argv)
       */
      exit(0);
    }
    if (!failed_startup_flag)
    {
      // Reset the counter for consecutive failed startups
      failed_startups = 0;
    }
    else if (failed_startups >= MAX_FAILED_STARTUPS && !theConfig->stopOnError())
    {
      /**
       * Too many consecutive failed startups && !stopOnError():
       * give up instead of restarting the child again
       */
      g_eventLogger.alert("Ndbd has failed %u consecutive startups. Not restarting", failed_startups);
      exit(0);
    }
    failed_startup_flag = false;
    g_eventLogger.info("Ndb has terminated (pid %d) restarting", child);
    theConfig->fetch_configuration();
  }
@@ -170,6 +192,9 @@ int main(int argc, char** argv)
  /**
   * Do startup
   */

  ErrorReporter::setErrorHandlerShutdownType(NST_ErrorHandlerStartup);

  switch(globalData.theRestartFlag){
  case initial_state:
    globalEmulatorData.theThreadConfig->doStart(NodeState::SL_CMVMI);
@@ -359,3 +384,15 @@ handler_error(int signum){
  BaseString::snprintf(errorData, 40, "Signal %d received", signum);
  ERROR_SET_SIGNAL(fatal, 0, errorData, __FILE__);
}

/**
 * SIGUSR1 handler: records that a startup failed.
 *
 * The failure counter is bumped only if the flag is not already set, so
 * repeated signals before the flag is cleared count as a single failed
 * startup. The signal is logged on every invocation.
 *
 * NOTE(review): this logs through g_eventLogger from signal context;
 * whether that is async-signal-safe is not visible here - confirm.
 *
 * @param signum  number of the received signal (used for logging only)
 */
extern "C"
void
handler_sigusr1(int signum)
{
  const bool first_report = !failed_startup_flag;
  if (first_report)
  {
    ++failed_startups;
    failed_startup_flag = true;
  }

  g_eventLogger.info("Received signal %d. Ndbd failed startup (%u).", signum, failed_startups);
}
+6 −0
Original line number Diff line number Diff line
@@ -154,6 +154,9 @@ NdbShutdown(NdbShutdownType type,
    case NST_ErrorHandlerSignal:
      g_eventLogger.info("Error handler signal %s system", shutting);
      break;
    case NST_ErrorHandlerStartup:
      g_eventLogger.info("Error handler startup %s system", shutting);
      break;
    case NST_Restart:
      g_eventLogger.info("Restarting system");
      break;
@@ -229,6 +232,9 @@ NdbShutdown(NdbShutdownType type,
    }
    
    if(type != NST_Normal && type != NST_Restart){
      // Signal parent that error occurred during startup
      if (type == NST_ErrorHandlerStartup)
	kill(getppid(), SIGUSR1);
      g_eventLogger.info("Error handler shutdown completed - %s", exitAbort);
#if ( defined VM_TRACE || defined ERROR_INSERT ) && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) )
      signal(6, SIG_DFL);
Loading