Commit 2609e057 authored by jonas@perch.ndb.mysql.com's avatar jonas@perch.ndb.mysql.com
Browse files

ndb - bug#29057 bug#29067 and bug#29118

  Fix 3 problems in REDO validation that could(will) lead to "Error while reading REDO log"
parent 92ad59d4
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -1617,7 +1617,8 @@ public:
      ACTIVE_WRITE_LOG = 17,             ///< A write operation during 
                                        ///< writing of log
      READ_SR_INVALIDATE_PAGES = 18,
      WRITE_SR_INVALIDATE_PAGES = 19
      WRITE_SR_INVALIDATE_PAGES = 19,
      WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0 = 20
    };
    /**
     * We have to remember the log pages read. 
@@ -2437,7 +2438,7 @@ private:
  void errorReport(Signal* signal, int place);
  void warningReport(Signal* signal, int place);
  void invalidateLogAfterLastGCI(Signal *signal);
  void readFileInInvalidate(Signal *signal);
  void readFileInInvalidate(Signal *signal, bool stepNext);
  void exitFromInvalidate(Signal* signal);
  Uint32 calcPageCheckSum(LogPageRecordPtr logP);

+262 −83
Original line number Diff line number Diff line
@@ -110,6 +110,10 @@ operator<<(NdbOut& out, Dblqh::ScanRecord::ScanType state){
//#define MARKER_TRACE 1
//#define TRACE_SCAN_TAKEOVER 1
#ifndef DEBUG_REDO
#define DEBUG_REDO 0
#endif
const Uint32 NR_ScanNo = 0;
void Dblqh::execACC_COM_BLOCK(Signal* signal)
@@ -11987,9 +11991,6 @@ void Dblqh::execFSCLOSECONF(Signal* signal)
  case LogFileRecord::CLOSE_SR_INVALIDATE_PAGES:
    jam();
    logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
    // Set the prev file to check if we shall close it.
    logFilePtr.i = logFilePtr.p->prevLogFile;
    ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
    logPartPtr.i = logFilePtr.p->logPartRec;
    ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
@@ -12039,7 +12040,7 @@ void Dblqh::execFSOPENCONF(Signal* signal)
  case LogFileRecord::OPEN_SR_INVALIDATE_PAGES:
    jam();
    logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
    readFileInInvalidate(signal);
    readFileInInvalidate(signal, false);
    return;
  case LogFileRecord::OPENING_INIT:
    jam();
@@ -12219,6 +12220,7 @@ void Dblqh::execFSWRITECONF(Signal* signal)
  case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES:
    jam();
    invalidateLogAfterLastGCI(signal);
    CRASH_INSERTION(5047);
    return;
  case LogFileOperationRecord::WRITE_PAGE_ZERO:
    jam();
@@ -12256,6 +12258,14 @@ void Dblqh::execFSWRITECONF(Signal* signal)
    jam();
    firstPageWriteLab(signal);
    return;
  case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0:
    jam();
    // We are done...send completed signal and exit this phase.
    releaseLfo(signal);
    signal->theData[0] = ZSR_FOURTH_COMP;
    signal->theData[1] = logPartPtr.i;
    sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
    return;
  default:
    jam();
    systemErrorLab(signal, __LINE__);
@@ -13326,6 +13336,12 @@ void Dblqh::writeSinglePage(Signal* signal, Uint32 pageNo,
  signal->theData[6] = logPagePtr.i;
  signal->theData[7] = pageNo;
  sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA);
  if (DEBUG_REDO)
    ndbout_c("writeSingle 1 page at part: %u file: %u pos: %u",
	     logPartPtr.i,
	     logFilePtr.p->fileNo,
	     pageNo);
}//Dblqh::writeSinglePage()
/* ##########################################################################
@@ -13391,6 +13407,12 @@ void Dblqh::openSrLastFileLab(Signal* signal)
void Dblqh::readSrLastFileLab(Signal* signal) 
{
  logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP];
  if (DEBUG_REDO)
    ndbout_c("readSrLastFileLab part: %u logExecState: %u logPartState: %u logLap: %u",
             logPartPtr.i,
 	     logPartPtr.p->logExecState,
 	     logPartPtr.p->logPartState,
 	     logPartPtr.p->logLap);
  if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) {
    jam();
    initGciInLogFileRec(signal, ZMAX_LOG_FILES_IN_PAGE_ZERO);
@@ -14608,6 +14630,20 @@ void Dblqh::srLogLimits(Signal* signal)
      break;
    }//if
  }//while
  if (DEBUG_REDO)
  {
    LogFileRecordPtr tmp;
    tmp.i = logPartPtr.p->stopLogfile;
    ptrCheckGuard(tmp, clogFileFileSize, logFileRecord);
    ndbout_c("srLogLimits part: %u start file: %u mb: %u stop file: %u mb: %u",
             logPartPtr.i,
             tlastPrepRef >> 16,
             tlastPrepRef & 65535,
             tmp.p->fileNo,
             logPartPtr.p->stopMbyte);           
  }
  /* ------------------------------------------------------------------------
   *  WE HAVE NOW FOUND BOTH THE START AND THE STOP OF THE LOG. NOW START
   *  EXECUTING THE LOG. THE FIRST ACTION IS TO OPEN THE LOG FILE WHERE TO
@@ -15034,6 +15070,12 @@ void Dblqh::execSr(Signal* signal)
    case ZCOMPLETED_GCI_TYPE:
      jam();
      logWord = readLogword(signal);
      if (DEBUG_REDO)
	ndbout_c("found gci: %u part: %u file: %u page: %u",
		 logWord,
		 logPartPtr.i,
		 logFilePtr.p->fileNo,
		 logFilePtr.p->currentFilepage);
      if (logWord == logPartPtr.p->logLastGci) {
        jam();
/*---------------------------------------------------------------------------*/
@@ -15050,6 +15092,10 @@ void Dblqh::execSr(Signal* signal)
          logPartPtr.p->headPageNo = logFilePtr.p->currentFilepage;
          logPartPtr.p->headPageIndex = 
                  logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
	  logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP];
	  if (DEBUG_REDO)
	    ndbout_c("execSr part: %u logLap: %u",
		     logPartPtr.i, logPartPtr.p->logLap);
        }//if
/*---------------------------------------------------------------------------*/
/* THERE IS NO NEED OF EXECUTING PAST THIS LINE SINCE THERE WILL ONLY BE LOG */
@@ -15212,67 +15258,140 @@ void Dblqh::invalidateLogAfterLastGCI(Signal* signal) {
  }
  switch (lfoPtr.p->lfoState) {
  case LogFileOperationRecord::READ_SR_INVALIDATE_PAGES:
    jam();
    // Check if this page must be invalidated.
    // If the log lap number on a page after the head of the tail is the same 
    // as the actual log lap number we must invalidate this page. Otherwise it
    // could be impossible to find the end of the log in a later system/node 
    // restart.
    if (logPagePtr.p->logPageWord[ZPOS_LOG_LAP] == logPartPtr.p->logLap) 
    {
      // This page must be invalidated.
      // We search for end
      // read next
      releaseLfo(signal);
      releaseLogpage(signal); 
      readFileInInvalidate(signal, true);
      lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES;
      return;
    }
    /**
     * We found the "last" page to invalidate...
     *   Invalidate backwards until head...
     */
    // Fall through...
  case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES:
    jam();
    releaseLfo(signal);
    releaseLogpage(signal); 
    if (logPartPtr.p->invalidatePageNo < (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE - 1)) {
      // We continue in this file.
      logPartPtr.p->invalidatePageNo++;
    } else {
      // We continue in the next file.
      logFilePtr.i = logFilePtr.p->nextLogFile;
      ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
      logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo;
      // Page 0 is used for file descriptors.
      logPartPtr.p->invalidatePageNo = 1; 
      if (logFilePtr.p->logFileStatus != LogFileRecord::OPEN) {
    // Step backwards...
    logPartPtr.p->invalidatePageNo--;
    if (logPartPtr.p->invalidatePageNo == 0)
    {
      jam();
	logFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_INVALIDATE_PAGES;
	openFileRw(signal, logFilePtr);
	return;
	break;
      if (logFilePtr.p->fileNo == 0)
      {
	/**
	 * We're wrapping in the log...
	 *   update logLap
	 */
	logPartPtr.p->logLap--;
	ndbrequire(logPartPtr.p->logLap); // Should always be > 0
	if (DEBUG_REDO)
	  ndbout_c("invalidateLogAfterLastGCI part: %u wrap from file 0 -> logLap: %u",
		   logPartPtr.i, logPartPtr.p->logLap);
      }
      
      /**
       * Move to prev file
       */
      logFilePtr.i = logFilePtr.p->prevLogFile;
      ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
      logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo;
      logPartPtr.p->invalidatePageNo = ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE - 1;
    }
    // Read a page from the log file. 
    readFileInInvalidate(signal);
    
    if (logPartPtr.p->invalidateFileNo == logPartPtr.p->headFileNo &&
	logPartPtr.p->invalidatePageNo == logPartPtr.p->headPageNo)
    {
      /**
       * Done...
       */
      logFilePtr.i = logPartPtr.p->currentLogfile;
      ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
      
      logFilePtr.i = logFilePtr.p->nextLogFile;
      ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
      // Close files if necessary. Current file and the next file should be 
      // left open.
      exitFromInvalidate(signal);
      return;
    break;
    }
  case LogFileOperationRecord::READ_SR_INVALIDATE_PAGES:
    jam();
    releaseLfo(signal);
    // Check if this page must be invalidated.
    // If the log lap number on a page after the head of the tail is the same 
    // as the actual log lap number we must invalidate this page. Otherwise it
    // could be impossible to find the end of the log in a later system/node 
    // restart.
    if (logPagePtr.p->logPageWord[ZPOS_LOG_LAP] == logPartPtr.p->logLap) {
      // This page must be invalidated.
      logPagePtr.p->logPageWord[ZPOS_LOG_LAP] = 0;
      // Contact NDBFS. Real time break.
    seizeLogpage(signal);
    /**
     * Make page really empty
     */
    bzero(logPagePtr.p, sizeof(LogPageRecord));
    writeSinglePage(signal, logPartPtr.p->invalidatePageNo,
		    ZPAGE_SIZE - 1, __LINE__);
    lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES;
    } else {
      // We are done with invalidating. Finish start phase 3.4. 
      exitFromInvalidate(signal);
    }
    return;
    break;
  default:
    jam();
    systemError(signal, __LINE__);
    return;
    break;
  }
  return;
}//Dblqh::invalidateLogAfterLastGCI
void Dblqh::readFileInInvalidate(Signal* signal) {
void Dblqh::readFileInInvalidate(Signal* signal, bool stepNext) 
{
  jam();
  if (stepNext)
  {
    logPartPtr.p->invalidatePageNo++;
    if (logPartPtr.p->invalidatePageNo == (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE)) 
    {
      // We continue in the next file.
      logFilePtr.i = logFilePtr.p->nextLogFile;
      ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
      logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo;
      // Page 0 is used for file descriptors.
      logPartPtr.p->invalidatePageNo = 1; 
      if (logFilePtr.p->fileNo == 0)
      {
	/**
	 * We're wrapping in the log...
	 *   update logLap
	 */
	logPartPtr.p->logLap++;
	if (DEBUG_REDO)
	  ndbout_c("readFileInInvalidate part: %u wrap to file 0 -> logLap: %u",
		   logPartPtr.i, logPartPtr.p->logLap);
      }
      if (logFilePtr.p->logFileStatus != LogFileRecord::OPEN) 
      {
	jam();
	logFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_INVALIDATE_PAGES;
	openFileRw(signal, logFilePtr);
	return;
      }
    }
  }
  
  // Contact NDBFS. Real time break.
  readSinglePage(signal, logPartPtr.p->invalidatePageNo); 
  lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES;
@@ -15280,35 +15399,58 @@ void Dblqh::readFileInInvalidate(Signal* signal) {
void Dblqh::exitFromInvalidate(Signal* signal) {
  jam();
  // Close files if necessary. Current file and the next file should be 
  // left open.
  if (logFilePtr.i != logPartPtr.p->currentLogfile) {
    LogFileRecordPtr currentLogFilePtr;
    LogFileRecordPtr nextAfterCurrentLogFilePtr;
    currentLogFilePtr.i = logPartPtr.p->currentLogfile;
    ptrCheckGuard(currentLogFilePtr, clogFileFileSize, logFileRecord);
loop:  
  logFilePtr.i = logFilePtr.p->nextLogFile;
  ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
    nextAfterCurrentLogFilePtr.i = currentLogFilePtr.p->nextLogFile;
  if (logFilePtr.i == logPartPtr.p->currentLogfile)
  {
    jam();
    goto done;
  }
  
  if (logFilePtr.p->fileNo == 0)
  {
    jam();
    /**
     * Logfile 0 shoult *not* be closed
     */
    goto loop;
  }
  
  if (logFilePtr.p->logFileStatus == LogFileRecord::CLOSED)
  {
    jam();
    goto done;
  }
    if (logFilePtr.i != nextAfterCurrentLogFilePtr.i) {
      // This file should be closed.
  jam();
  ndbrequire(logFilePtr.p->logFileStatus == LogFileRecord::OPEN);
  logFilePtr.p->logFileStatus = LogFileRecord::CLOSE_SR_INVALIDATE_PAGES;
  closeFile(signal, logFilePtr);
      // Return from this function and wait for close confirm. Then come back 
      // and test the previous file for closing.
  return;
    }
  }    
  // We are done with closing files, send completed signal and exit this phase.
  signal->theData[0] = ZSR_FOURTH_COMP;
  signal->theData[1] = logPartPtr.i;
  sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
done:
  if (DEBUG_REDO)
    ndbout_c("exitFromInvalidate part: %u head file: %u page: %u", 
	     logPartPtr.i,
	     logPartPtr.p->headFileNo,
	     logPartPtr.p->headPageNo);
  
  logFilePtr.i = logPartPtr.p->firstLogfile;
  ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
  logPagePtr.i = logFilePtr.p->logPageZero;
  ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
  logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] = 
    logPartPtr.p->headFileNo;
  writeSinglePage(signal, 0, ZPAGE_SIZE - 1, __LINE__);
  lfoPtr.p->logFileRec = logFilePtr.i;
  lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0;
  return;
}
/*---------------------------------------------------------------------------*/
/* THE EXECUTION OF A LOG RECORD IS COMPLETED. RELEASE PAGES IF THEY WERE    */
/* READ FROM DISK FOR THIS PARTICULAR OPERATION.                             */
@@ -15694,20 +15836,10 @@ void Dblqh::readSrFourthZeroLab(Signal* signal)
  ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
  logPartPtr.p->invalidateFileNo = logPartPtr.p->headFileNo;
  logPartPtr.p->invalidatePageNo = logPartPtr.p->headPageNo;
   
  logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_INVALIDATE;
  seizeLfo(signal);
  initLfo(signal);
  // The state here is a little confusing, but simulates that we return
  // to invalidateLogAfterLastGCI() from an invalidate write and are ready
  // to read a page from file. 
  lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES;
   
  /**
   * Make sure we dont release zero page
   */
  seizeLogpage(signal);
  invalidateLogAfterLastGCI(signal);
  readFileInInvalidate(signal, true);
  lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES;
  return;
}//Dblqh::readSrFourthZeroLab()
@@ -16279,6 +16411,14 @@ void Dblqh::completedLogPage(Signal* signal, Uint32 clpType, Uint32 place)
  signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD;
  signal->theData[5] = twlpNoPages;
  sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 15, JBA);
  if (DEBUG_REDO)
    ndbout_c("writing %d pages at part: %u file: %u pos: %u",
	     twlpNoPages,
	     logPartPtr.i,
	     logFilePtr.p->fileNo,
	     logFilePtr.p->filePosition);
  if (twlpType == ZNORMAL) {
    jam();
    lfoPtr.p->lfoState = LogFileOperationRecord::ACTIVE_WRITE_LOG;
@@ -17518,6 +17658,14 @@ void Dblqh::readExecLog(Signal* signal)
  signal->theData[14] = lfoPtr.p->logPageArray[8];
  signal->theData[15] = lfoPtr.p->logPageArray[9];
  sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 16, JBA);
  if (DEBUG_REDO)
    ndbout_c("readExecLog %u page at part: %u file: %u pos: %u",
	     lfoPtr.p->noPagesRw,
	     logPartPtr.i,
	     logFilePtr.p->fileNo,
	     logPartPtr.p->execSrStartPageNo);
}//Dblqh::readExecLog()
/* ------------------------------------------------------------------------- */
@@ -17580,6 +17728,14 @@ void Dblqh::readExecSr(Signal* signal)
  signal->theData[13] = lfoPtr.p->logPageArray[7];
  signal->theData[14] = tresPageid;
  sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 15, JBA);
  if (DEBUG_REDO)
    ndbout_c("readExecSr %u page at part: %u file: %u pos: %u",
	     8,
	     logPartPtr.i,
	     logFilePtr.p->fileNo,
	     tresPageid);
}//Dblqh::readExecSr()
/* ------------------------------------------------------------------------- */
@@ -17729,6 +17885,13 @@ void Dblqh::readSinglePage(Signal* signal, Uint32 pageNo)
  signal->theData[6] = logPagePtr.i;
  signal->theData[7] = pageNo;
  sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 8, JBA);
  if (DEBUG_REDO)
    ndbout_c("readSinglePage 1 page at part: %u file: %u pos: %u",
	     logPartPtr.i,
	     logFilePtr.p->fileNo,
	     pageNo);
}//Dblqh::readSinglePage()
/* --------------------------------------------------------------------------
@@ -18311,8 +18474,17 @@ void Dblqh::writeCompletedGciLog(Signal* signal)
    jam();
    changeMbyte(signal);
  }//if
  logFilePtr.p->remainingWordsInMbyte = 
    logFilePtr.p->remainingWordsInMbyte - ZCOMPLETED_GCI_LOG_SIZE;
  if (DEBUG_REDO)
    ndbout_c("writeCompletedGciLog gci: %u part: %u file: %u page: %u",
	     cnewestCompletedGci,
	     logPartPtr.i,
	     logFilePtr.p->fileNo,
	     logFilePtr.p->currentFilepage);
  writeLogWord(signal, ZCOMPLETED_GCI_TYPE);
  writeLogWord(signal, cnewestCompletedGci);
  logPartPtr.p->logPartNewestCompletedGCI = cnewestCompletedGci;
@@ -18349,6 +18521,13 @@ void Dblqh::writeDirty(Signal* signal, Uint32 place)
  signal->theData[6] = logPagePtr.i;
  signal->theData[7] = logPartPtr.p->prevFilepage;
  sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA);
  if (DEBUG_REDO)
    ndbout_c("writeDirty 1 page at part: %u file: %u pos: %u",
	     logPartPtr.i,
	     logFilePtr.p->fileNo,
	     logPartPtr.p->prevFilepage);
}//Dblqh::writeDirty()
/* --------------------------------------------------------------------------