Commit a182963c authored by unknown's avatar unknown
Browse files

ndb - bug#18781 (5.0) handle rolling upgrade, minor fixes, logging, docs


ndb/src/kernel/blocks/dbdict/DictLock.txt:
  NR signals
ndb/src/kernel/blocks/dbdict/Dbdict.cpp:
  call removeStaleDictLocks at right place, comment why it works
  more checks, better logging
ndb/src/kernel/blocks/dbdict/Dbdict.hpp:
  call removeStaleDictLocks at right place, comment why it works
  more checks, better logging
ndb/include/kernel/signaldata/DictLock.hpp:
  2 more REFs
ndb/include/ndb_version.h.in:
  DICT LOCK appeared in 5.0.23
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  DICT LOCK rolling upgrade from version < 5.0.23
ndb/src/kernel/blocks/ERROR_codes.txt:
  more DICT LOCK related testing
ndb/test/ndbapi/testDict.cpp:
  more DICT LOCK related testing
parent 40f44b48
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -55,7 +55,9 @@ public:
  // Error codes carried in a DICT_LOCK_REF reply.
  enum ErrorCode {
    NotMaster = 1,        // the node that received the request is not the master
    InvalidLockType = 2,
    TooManyRequests = 3
    BadUserRef = 3,       // userRef differs from the sender's block ref, or is not on a DB node
    TooLate = 4,          // the requesting node is already in the set of alive nodes
    TooManyRequests = 5
  };
private:
  Uint32 userPtr;
+2 −0
Original line number Diff line number Diff line
@@ -60,5 +60,7 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)

/* First 5.0 version with DICT LOCK; nodes older than this need rolling-upgrade handling. */
#define NDBD_DICT_LOCK_VERSION_5 MAKE_VERSION(5,0,23)

#endif
 
+4 −2
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4013
Next DBLQH 5043
Next DBDICT 6007
Next DBDIH 7175
Next DBDIH 7177
Next DBTC 8037
Next CMVMI 9000
Next BACKUP 10022
@@ -312,7 +312,9 @@ Test Crashes in handling node restarts

7170: Crash when receiving START_PERMREF (InitialStartRequired)

7174: Send one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR)
7174: Crash starting node before sending DICT_LOCK_REQ
7175: Master sends one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR)
7176: Slave NR pretends master does not support DICT lock (rolling upgrade)

DICT:
6000  Crash during NR when receiving DICTSTARTREQ
+85 −16
Original line number Diff line number Diff line
@@ -205,7 +205,7 @@ void Dbdict::execCONTINUEB(Signal* signal)

  case ZDICT_LOCK_POLL:
    jam();
    checkDictLockQueue(signal);
    checkDictLockQueue(signal, true);
    break;

  default :
@@ -2836,7 +2836,6 @@ void Dbdict::execNODE_FAILREP(Signal* signal)
  case BS_NODE_RESTART:
    jam();
    ok = true;
    removeStaleDictLocks(signal, theFailedNodes);
    break;
  }
  ndbrequire(ok);
@@ -2860,6 +2859,15 @@ void Dbdict::execNODE_FAILREP(Signal* signal)
    }//if
  }//for

  /*
   * NODE_FAILREP guarantees that no "in flight" signal from
   * a dead node is accepted, and also that the job buffer contains
   * no such (un-executed) signals.  Therefore no DICT_UNLOCK_ORD
   * from a dead node (leading to master crash) is possible after
   * this clean-up removes the lock record.
   */
  removeStaleDictLocks(signal, theFailedNodes);

}//execNODE_FAILREP()


@@ -12210,7 +12218,7 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask)
const Dbdict::DictLockType*
Dbdict::getDictLockType(Uint32 lockType)
{
  static DictLockType lt[] = {
  static const DictLockType lt[] = {
    { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" }
  };
  for (int i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) {
@@ -12220,12 +12228,40 @@ Dbdict::getDictLockType(Uint32 lockType)
  return NULL;
}

/*
 * Emit one info event summarising the DICT lock state: block state,
 * number of operation records in use, the polling flag, the given
 * poll count, and the lock queue rendered as space-separated node ids
 * (each followed by "L" when that entry currently holds the lock).
 *
 * The queue text is built in a fixed 100-byte buffer; entries that do
 * not fit are silently truncated.
 */
void
Dbdict::sendDictLockInfoEvent(Uint32 pollCount)
{
  char queue_buf[100];
  char* cursor = queue_buf;
  const char* const limit = queue_buf + sizeof(queue_buf);
  queue_buf[0] = 0;

  DictLockPtr entryPtr;
  unsigned entryNo = 0;

  for (c_dictLockQueue.first(entryPtr);
       entryPtr.i != RNIL;
       c_dictLockQueue.next(entryPtr)) {
    jam();
    entryNo++;
    // no separator in front of the first entry
    const char* const separator = (entryNo == 1) ? "" : " ";
    const char* const lockedMark = entryPtr.p->locked ? "L" : "";
    const unsigned nodeId = (unsigned)refToNode(entryPtr.p->req.userRef);

    my_snprintf(cursor, limit - cursor, "%s%u%s",
                separator, nodeId, lockedMark);
    // advance to the terminating NUL just written
    cursor += strlen(cursor);
  }

  infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s",
      (int)c_blockState,
      c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(),
      c_dictLockPoll, (int)pollCount, queue_buf);
}

/*
 * Emit an info event about a single lock-queue entry: the caller's
 * text, the node id taken from the request's userRef, and the text
 * of the entry's lock type.
 */
void
Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text)
{
  infoEvent("DICT: %s %u for %s",
      text,
      (unsigned int)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text);
      (unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text);
}

void
@@ -12234,6 +12270,8 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal)
  jamEntry();
  const DictLockReq* req = (const DictLockReq*)&signal->theData[0];

  // make sure bad request crashes slave, not master (us)

  if (getOwnNodeId() != c_masterNodeId) {
    jam();
    sendDictLockRef(signal, *req, DictLockRef::NotMaster);
@@ -12247,6 +12285,19 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal)
    return;
  }

  if (req->userRef != signal->getSendersBlockRef() ||
      getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) {
    jam();
    sendDictLockRef(signal, *req, DictLockRef::BadUserRef);
    return;
  }

  if (c_aliveNodes.get(refToNode(req->userRef))) {
    jam();
    sendDictLockRef(signal, *req, DictLockRef::TooLate);
    return;
  }

  DictLockPtr lockPtr;
  if (! c_dictLockQueue.seize(lockPtr)) {
    jam();
@@ -12258,21 +12309,23 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal)
  lockPtr.p->locked = false;
  lockPtr.p->lt = lt;

  checkDictLockQueue(signal);
  checkDictLockQueue(signal, false);

  if (! lockPtr.p->locked)
    sendDictLockInfoEvent(lockPtr, "lock request by node");
}

void
Dbdict::checkDictLockQueue(Signal* signal)
Dbdict::checkDictLockQueue(Signal* signal, bool poll)
{
  Uint32 pollCount = ! poll ? 0 : signal->theData[1];

  DictLockPtr lockPtr;

  do {
    if (! c_dictLockQueue.first(lockPtr)) {
      jam();
      setDictLockPoll(signal, false);
      setDictLockPoll(signal, false, pollCount);
      return;
    }

@@ -12299,7 +12352,7 @@ Dbdict::checkDictLockQueue(Signal* signal)
  // this routine is called again when it is removed for any reason

  bool on = ! lockPtr.p->locked;
  setDictLockPoll(signal, on);
  setDictLockPoll(signal, on, pollCount);
}

void
@@ -12326,7 +12379,7 @@ Dbdict::execDICT_UNLOCK_ORD(Signal* signal)

  c_dictLockQueue.release(lockPtr);

  checkDictLockQueue(signal);
  checkDictLockQueue(signal, false);
}

void
@@ -12359,21 +12412,32 @@ Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode)
// control polling

/*
 * Schedule (on == true) or stop scheduling (on == false) the
 * ZDICT_LOCK_POLL CONTINUEB, record the new polling state in
 * c_dictLockPoll, and log the lock state.
 *
 * pollCount is the value the caller read from the CONTINUEB signal
 * (theData[1]), or 0 when the caller was not the poll path.
 *
 * NOTE(review): a CONTINUEB is sent on every call with on == true,
 * including calls that do not come from the poll path - confirm this
 * cannot start several overlapping poll chains.
 */
void
Dbdict::setDictLockPoll(Signal* signal, bool on)
Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount)
{
  if (on) {
    jam();
    signal->theData[0] = ZDICT_LOCK_POLL;
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
    // pass the incremented count on to the next poll
    signal->theData[1] = pollCount + 1;
    // presumably: delay 100 ms, signal length 2 words - TODO confirm
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
  }

  if (c_dictLockPoll != on) {
  bool change = (c_dictLockPoll != on);

  if (change) {
    jam();
#ifdef VM_TRACE
    infoEvent("DICT: lock polling %s", on ? "On" : "Off");
#endif
    c_dictLockPoll = on;
  }

  // avoid too many messages if master is stuck busy (BS_NODE_FAILURE)
  // Back-off schedule (&& binds tighter than ||): log every poll for
  // counts 0..7, then every 8th below 64, every 64th below 512,
  // every 512th below 4096, and every 4096th after that.
  bool periodic =
    pollCount < 8 ||
    pollCount < 64 && pollCount % 8 == 0 ||
    pollCount < 512 && pollCount % 64 == 0 ||
    pollCount < 4096 && pollCount % 512 == 0 ||
    pollCount % 4096 == 0; // about every 6 minutes

  // always log a state change; otherwise only on the throttled schedule
  if (change || periodic)
    sendDictLockInfoEvent(pollCount);
}

// NF handling
@@ -12384,6 +12448,11 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes)
  DictLockPtr loopPtr;
  c_dictLockQueue.first(loopPtr);

  if (getOwnNodeId() != c_masterNodeId) {
    ndbrequire(loopPtr.i == RNIL);
    return;
  }

  while (loopPtr.i != RNIL) {
    jam();
    DictLockPtr lockPtr = loopPtr;
@@ -12409,7 +12478,7 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes)
    }
  }

  checkDictLockQueue(signal);
  checkDictLockQueue(signal, false);
}


+3 −2
Original line number Diff line number Diff line
@@ -1804,14 +1804,15 @@ private:
  bool c_dictLockPoll;

  static const DictLockType* getDictLockType(Uint32 lockType);
  void sendDictLockInfoEvent(Uint32 pollCount);
  void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text);

  void checkDictLockQueue(Signal* signal);
  void checkDictLockQueue(Signal* signal, bool poll);
  void sendDictLockConf(Signal* signal, DictLockPtr lockPtr);
  void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode);

  // control polling i.e. continueB loop
  void setDictLockPoll(Signal* signal, bool on);
  void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount);

  // NF handling
  void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes);
Loading