Commit 80abad58 authored by unknown's avatar unknown
Browse files

bug#10358 - ndb

  Cluster failure with non-started nodes can result in timed-out transactions


ndb/src/mgmapi/mgmapi.cpp:
  Increase timeout for restarts
ndb/src/ndbapi/ClusterMgr.cpp:
  Report NFCOMPLETEREP when no alive node exists
    (instead of when no connected node exists)
ndb/src/ndbapi/ClusterMgr.hpp:
  Report NFCOMPLETEREP when no alive node exists
    (instead of when no connected node exists)
parent 55c9c4d7
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
    args.put("initialstart", initial);
    args.put("nostart", nostart);
    const Properties *reply;
    const int timeout = handle->read_timeout;
    handle->read_timeout= 5*60*1000; // 5 minutes
    reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
    handle->read_timeout= timeout;
    CHECK_REPLY(reply, -1);

    BaseString result;
@@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
  args.put("nostart", nostart);

  const Properties *reply;
  const int timeout = handle->read_timeout;
  handle->read_timeout= 5*60*1000; // 5 minutes
  reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
  handle->read_timeout= timeout;
  if(reply != NULL) {
    BaseString result;
    reply->get("result", result);
+7 −6
Original line number Diff line number Diff line
@@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
{
  ndbSetOwnVersion();
  clusterMgrThreadMutex = NdbMutex_Create();
  noOfAliveNodes= 0;
  noOfConnectedNodes= 0;
  theClusterMgrThread= 0;
}
@@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
  node.m_state = apiRegConf->nodeState;
  if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED  ||
			  node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
    node.m_alive = true;
    set_node_alive(node, true);
  } else {
    node.m_alive = false;
    set_node_alive(node, false);
  }//if
  node.hbSent = 0;
  node.hbCounter = 0;
@@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
  assert(node.defined == true);

  node.compatible = false;
  node.m_alive = false;
  set_node_alive(node, false);
  node.m_state = NodeState::SL_NOTHING;
  node.m_info.m_version = ref->version;

@@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){

  Node & theNode = theNodes[nodeId];
 
  theNode.m_alive = false;
  set_node_alive(theNode, false);
  if(theNode.connected)
    theFacade.doDisconnect(nodeId);
  
@@ -450,7 +451,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){

  theNode.nfCompleteRep = false;
  
  if(noOfConnectedNodes == 0){
  if(noOfAliveNodes == 0){
    NFCompleteRep rep;
    for(Uint32 i = 1; i<MAX_NODES; i++){
      if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
+14 −0
Original line number Diff line number Diff line
@@ -80,6 +80,7 @@ public:
  Uint32        getNoOfConnectedNodes() const;
  
private:
  Uint32        noOfAliveNodes;
  Uint32        noOfConnectedNodes;
  Node          theNodes[MAX_NODES];
  NdbThread*    theClusterMgrThread;
@@ -100,6 +101,19 @@ private:
  void execAPI_REGREF    (const Uint32 * theData);
  void execNODE_FAILREP  (const Uint32 * theData);
  void execNF_COMPLETEREP(const Uint32 * theData);

  /**
   * Set node.m_alive to the requested value and keep noOfAliveNodes
   * in step with it.
   *
   * The counter is only touched when the flag actually flips:
   *   alive -> not alive : decrement (asserting the counter is non-zero)
   *   not alive -> alive : increment
   * Calling with the state the node already has leaves the counter as is.
   *
   * @param node   node entry whose m_alive flag is updated
   * @param alive  new value for node.m_alive
   */
  inline void set_node_alive(Node& node, bool alive){
    const bool wasAlive = node.m_alive;

    if(wasAlive != alive)
    {
      if(alive)
      {
        // Node just became alive.
        noOfAliveNodes++;
      }
      else
      {
        // Node just stopped being alive; the counter must not already be 0.
        assert(noOfAliveNodes);
        noOfAliveNodes--;
      }
    }

    node.m_alive = alive;
  }
};

inline