Commit e1c9dd5f authored by unknown's avatar unknown
Browse files

BUG#18966 Change in stop/shutdown behaviour

Improvements that were discussed with Tomas.

Maintain protocol backwards/forwards compatibility for client and server and support
the commands from ndb_mgm 'as expected'


ndb/include/mgmapi/mgmapi.h:
  Add ndb_mgm_stop3 mgmapi function.
   - This supports stopping all DB nodes, or all DB and MGM nodes.
   - It also tells the mgmapi program whether it needs to disconnect to apply changes.
  
  Add ndb_mgm_restart3 mgmapi function.
   - Tells mgmapi program if it needs to disconnect to apply changes
  
  Add (internal) ndb_mgm_get_version
   - designed to be used to find out what protocol version we need to speak to the server.
ndb/src/mgmapi/mgmapi.cpp:
  Add cache of mgmd version to ndb_mgm_handle. Only filled out in functions that
  need to know the version of the mgmd we're talking to.
  Initialize these members in create handle.
  added ndb_mgm_get_version which asks the mgm server what version it is. This call
  has been supported since the dawn of time, no compatibility issues here.
  
  Add implementation of ndb_mgm_stop3
  Check what version of the protocol the server speaks, and speak it.
  Add compatibility for ndb_mgm_stop2
  
  Same for ndb_mgm_restart3.
ndb/src/mgmclient/CommandInterpreter.cpp:
  Simplify stop and restart code.
  
  Use the new ndb_mgm_(stop|restart)3 calls to find out if we need to disconnect.
ndb/src/mgmsrv/MgmtSrvr.cpp:
  Add nice call for shutting down MGM servers (like shutdownDB)
ndb/src/mgmsrv/MgmtSrvr.hpp:
  add prototype for shutdownMGM
ndb/src/mgmsrv/Services.cpp:
  Add restart node v2, stop v2, stop all v2 while maintaining protocol backwards
  compatibility.
  
  Unfortunately we can't add result lines due to protocol errors in clients :(
  Neither can we add extra things to the 'result: Ok' line due to the use of
  strcmp instead of strncmp.
ndb/src/mgmsrv/Services.hpp:
  Add prototypes for restart, stop and stopall v1 and v2
parent 564ba97b
Loading
Loading
Loading
Loading
+57 −0
Original line number Diff line number Diff line
@@ -694,6 +694,28 @@ extern "C" {
  int ndb_mgm_stop2(NdbMgmHandle handle, int no_of_nodes,
		    const int * node_list, int abort);

  /**
   * Stops cluster nodes
   *
   * Extended form of ndb_mgm_stop2() that can also stop management nodes
   * and reports whether the caller has to drop its connection.
   *
   * @param   handle        Management handle.
   * @param   no_of_nodes   Number of database nodes to stop<br>
   *                         -1: All database and management nodes<br>
   *                          0: All database nodes in cluster<br>
   *                          n: Stop the <var>n</var> node(s) specified in
   *                            the array node_list<br>
   *                        Values below -1 are rejected as an error.
   * @param   node_list     List of node IDs of database nodes to be stopped
   * @param   abort         Don't perform graceful stop,
   *                        but rather stop immediately
   * @param   disconnect    Output parameter, must not be NULL.
   *                        Returns true if you need to disconnect to apply
   *                        the stop command (e.g. stopping the mgm server
   *                        that handle is connected to).
   *                        Set to 0 when the management server does not
   *                        speak the "v2" stop protocol.
   *
   * @return                Number of nodes stopped (-1 on error).
   */
  int ndb_mgm_stop3(NdbMgmHandle handle, int no_of_nodes,
		    const int * node_list, int abort, int *disconnect);


  /**
   * Restart database nodes
   *
@@ -733,6 +755,31 @@ extern "C" {
		       const int * node_list, int initial,
		       int nostart, int abort);

  /**
   * Restart nodes
   *
   * Extended form of ndb_mgm_restart2() that also reports whether the
   * caller has to drop its connection.
   *
   * @param   handle        Management handle.
   * @param   no_of_nodes   Number of database nodes to be restarted:<br>
   *                          0: Restart all database nodes in the cluster<br>
   *                          n: Restart the <var>n</var> node(s) specified in the
   *                            array node_list<br>
   *                        Negative values are reported as
   *                        NDB_MGM_RESTART_FAILED.
   * @param   node_list     List of node IDs of database nodes to be restarted
   * @param   initial       Remove filesystem from restarting node(s)
   * @param   nostart       Don't actually start node(s) but leave them
   *                        waiting for start command
   * @param   abort         Don't perform graceful restart,
   *                        but rather restart immediately
   * @param   disconnect    Output parameter, must not be NULL.
   *                        Returns true if mgmapi client must disconnect from
   *                        server to apply the requested operation. (e.g.
   *                        restart the management server)
   *                        NOTE(review): the "restart all" path does not
   *                        appear to write this value - callers should
   *                        initialise it to 0; confirm against the
   *                        implementation.
   *
   * @return                Number of nodes restarted (-1 on error).
   */
  int ndb_mgm_restart3(NdbMgmHandle handle, int no_of_nodes,
		       const int * node_list, int initial,
		       int nostart, int abort, int *disconnect);

  /**
   * Start database nodes
   *
@@ -1023,6 +1070,16 @@ extern "C" {
   */
  Uint32 ndb_mgm_get_mgmd_nodeid(NdbMgmHandle handle);

  /**
   * Get the version of the mgm server we're talking to.
   * Designed to allow switching of protocol depending on version
   * so that new clients can speak to old servers in a compat mode
   *
   * @param   handle        Management handle (must be connected).
   * @param   major         Output: major version number of the server.
   * @param   minor         Output: minor version number of the server.
   * @param   build         Output: build number, derived from the version
   *                        id reported by the server.
   * @param   len           Size in bytes of the buffer pointed to by str.
   * @param   str           Output: receives the server's version string;
   *                        at most len bytes are written.
   *
   * @return                1 on success, 0 on failure.
   */
  int ndb_mgm_get_version(NdbMgmHandle handle,
                          int *major, int *minor, int* build,
                          int len, char* str);


  /**
   * Config iterator
   */
+174 −22
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <mgmapi_debug.h>
#include "mgmapi_configuration.hpp"
#include <socket_io.h>
#include <version.h>

#include <NdbOut.hpp>
#include <SocketServer.hpp>
@@ -103,6 +104,9 @@ struct ndb_mgm_handle {
#endif
  FILE *errstream;
  char *m_name;
  int mgmd_version_major;
  int mgmd_version_minor;
  int mgmd_version_build;
};

#define SET_ERROR(h, e, s) setError(h, e, __LINE__, s)
@@ -168,6 +172,10 @@ ndb_mgm_create_handle()
  h->logfile = 0;
#endif

  h->mgmd_version_major= -1;
  h->mgmd_version_minor= -1;
  h->mgmd_version_build= -1;

  DBUG_PRINT("info", ("handle=0x%x", (UintPtr)h));
  DBUG_RETURN(h);
}
@@ -826,37 +834,75 @@ ndb_mgm_stop(NdbMgmHandle handle, int no_of_nodes, const int * node_list)
  return ndb_mgm_stop2(handle, no_of_nodes, node_list, 0);
}


extern "C"
int
ndb_mgm_stop2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
	      int abort)
{
  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_stop2");
  const ParserRow<ParserDummy> stop_reply[] = {
  int disconnect;
  return ndb_mgm_stop3(handle, no_of_nodes, node_list, abort, &disconnect);
}


extern "C"
int
ndb_mgm_stop3(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
	      int abort, int *disconnect)
{
  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_stop3");
  const ParserRow<ParserDummy> stop_reply_v1[] = {
    MGM_CMD("stop reply", NULL, ""),
    MGM_ARG("stopped", Int, Optional, "No of stopped nodes"),
    MGM_ARG("result", String, Mandatory, "Error message"),
    MGM_END()
  };
  const ParserRow<ParserDummy> stop_reply_v2[] = {
    MGM_CMD("stop reply", NULL, ""),
    MGM_ARG("stopped", Int, Optional, "No of stopped nodes"),
    MGM_ARG("result", String, Mandatory, "Error message"),
    MGM_ARG("disconnect", Int, Mandatory, "Need to disconnect"),
    MGM_END()
  };

  CHECK_HANDLE(handle, -1);
  CHECK_CONNECTED(handle, -1);

  if(no_of_nodes < 0){
  if(handle->mgmd_version_build==-1)
  {
    char verstr[50];
    ndb_mgm_get_version(handle,
                        &(handle->mgmd_version_major),
                        &(handle->mgmd_version_minor),
                        &(handle->mgmd_version_build),
                        sizeof(verstr),
                        verstr);
  }
  int use_v2= (handle->mgmd_version_major==5)
    && (
        (handle->mgmd_version_minor==0 && handle->mgmd_version_build>=21)
        ||(handle->mgmd_version_minor==1 && handle->mgmd_version_build>=12)
        );

  if(no_of_nodes < -1){
    SET_ERROR(handle, NDB_MGM_ILLEGAL_NUMBER_OF_NODES, 
	      "Negative number of nodes requested to stop");
    return -1;
  }

  Uint32 stoppedNoOfNodes = 0;
  if(no_of_nodes == 0){
  if(no_of_nodes <= 0){
    /**
     * All database nodes should be stopped
     * All nodes should be stopped (all or just db)
     */
    Properties args;
    args.put("abort", abort);
    if(use_v2)
      args.put("stop", (no_of_nodes==-1)?"mgm,db":"db");
    const Properties *reply;
    reply = ndb_mgm_call(handle, stop_reply, "stop all", &args);
    if(use_v2)
      reply = ndb_mgm_call(handle, stop_reply_v2, "stop all v2", &args);
    else
      reply = ndb_mgm_call(handle, stop_reply_v1, "stop all", &args);
    CHECK_REPLY(reply, -1);

    if(!reply->get("stopped", &stoppedNoOfNodes)){
@@ -865,6 +911,10 @@ ndb_mgm_stop2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
      delete reply;
      return -1;
    }
    if(use_v2)
      reply->get("disconnect", (Uint32*)disconnect);
    else
      *disconnect= 0;
    BaseString result;
    reply->get("result", result);
    if(strcmp(result.c_str(), "Ok") != 0) {
@@ -890,7 +940,11 @@ ndb_mgm_stop2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
  args.put("abort", abort);

  const Properties *reply;
  reply = ndb_mgm_call(handle, stop_reply, "stop", &args);
  if(use_v2)
    reply = ndb_mgm_call(handle, stop_reply_v2, "stop v2", &args);
  else
    reply = ndb_mgm_call(handle, stop_reply_v1, "stop", &args);

  CHECK_REPLY(reply, stoppedNoOfNodes);
  if(!reply->get("stopped", &stoppedNoOfNodes)){
    SET_ERROR(handle, NDB_MGM_STOP_FAILED, 
@@ -898,6 +952,10 @@ ndb_mgm_stop2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
    delete reply;
    return -1;
  }
  if(use_v2)
    reply->get("disconnect", (Uint32*)disconnect);
  else
    *disconnect= 0;
  BaseString result;
  reply->get("result", result);
  if(strcmp(result.c_str(), "Ok") != 0) {
@@ -909,22 +967,65 @@ ndb_mgm_stop2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
  return stoppedNoOfNodes;
}

/**
 * Restart nodes using the default options.
 *
 * Convenience wrapper: forwards to ndb_mgm_restart2() with the three
 * option arguments (initial, nostart, abort) all set to 0.
 *
 * @return  Whatever ndb_mgm_restart2() returns.
 */
extern "C"
int
ndb_mgm_restart(NdbMgmHandle handle, int no_of_nodes, const int *node_list)
{
  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart");

  const int opt_initial= 0;
  const int opt_nostart= 0;
  const int opt_abort= 0;

  return ndb_mgm_restart2(handle, no_of_nodes, node_list,
                          opt_initial, opt_nostart, opt_abort);
}

/**
 * Restart nodes (compatibility entry point).
 *
 * Forwards to ndb_mgm_restart3() and throws away the "disconnect"
 * indication, which callers of this older interface cannot receive.
 *
 * @return  Whatever ndb_mgm_restart3() returns.
 */
extern "C"
int
ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
		 int initial, int nostart, int abort)
{
  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart2");

  /* Sink for the out-parameter of the v3 call; never read afterwards. */
  int unused_disconnect;
  const int rc= ndb_mgm_restart3(handle, no_of_nodes, node_list,
                                 initial, nostart, abort,
                                 &unused_disconnect);
  return rc;
}

extern "C"
int
ndb_mgm_restart3(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
		 int initial, int nostart, int abort, int *disconnect)
{
  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart3");
  Uint32 restarted = 0;
  const ParserRow<ParserDummy> restart_reply[] = {
  const ParserRow<ParserDummy> restart_reply_v1[] = {
    MGM_CMD("restart reply", NULL, ""),
    MGM_ARG("result", String, Mandatory, "Error message"),
    MGM_ARG("restarted", Int, Optional, "No of restarted nodes"),
    MGM_END()
  };
  const ParserRow<ParserDummy> restart_reply_v2[] = {
    MGM_CMD("restart reply", NULL, ""),
    MGM_ARG("result", String, Mandatory, "Error message"),
    MGM_ARG("restarted", Int, Optional, "No of restarted nodes"),
    MGM_ARG("disconnect", Int, Optional, "Disconnect to apply"),
    MGM_END()
  };

  CHECK_HANDLE(handle, -1);
  CHECK_CONNECTED(handle, -1);

  if(handle->mgmd_version_build==-1)
  {
    char verstr[50];
    ndb_mgm_get_version(handle,
                        &(handle->mgmd_version_major),
                        &(handle->mgmd_version_minor),
                        &(handle->mgmd_version_build),
                        sizeof(verstr),
                        verstr);
  }
  int use_v2= (handle->mgmd_version_major==5)
    && (
        (handle->mgmd_version_minor==0 && handle->mgmd_version_build>=21)
        ||(handle->mgmd_version_minor==1 && handle->mgmd_version_build>=12)
        );

  if(no_of_nodes < 0){
    SET_ERROR(handle, NDB_MGM_RESTART_FAILED, 
	      "Restart requested of negative number of nodes");
@@ -939,7 +1040,7 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
    const Properties *reply;
    const int timeout = handle->read_timeout;
    handle->read_timeout= 5*60*1000; // 5 minutes
    reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
    reply = ndb_mgm_call(handle, restart_reply_v1, "restart all", &args);
    handle->read_timeout= timeout;
    CHECK_REPLY(reply, -1);

@@ -975,7 +1076,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
  const Properties *reply;
  const int timeout = handle->read_timeout;
  handle->read_timeout= 5*60*1000; // 5 minutes
  reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
  if(use_v2)
    reply = ndb_mgm_call(handle, restart_reply_v2, "restart node v2", &args);
  else
    reply = ndb_mgm_call(handle, restart_reply_v1, "restart node", &args);
  handle->read_timeout= timeout;
  if(reply != NULL) {
    BaseString result;
@@ -986,20 +1090,16 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
      return -1;
    }
    reply->get("restarted", &restarted);
    if(use_v2)
      reply->get("disconnect", (Uint32*)disconnect);
    else
      *disconnect= 0;
    delete reply;
  } 
  
  return restarted;
}

extern "C"
int
ndb_mgm_restart(NdbMgmHandle handle, int no_of_nodes, const int *node_list) 
{
  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart");
  return ndb_mgm_restart2(handle, no_of_nodes, node_list, 0, 0, 0);
}

static const char *clusterlog_severity_names[]=
  { "enabled", "debug", "info", "warning", "error", "critical", "alert" };

@@ -2353,4 +2453,56 @@ int ndb_mgm_end_session(NdbMgmHandle handle)
  DBUG_RETURN(0);
}

/**
 * Ask the connected management server for its version.
 *
 * Sends the "get version" command and decodes the "version" reply.
 *
 * @param handle  Management handle; must be valid and connected.
 * @param major   Output: major version reported by the server.
 * @param minor   Output: minor version reported by the server.
 * @param build   Output: build number extracted from the reported
 *                version id with getBuild().
 * @param len     Size in bytes of the buffer pointed to by str.
 * @param str     Output: version string, truncated to fit and always
 *                NUL-terminated. Left untouched if NULL or len <= 0.
 *
 * @return 1 on success, 0 on failure. On failure none of the output
 *         parameters are modified, so callers that use a sentinel value
 *         to detect "not fetched yet" will retry on their next call.
 */
extern "C"
int ndb_mgm_get_version(NdbMgmHandle handle,
                        int *major, int *minor, int *build, int len, char* str)
{
  DBUG_ENTER("ndb_mgm_get_version");
  CHECK_HANDLE(handle, 0);
  CHECK_CONNECTED(handle, 0);

  Properties args;

  const ParserRow<ParserDummy> reply[]= {
    MGM_CMD("version", NULL, ""),
    MGM_ARG("id", Int, Mandatory, "ID"),
    MGM_ARG("major", Int, Mandatory, "Major"),
    MGM_ARG("minor", Int, Mandatory, "Minor"),
    MGM_ARG("string", String, Mandatory, "String"),
    MGM_END()
  };

  const Properties *prop;
  prop = ndb_mgm_call(handle, reply, "get version", &args);
  CHECK_REPLY(prop, 0);

  /*
   * Decode into locals first so that the caller's variables are only
   * written once the whole reply is known to be usable, and so that no
   * int* has to be reinterpreted as Uint32*.
   */
  Uint32 id= 0;
  Uint32 ver_major= 0;
  Uint32 ver_minor= 0;
  BaseString result;

  if(!prop->get("id", &id) ||
     !prop->get("major", &ver_major) ||
     !prop->get("minor", &ver_minor) ||
     !prop->get("string", result)){
    fprintf(handle->errstream, "Unable to get value\n");
    /* The reply is owned by us: release it on the error path as well. */
    delete prop;
    /* DBUG_ENTER above must be paired with DBUG_RETURN on every exit. */
    DBUG_RETURN(0);
  }

  *build= getBuild(id);
  *major= static_cast<int>(ver_major);
  *minor= static_cast<int>(ver_minor);

  if(str != NULL && len > 0){
    strncpy(str, result.c_str(), len);
    /* strncpy() does not terminate the destination when truncating. */
    str[len - 1]= '\0';
  }

  delete prop;
  DBUG_RETURN(1);
}

template class Vector<const ParserRow<ParserDummy>*>;
+19 −56
Original line number Diff line number Diff line
@@ -1057,7 +1057,8 @@ CommandInterpreter::executeShutdown(char* parameters)
  NdbAutoPtr<char> ap1((char*)state);

  int result = 0;
  result = ndb_mgm_stop(m_mgmsrv, 0, 0);
  int need_disconnect;
  result = ndb_mgm_stop3(m_mgmsrv, -1, 0, 0, &need_disconnect);
  if (result < 0) {
    ndbout << "Shutdown of NDB Cluster node(s) failed." << endl;
    printError();
@@ -1066,39 +1067,11 @@ CommandInterpreter::executeShutdown(char* parameters)

  ndbout << result << " NDB Cluster node(s) have shutdown." << endl;

  int nodeId= 0;
  int this_mgmd= 0;
  this_mgmd= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);
  while(get_next_nodeid(state, &nodeId, NDB_MGM_NODE_TYPE_MGM))
  {
    if(nodeId==this_mgmd)
      continue;
    ndbout << "Shutting down NDB Cluster management server nodeId="
           << nodeId << "...";
    result = ndb_mgm_stop(m_mgmsrv, 1, &nodeId);
    if (result <= 0) {
      ndbout << " failed." << endl;
      printError();
    }
    else
      ndbout << "Done." << endl;
  }

  ndbout << "Shutting down NDB Cluster management server nodeId="
         << this_mgmd << "...";
  result= ndb_mgm_stop(m_mgmsrv, 1, &this_mgmd);
  if (result <= 0) {
    ndbout << " failed." << endl;
    printError();
  }
  else
  {
    ndbout << "Done." << endl;
  if(need_disconnect) {
    ndbout << "Disconnecting to allow management server to shutdown."
           << endl;
    disconnect();
  }
  ndbout << "NDB Cluster management servers shutdown." << endl;
  return 0;
}

@@ -1487,6 +1460,7 @@ CommandInterpreter::executeStop(Vector<BaseString> &command_list,
                                unsigned command_pos,
                                int *node_ids, int no_of_nodes)
{
  int need_disconnect;
  int abort= 0;
  for (; command_pos < command_list.size(); command_pos++)
  {
@@ -1501,7 +1475,8 @@ CommandInterpreter::executeStop(Vector<BaseString> &command_list,
    return;
  }

  int result= ndb_mgm_stop2(m_mgmsrv, no_of_nodes, node_ids, abort);
  int result= ndb_mgm_stop3(m_mgmsrv, no_of_nodes, node_ids, abort,
                            &need_disconnect);
  if (result < 0)
  {
    ndbout_c("Shutdown failed.");
@@ -1513,27 +1488,19 @@ CommandInterpreter::executeStop(Vector<BaseString> &command_list,
      ndbout_c("NDB Cluster has shutdown.");
    else
    {
      int mgm_id= 0;
      int need_reconnect= 0;
      mgm_id= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);
      ndbout << "Node";
      for (int i= 0; i < no_of_nodes; i++)
      {
        if(node_ids[i] == mgm_id)
          need_reconnect= 1;
        else
          ndbout << " " << node_ids[i];
      }
      ndbout_c(" has shutdown.");
      if(need_reconnect)
      {
        ndbout << "You are connected to node " << mgm_id
               << ", disconnecting to allow it to shutdown"
               << endl;
        disconnect();
    }
  }

  if(need_disconnect)
  {
    ndbout << "Disconnecting to allow Management Server to shutdown" << endl;
    disconnect();
  }

}

void
@@ -1624,6 +1591,7 @@ CommandInterpreter::executeRestart(Vector<BaseString> &command_list,
  int nostart= 0;
  int initialstart= 0;
  int abort= 0;
  int need_disconnect= 0;

  for (; command_pos < command_list.size(); command_pos++)
  {
@@ -1648,8 +1616,8 @@ CommandInterpreter::executeRestart(Vector<BaseString> &command_list,
    return;
  }

  result= ndb_mgm_restart2(m_mgmsrv, no_of_nodes, node_ids,
                           initialstart, nostart, abort);
  result= ndb_mgm_restart3(m_mgmsrv, no_of_nodes, node_ids,
                           initialstart, nostart, abort, &need_disconnect);

  if (result <= 0) {
    ndbout_c("Restart failed.");
@@ -1661,18 +1629,13 @@ CommandInterpreter::executeRestart(Vector<BaseString> &command_list,
      ndbout_c("NDB Cluster is being restarted.");
    else
    {
      int mgm_id= 0;
      mgm_id= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);

      ndbout << "Node";
      for (int i= 0; i < no_of_nodes; i++)
      {
        if(node_ids[i] == mgm_id)
          disconnect();
        ndbout << " " << node_ids[i];
      }
      ndbout_c(" is being restarted");
    }
    if(need_disconnect)
      disconnect();
  }
}

+21 −0
Original line number Diff line number Diff line
@@ -1196,6 +1196,27 @@ int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
  return ret;
}

int MgmtSrvr::shutdownMGM(int *stopCount, bool abort, int *stopSelf)
{
  NodeId nodeId = 0;
  int error;

  while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
  {
    if(nodeId==getOwnNodeId())
      continue;
    error= sendStopMgmd(nodeId, abort, true, false,
                        false, false);
    if (error == 0)
      *stopCount++;
  }

  *stopSelf= 1;
  *stopCount++;

  return 0;
}

/*
 * Perform DB nodes shutdown.
 * MGM servers are left in their current state
+2 −0
Original line number Diff line number Diff line
@@ -256,6 +256,8 @@ public:
  int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort,
                int *stopSelf);

  int shutdownMGM(int *stopCount, bool abort, int *stopSelf);

  /**
   * shutdown the DB nodes
   */
Loading