Commit 8f318cf1 authored by unknown's avatar unknown
Browse files

fix bug#7193

    added start backup parameter to enable setting wait state
    and set different timeouts depending on waitstate
    moved listening for backup events out of backup and into a separate thread
    thread created at connect() and destroyed at disconnect()
    added start backup command options "nowait" "wait completed" "wait started"
    fixed log level settings


ndb/include/mgmapi/mgmapi.h:
  added start backup parameter to enable setting wait state
ndb/src/common/debugger/EventLogger.cpp:
  changed to unsigned printout
ndb/src/mgmapi/mgmapi.cpp:
  added start backup parameter to enable setting wait state,
  and set different timeouts depending on waitstate
ndb/src/mgmclient/CommandInterpreter.cpp:
  moved listening for backup events out of backup and into a separate thread
  thread created at connect() and destroyed at disconnect()
  added start backup command options "nowait" "wait completed" "wait started"
ndb/src/mgmsrv/MgmtSrvr.cpp:
  added more options to start backup to set wait state
  + set timeout depending on wait state
  + some debug printouts
ndb/src/mgmsrv/MgmtSrvr.hpp:
  added more options to start backup to set wait state
ndb/src/mgmsrv/Services.cpp:
  removed old backup code
  added use of start backup options
  fixed log level settings
ndb/test/src/NdbBackup.cpp:
  adapted to new wait state option in start backup
parent 45ec54e4
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -666,11 +666,15 @@ extern "C" {
   * Start backup
   *
   * @param   handle        NDB management handle.
   * @param   wait_completed 0=don't wait for confirmation
                             1=wait for backup started
                             2=wait for backup completed
   * @param   backup_id     Backup id is returned from function.
   * @param   reply         Reply message.
   * @return                -1 on error.
   */
  int ndb_mgm_start_backup(NdbMgmHandle handle, unsigned int* backup_id,
  int ndb_mgm_start_backup(NdbMgmHandle handle, int wait_completed,
			   unsigned int* backup_id,
			   struct ndb_mgm_reply* reply);

  /**
+4 −4
Original line number Diff line number Diff line
@@ -1280,10 +1280,10 @@ EventLogger::getText(char * m_text, size_t m_text_len,
  case EventReport::BackupCompleted:
    BaseString::snprintf(m_text,
	       m_text_len,
	       "%sBackup %d started from node %d completed\n" 
	       " StartGCP: %d StopGCP: %d\n"
	       " #Records: %d #LogRecords: %d\n"
	       " Data: %d bytes Log: %d bytes",
	       "%sBackup %u started from node %u completed\n" 
	       " StartGCP: %u StopGCP: %u\n"
	       " #Records: %u #LogRecords: %u\n"
	       " Data: %u bytes Log: %u bytes",
	       theNodeId, theData[2], refToNode(theData[1]),
	       theData[3], theData[4], theData[6], theData[8],
	       theData[5], theData[7]);
+12 −2
Original line number Diff line number Diff line
@@ -1545,7 +1545,8 @@ ndb_mgm_start(NdbMgmHandle handle, int no_of_nodes, const int * node_list)
 *****************************************************************************/
extern "C"
int 
ndb_mgm_start_backup(NdbMgmHandle handle, unsigned int* _backup_id,
ndb_mgm_start_backup(NdbMgmHandle handle, int wait_completed,
		     unsigned int* _backup_id,
		     struct ndb_mgm_reply* /*reply*/) 
{
  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_start_backup");
@@ -1559,8 +1560,17 @@ ndb_mgm_start_backup(NdbMgmHandle handle, unsigned int* _backup_id,
  CHECK_CONNECTED(handle, -1);

  Properties args;
  args.put("completed", wait_completed);
  const Properties *reply;
  { // start backup can take some time, set timeout high
    Uint64 old_timeout= handle->read_timeout;
    if (wait_completed == 2)
      handle->read_timeout= 30*60*1000; // 30 minutes
    else if (wait_completed == 1)
      handle->read_timeout= 5*60*1000; // 5 minutes
    reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args);
    handle->read_timeout= old_timeout;
  }
  CHECK_REPLY(reply, -1);

  BaseString result;
+164 −35
Original line number Diff line number Diff line
@@ -154,7 +154,8 @@ class CommandInterpreter {
		     const char * param);

  NdbMgmHandle m_mgmsrv;
  bool connected;
  NdbMgmHandle m_mgmsrv2;
  bool m_connected;
  int m_verbose;
  int try_reconnect;
  int m_error;
@@ -163,6 +164,7 @@ class CommandInterpreter {
  const char *rep_host;
  bool rep_connected;
#endif
  struct NdbThread* m_event_thread;
};


@@ -261,7 +263,8 @@ static const char* helpText =
"SHOW CONFIG                            Print configuration\n"
"SHOW PARAMETERS                        Print configuration parameters\n"
#endif
"START BACKUP                           Start backup\n"
"START BACKUP [NOWAIT | WAIT STARTED | WAIT COMPLETED]\n"
"                                       Start backup (default WAIT COMPLETED)\n"
"ABORT BACKUP <backup id>               Abort backup\n"
"SHUTDOWN                               Shutdown all processes in cluster and quit\n"
"CLUSTERLOG ON [<severity>] ...         Enable Cluster logging\n"
@@ -386,13 +389,19 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
    ndbout_c("Cannot create handle to management server.");
    exit(-1);
  }
  m_mgmsrv2 = ndb_mgm_create_handle();
  if(m_mgmsrv2 == NULL) {
    ndbout_c("Cannot create handle to management server.");
    exit(-1);
  }
  if (ndb_mgm_set_connectstring(m_mgmsrv, _host))
  {
    printError();
    exit(-1);
  }

  connected = false;
  m_connected= false;
  m_event_thread= 0;
  try_reconnect = 0;
#ifdef HAVE_GLOBAL_REPLICATION
  rep_host = NULL;
@@ -406,8 +415,9 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
 */
CommandInterpreter::~CommandInterpreter() 
{
  connected = false;
  disconnect();
  ndb_mgm_destroy_handle(&m_mgmsrv);
  ndb_mgm_destroy_handle(&m_mgmsrv2);
}

static bool 
@@ -430,7 +440,10 @@ void
CommandInterpreter::printError() 
{
  if (ndb_mgm_check_connection(m_mgmsrv))
    connected= false;
  {
    m_connected= false;
    disconnect();
  }
  ndbout_c("* %5d: %s", 
	   ndb_mgm_get_latest_error(m_mgmsrv),
	   ndb_mgm_get_latest_error_msg(m_mgmsrv));
@@ -440,32 +453,90 @@ CommandInterpreter::printError()
//*****************************************************************************
//*****************************************************************************

static int do_event_thread;
/**
 * Entry point of the backup event listener thread.
 *
 * Subscribes to backup-category events through the management handle that
 * @p m points to, and echoes every line received to ndbout for as long as
 * the file-level flag do_event_thread stays non-zero.
 *
 * @param m  Pointer to the NdbMgmHandle to listen on (passed as void* by the
 *           thread creation call).
 * @return   Always 0.
 */
static void*
event_thread_run(void* m)
{
  NdbMgmHandle handle= *(NdbMgmHandle*)m;

  my_thread_init();

  // Zero-terminated filter: one {15, BACKUP} entry.
  // NOTE(review): 15 is presumably the event level - confirm against mgmapi.
  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0 };
  int fd = ndb_mgm_listen_event(handle, filter);
  if (fd > 0)
  {
    {
      char *tmp= 0;
      char buf[1024];
      // NOTE(review): second argument is presumably a read timeout - confirm.
      SocketInputStream in(fd,10);
      do {
        // Nothing was read on the previous pass: back off briefly before
        // polling again so an idle connection does not spin.
        if (tmp == 0) NdbSleep_MilliSleep(10);
        if((tmp = in.gets(buf, sizeof(buf))))
          ndbout << tmp;
      } while(do_event_thread);
    }
    // The descriptor was handed to this thread by ndb_mgm_listen_event and
    // nothing else releases it; close it once the stream is gone so that
    // repeated connect/disconnect cycles do not leak one socket each.
    close(fd);
  }

  my_thread_end();
  NdbThread_Exit(0);
  return 0;
}

bool
CommandInterpreter::connect() 
{
  if(!connected) {
  if(!m_connected)
  {
    if(!ndb_mgm_connect(m_mgmsrv, try_reconnect-1, 5, 1))
    {
      connected = true;
      const char *host= ndb_mgm_get_connected_host(m_mgmsrv);
      unsigned port= ndb_mgm_get_connected_port(m_mgmsrv);
      if(!ndb_mgm_set_connectstring(m_mgmsrv2,
				    BaseString(host).appfmt(":%d",port).c_str())
	 &&
	 !ndb_mgm_connect(m_mgmsrv2, try_reconnect-1, 5, 1))
      {
	m_connected= true;
	if (m_verbose)
	{
	  printf("Connected to Management Server at: %s:%d\n",
	       ndb_mgm_get_connected_host(m_mgmsrv),
	       ndb_mgm_get_connected_port(m_mgmsrv));
		 host, port);
	}
	{
	  do_event_thread= 1;
	  m_event_thread = NdbThread_Create(event_thread_run,
					    (void**)&m_mgmsrv2,
					    32768,
					    "CommandInterpreted_event_thread",
					    NDB_THREAD_PRIO_LOW);
	}
      }
      else
      {
	ndb_mgm_disconnect(m_mgmsrv);
      }
    }
  }
  return connected;
  return m_connected;
}

bool 
CommandInterpreter::disconnect() 
{
  if (connected && (ndb_mgm_disconnect(m_mgmsrv) == -1)) {
  if (m_event_thread) {
    void *res;
    do_event_thread= 0;
    NdbThread_WaitFor(m_event_thread, &res);
    NdbThread_Destroy(&m_event_thread);
    m_event_thread= 0;
    ndb_mgm_disconnect(m_mgmsrv2);
  }
  if (m_connected)
  {
    if (ndb_mgm_disconnect(m_mgmsrv) == -1) {
      ndbout_c("Could not disconnect from management server");
      printError();
    }
  connected = false;
    m_connected= false;
  }
  return true;
}

@@ -914,7 +985,8 @@ CommandInterpreter::executeShutdown(char* parameters)
    return result;
  }

  connected = false;
  m_connected= false;
  disconnect();
  ndbout << "NDB Cluster management server shutdown." << endl;
  return 0;
}
@@ -1882,21 +1954,68 @@ CommandInterpreter::executeEventReporting(int processId,
 * Backup
 *****************************************************************************/
int
CommandInterpreter::executeStartBackup(char* /*parameters*/) 
CommandInterpreter::executeStartBackup(char* parameters)
{
  struct ndb_mgm_reply reply;
  unsigned int backupId;

#if 0
  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0 };
  int fd = ndb_mgm_listen_event(m_mgmsrv, filter);
  int result = ndb_mgm_start_backup(m_mgmsrv, &backupId, &reply);
  if (fd < 0)
  {
    ndbout << "Initializing start of backup failed" << endl;
    printError();
    return fd;
  }
#endif
  Vector<BaseString> args;
  {
    BaseString(parameters).split(args);
    for (unsigned i= 0; i < args.size(); i++)
      if (args[i].length() == 0)
	args.erase(i--);
      else
	args[i].ndb_toupper();
  }
  int sz= args.size();

  int result;
  if (sz == 2 &&
      args[1] == "NOWAIT")
  {
    result = ndb_mgm_start_backup(m_mgmsrv, 0, &backupId, &reply);
  }
  else if (sz == 1 ||
	   (sz == 3 &&
	    args[1] == "WAIT" &&
	    args[2] == "COMPLETED"))
  {
    ndbout_c("Waiting for completed, this may take several minutes");
    result = ndb_mgm_start_backup(m_mgmsrv, 2, &backupId, &reply);
  }
  else if (sz == 3 &&
	   args[1] == "WAIT" &&
	   args[2] == "STARTED")
  {
    ndbout_c("Waiting for started, this may take several minutes");
    result = ndb_mgm_start_backup(m_mgmsrv, 1, &backupId, &reply);
  }
  else
  {
    invalid_command(parameters);
    return -1;
  }

  if (result != 0) {
    ndbout << "Start of backup failed" << endl;
    printError();
#if 0
    close(fd);
#endif
    return result;
  }

#if 0
  ndbout_c("Waiting for completed, this may take several minutes");
  char *tmp;
  char buf[1024];
  {
@@ -1925,20 +2044,25 @@ CommandInterpreter::executeStartBackup(char* /*parameters*/)
  } while(tmp && tmp[0] != 0);

  close(fd);
#endif  
  return 0;
}

void
CommandInterpreter::executeAbortBackup(char* parameters) 
{
  strtok(parameters, " ");
  int bid = -1;
  struct ndb_mgm_reply reply;
  if (emptyString(parameters))
    goto executeAbortBackupError1;

  {
    strtok(parameters, " ");
    char* id = strtok(NULL, "\0");
  int bid = -1;
  if(id == 0 || sscanf(id, "%d", &bid) != 1){
    ndbout << "Invalid arguments: expected <BackupId>" << endl;
    return;
    if(id == 0 || sscanf(id, "%d", &bid) != 1)
      goto executeAbortBackupError1;
  }
  {
    int result= ndb_mgm_abort_backup(m_mgmsrv, bid, &reply);
    if (result != 0) {
      ndbout << "Abort of backup " << bid << " failed" << endl;
@@ -1947,6 +2071,11 @@ CommandInterpreter::executeAbortBackup(char* parameters)
      ndbout << "Abort of backup " << bid << " ordered" << endl;
    }
  }
  return;
 executeAbortBackupError1:
  ndbout << "Invalid arguments: expected <BackupId>" << endl;
  return;
}

#ifdef HAVE_GLOBAL_REPLICATION
/*****************************************************************************
+23 −5
Original line number Diff line number Diff line
@@ -2397,7 +2397,7 @@ MgmtSrvr::eventReport(NodeId nodeId, const Uint32 * theData)
 * Backup
 ***************************************************************************/
int
MgmtSrvr::startBackup(Uint32& backupId, bool waitCompleted)
MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
{
  bool next;
  NodeId nodeId = 0;
@@ -2419,11 +2419,16 @@ MgmtSrvr::startBackup(Uint32& backupId, bool waitCompleted)
  req->backupDataLen = 0;

  int result;
  if (waitCompleted) {
    result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED, signal, true);
  if (waitCompleted == 2) {
    result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED,
			   signal, true, 30*60*1000 /*30 mins*/);
  }
  else if (waitCompleted == 1) {
    result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED,
			   signal, true, 5*60*1000 /*5 mins*/);
  }
  else {
    result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED, signal, true);
    result = sendRecSignal(nodeId, NO_WAIT, signal, true);
  }
  if (result == -1) {
    return SEND_OR_RECEIVE_FAILED;
@@ -2502,18 +2507,31 @@ MgmtSrvr::abortBackup(Uint32 backupId)
void
MgmtSrvr::backupCallback(BackupEvent & event)
{
  DBUG_ENTER("MgmtSrvr::backupCallback");
  m_lastBackupEvent = event;
  switch(event.Event){
  case BackupEvent::BackupFailedToStart:
    DBUG_PRINT("info",("BackupEvent::BackupFailedToStart"));
    theWaitState = NO_WAIT;
    break;
  case BackupEvent::BackupAborted:
    DBUG_PRINT("info",("BackupEvent::BackupAborted"));
    theWaitState = NO_WAIT;
    break;
  case BackupEvent::BackupCompleted:
    DBUG_PRINT("info",("BackupEvent::BackupCompleted"));
    theWaitState = NO_WAIT;
    break;
  case BackupEvent::BackupStarted:
    if(theWaitState == WAIT_BACKUP_STARTED)
    {
      DBUG_PRINT("info",("BackupEvent::BackupStarted NO_WAIT"));
      theWaitState = NO_WAIT;
    } else {
      DBUG_PRINT("info",("BackupEvent::BackupStarted"));
    }
  return;
  }
  DBUG_VOID_RETURN;
}


Loading