Commit 597d84fa authored by unknown's avatar unknown
Browse files

Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail

- adoptions to handle binlog

parent 72232c69
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -3,9 +3,9 @@ create table t1 (a int key) engine=ndbcluster;
begin;
insert into t1 values (1);
insert into t1 values (2);
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from ndbcluster
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from NDBCLUSTER
commit;
ERROR HY000: Got error 4350 'Transaction already aborted' from ndbcluster
ERROR HY000: Got error 4350 'Transaction already aborted' from NDBCLUSTER
drop table t1;
create table t2 (a int, b int, primary key(a,b)) engine=ndbcluster;
insert into t2 values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1);
@@ -14,6 +14,8 @@ a b
1	1
2	1
3	1
show tables like 't2';
Tables_in_test (t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
@@ -22,14 +24,12 @@ a
2
3
select * from t2 order by a limit 3;
ERROR HY000: Can't lock file (errno: 241)
select * from t2 order by a limit 3;
a
1
2
3
show tables;
Tables_in_test
show tables like 't2';
Tables_in_test (t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
+6 −3
Original line number Diff line number Diff line
@@ -36,23 +36,26 @@ select * from t2 order by a limit 3;

--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
# to ensure mysqld has connected again, and recreated system tables
--sleep 3

--connection server2
show tables like 't2';
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;

# server 1 should have a stale cache, and in this case wrong frm, transaction must be retried
--connection server1
--error 1015
select * from t2 order by a limit 3;
select * from t2 order by a limit 3;

--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
# to ensure mysqld has connected again, and recreated system tables
--sleep 3

--connection server1
show tables;
show tables like 't2';
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
+100 −4
Original line number Diff line number Diff line
@@ -1775,6 +1775,8 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
      // skip
      break;
    case NDBEVENT::TE_CLUSTER_FAILURE:
      if (ndb_extra_logging)
        sql_print_information("NDB Binlog: cluster failure for %s.", schema_share->key);
      // fall through
    case NDBEVENT::TE_DROP:
      if (ndb_extra_logging &&
@@ -1784,6 +1786,7 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
      free_share(&schema_share);
      schema_share= 0;
      ndb_binlog_tables_inited= FALSE;
      close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, FALSE);
      // fall through
    case NDBEVENT::TE_ALTER:
      ndb_handle_schema_change(thd, ndb, pOp, tmp_share);
@@ -2101,7 +2104,14 @@ int ndb_add_binlog_index(THD *thd, void *_row)
  Functions for start, stop, wait for ndbcluster binlog thread
*********************************************************************/

static int do_ndbcluster_binlog_close_connection= 0;
enum Binlog_thread_state
{
  BCCC_running= 0,
  BCCC_exit= 1,
  BCCC_restart= 2
};

static enum Binlog_thread_state do_ndbcluster_binlog_close_connection= BCCC_restart;

int ndbcluster_binlog_start()
{
@@ -2139,7 +2149,7 @@ static void ndbcluster_binlog_close_connection(THD *thd)
  DBUG_ENTER("ndbcluster_binlog_close_connection");
  const char *save_info= thd->proc_info;
  thd->proc_info= "ndbcluster_binlog_close_connection";
  do_ndbcluster_binlog_close_connection= 1;
  do_ndbcluster_binlog_close_connection= BCCC_exit;
  while (ndb_binlog_thread_running > 0)
    sleep(1);
  thd->proc_info= save_info;
@@ -3296,10 +3306,48 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
    thd->db= db;
  }

  for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
            ndb_latest_handled_binlog_epoch >= g_latest_trans_gci); )
restart:
  {
    // wait for the first event
    thd->proc_info= "Waiting for first event from ndbcluster";
    DBUG_PRINT("info", ("Waiting for the first event"));
    int schema_res= 0;
    Uint64 schema_gci= 0;
    while (schema_res == 0 && !abort_loop)
    {
      schema_res= s_ndb->pollEvents(100, &schema_gci);
    }
    // now check that we have epochs consistant with what we had before the restart
    if (schema_res > 0)
    {
      if (schema_gci < ndb_latest_handled_binlog_epoch)
      {
        sql_print_error("NDB Binlog: cluster has been restarted --initial or with older filesystem. "
                        "ndb_latest_handled_binlog_epoch: %u, while current epoch: %u. "
                        "RESET MASTER should be issued. Resetting ndb_latest_handled_binlog_epoch.",
                        (unsigned) ndb_latest_handled_binlog_epoch, (unsigned) schema_gci);
        g_latest_trans_gci= 0;
        ndb_latest_handled_binlog_epoch= 0;
        ndb_latest_applied_binlog_epoch= 0;
        ndb_latest_received_binlog_epoch= 0;
      }
    }
  }

  do_ndbcluster_binlog_close_connection= BCCC_running;
  for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
            ndb_latest_handled_binlog_epoch >= g_latest_trans_gci) &&
          do_ndbcluster_binlog_close_connection != BCCC_restart; )
  {
#ifndef DBUG_OFF
    if (do_ndbcluster_binlog_close_connection)
    {
      DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection: %d, "
                          "ndb_latest_handled_binlog_epoch: %llu, "
                          "g_latest_trans_gci: %llu", do_ndbcluster_binlog_close_connection,
                          ndb_latest_handled_binlog_epoch, g_latest_trans_gci));
    }
#endif
#ifdef RUN_NDB_BINLOG_TIMER
    main_timer.stop();
    sql_print_information("main_timer %ld ms",  main_timer.elapsed_ms());
@@ -3324,7 +3372,13 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
    ndb_latest_received_binlog_epoch= gci;

    while (gci > schema_gci && schema_res >= 0)
    {
      static char buf[64];
      thd->proc_info= "Waiting for schema epoch";
      my_snprintf(buf, sizeof(buf), "%s %u(%u)", thd->proc_info, (unsigned) schema_gci, (unsigned) gci);
      thd->proc_info= buf;
      schema_res= s_ndb->pollEvents(10, &schema_gci);
    }

    if ((abort_loop || do_ndbcluster_binlog_close_connection) &&
        (ndb_latest_handled_binlog_epoch >= g_latest_trans_gci ||
@@ -3360,10 +3414,31 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
      while (pOp != NULL)
      {
        if (!pOp->hasError())
        {
          ndb_binlog_thread_handle_schema_event(thd, s_ndb, pOp,
                                                &post_epoch_log_list,
                                                &post_epoch_unlock_list,
                                                &mem_root);
          DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
                              s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
                              "<empty>"));
          DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
                              i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
                              "<empty>"));
          if (i_ndb->getEventOperation() == NULL &&
              s_ndb->getEventOperation() == NULL &&
              do_ndbcluster_binlog_close_connection == BCCC_running)
          {
            DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
            do_ndbcluster_binlog_close_connection= BCCC_restart;
            if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
            {
              sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
                              "as latest received epoch is %lld",
                              g_latest_trans_gci, ndb_latest_received_binlog_epoch);
            }
          }
        }
        else
          sql_print_error("NDB: error %lu (%s) on handling "
                          "binlog schema event",
@@ -3532,6 +3607,25 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
            ndb_binlog_thread_handle_non_data_event(thd, i_ndb, pOp, row);
            // reset to catch errors
            i_ndb->setDatabaseName("");
            DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
                                s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
                                "<empty>"));
            DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
                                i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
                                "<empty>"));
            if (i_ndb->getEventOperation() == NULL &&
                s_ndb->getEventOperation() == NULL &&
                do_ndbcluster_binlog_close_connection == BCCC_running)
            {
              DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
              do_ndbcluster_binlog_close_connection= BCCC_restart;
              if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
              {
                sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
                                "as latest received epoch is %lld",
                                g_latest_trans_gci, ndb_latest_received_binlog_epoch);
              }
            }
          }

          pOp= i_ndb->nextEvent();
@@ -3587,6 +3681,8 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
    *root_ptr= old_root;
    ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch;
  }
  if (do_ndbcluster_binlog_close_connection != BCCC_exit)
    goto restart;
err:
  DBUG_PRINT("info",("Shutting down cluster binlog thread"));
  thd->proc_info= "Shutting down";
+13 −0
Original line number Diff line number Diff line
@@ -71,6 +71,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
  noOfConnectedNodes= 0;
  theClusterMgrThread= 0;
  m_connect_count = 0;
  m_cluster_state = CS_waiting_for_clean_cache;
  DBUG_VOID_RETURN;
}

@@ -175,6 +176,16 @@ ClusterMgr::threadMain( ){
    int send_heartbeat_now= global_flag_send_heartbeat_now;
    global_flag_send_heartbeat_now= 0;

    if (m_cluster_state == CS_waiting_for_clean_cache)
    {
      theFacade.m_globalDictCache.lock();
      unsigned sz= theFacade.m_globalDictCache.get_size();
      theFacade.m_globalDictCache.unlock();
      if (sz)
        goto next;
      m_cluster_state = CS_waiting_for_first_connect;
    }

    theFacade.lock_mutex();
    for (int i = 1; i < MAX_NODES; i++){
      /**
@@ -223,6 +234,7 @@ ClusterMgr::threadMain( ){
     */
    theFacade.unlock_mutex();
    
next:
    // Sleep for 100 ms between each Registration Heartbeat
    Uint64 before = now;
    NdbSleep_MilliSleep(100); 
@@ -450,6 +462,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
    theFacade.m_globalDictCache.invalidate_all();
    theFacade.m_globalDictCache.unlock();
    m_connect_count ++;
    m_cluster_state = CS_waiting_for_clean_cache;
    NFCompleteRep rep;
    for(Uint32 i = 1; i<MAX_NODES; i++){
      if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
+6 −1
Original line number Diff line number Diff line
@@ -57,6 +57,11 @@ private:
  class TransporterFacade & theFacade;
  
public:
  enum Cluster_state {
    CS_waiting_for_clean_cache = 0,
    CS_waiting_for_first_connect,
    CS_connected
  };
  struct Node {
    Node();
    bool defined;
@@ -86,7 +91,7 @@ private:
  Uint32        noOfConnectedNodes;
  Node          theNodes[MAX_NODES];
  NdbThread*    theClusterMgrThread;
  
  enum Cluster_state m_cluster_state;
  /**
   * Used for controlling start/stop of the thread
   */
Loading