Commit f8080302 authored by unknown's avatar unknown
Browse files

Fix for BUG#2921 "Replication problem on mutex lock in mySQL-4.0.18":

re-using unused LOCK_active_mi to serialize all administrative
commands related to replication:
START SLAVE, STOP SLAVE, RESET SLAVE, CHANGE MASTER, init_slave()
(replication autostart at server startup), end_slave() (replication
autostop at server shutdown), LOAD DATA FROM MASTER.
This protects us against a handful of deadlocks (like BUG#2921, which
happens when two START SLAVE run concurrently; the same can happen with
two STOP SLAVE).
Removing unused variables.


sql/item_func.cc:
  We don't need LOCK_active_mi just to run MASTER_POS_WAIT().
sql/repl_failsafe.cc:
  no need for macro
sql/set_var.cc:
  no need for macro
sql/slave.cc:
  Re-using unused LOCK_active_mi to serialize all administrative
  commands related to replication:
  START SLAVE, STOP SLAVE, RESET SLAVE, CHANGE MASTER, init_slave()
  (replication autostart at server startup), end_slave() (replication
  autostop at server shutdown), LOAD DATA FROM MASTER.
  This protects us against a handful of deadlocks.
  Removing unused variables.
sql/slave.h:
  Re-using LOCK_active_mi to serialize administrative replication commands.
  Macros unneeded. Removing unneeded variables.
sql/sql_parse.cc:
  found unused variable.
  Replacing macros.
sql/sql_show.cc:
  replacing macros
parent c6d91e00
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -1543,13 +1543,11 @@ longlong Item_master_pos_wait::val_int()
  }
  longlong pos = args[1]->val_int();
  longlong timeout = (arg_count==3) ? args[2]->val_int() : 0 ;
  LOCK_ACTIVE_MI;
  if ((event_count = active_mi->rli.wait_for_pos(thd, log_name, pos, timeout)) == -2)
  {
    null_value = 1;
    event_count=0;
  }
  UNLOCK_ACTIVE_MI;
  return event_count;
}

+4 −4
Original line number Diff line number Diff line
@@ -750,7 +750,7 @@ int load_master_data(THD* thd)
    We do not want anyone messing with the slave at all for the entire
    duration of the data load.
  */
  LOCK_ACTIVE_MI;
  pthread_mutex_lock(&LOCK_active_mi);
  lock_slave_threads(active_mi);
  init_thread_mask(&restart_thread_mask,active_mi,0 /*not inverse*/);
  if (restart_thread_mask &&
@@ -759,7 +759,7 @@ int load_master_data(THD* thd)
  {
    send_error(&thd->net,error);
    unlock_slave_threads(active_mi);
    UNLOCK_ACTIVE_MI;
    pthread_mutex_unlock(&LOCK_active_mi);
    return 1;
  }
  
@@ -913,7 +913,7 @@ int load_master_data(THD* thd)
  {
    send_error(&thd->net, 0, "Failed purging old relay logs");
    unlock_slave_threads(active_mi);
    UNLOCK_ACTIVE_MI;
    pthread_mutex_unlock(&LOCK_active_mi);
    return 1;
  }
  pthread_mutex_lock(&active_mi->rli.data_lock);
@@ -934,7 +934,7 @@ int load_master_data(THD* thd)

err:
  unlock_slave_threads(active_mi);
  UNLOCK_ACTIVE_MI;
  pthread_mutex_unlock(&LOCK_active_mi);
  thd->proc_info = 0;

  mc_mysql_close(&mysql); // safe to call since we always do mc_mysql_init()
+4 −4
Original line number Diff line number Diff line
@@ -1271,7 +1271,7 @@ byte *sys_var_insert_id::value_ptr(THD *thd, enum_var_type type)
bool sys_var_slave_skip_counter::check(THD *thd, set_var *var)
{
  int result= 0;
  LOCK_ACTIVE_MI;
  pthread_mutex_lock(&LOCK_active_mi);
  pthread_mutex_lock(&active_mi->rli.run_lock);
  if (active_mi->rli.slave_running)
  {
@@ -1279,14 +1279,14 @@ bool sys_var_slave_skip_counter::check(THD *thd, set_var *var)
    result=1;
  }
  pthread_mutex_unlock(&active_mi->rli.run_lock);
  UNLOCK_ACTIVE_MI;
  pthread_mutex_unlock(&LOCK_active_mi);
  return result;
}


bool sys_var_slave_skip_counter::update(THD *thd, set_var *var)
{
  LOCK_ACTIVE_MI;
  pthread_mutex_lock(&LOCK_active_mi);
  pthread_mutex_lock(&active_mi->rli.run_lock);
  /*
    The following test should normally never be true as we test this
@@ -1300,7 +1300,7 @@ bool sys_var_slave_skip_counter::update(THD *thd, set_var *var)
    pthread_mutex_unlock(&active_mi->rli.data_lock);
  }
  pthread_mutex_unlock(&active_mi->rli.run_lock);
  UNLOCK_ACTIVE_MI;
  pthread_mutex_unlock(&LOCK_active_mi);
  return 0;
}

+17 −4
Original line number Diff line number Diff line
@@ -34,7 +34,6 @@ typedef bool (*CHECK_KILLED_FUNC)(THD*,void*);
volatile bool slave_sql_running = 0, slave_io_running = 0;
char* slave_load_tmpdir = 0;
MASTER_INFO *active_mi;
volatile int active_mi_in_use = 0;
HASH replicate_do_table, replicate_ignore_table;
DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table;
bool do_table_inited = 0, ignore_table_inited = 0;
@@ -114,8 +113,12 @@ int init_slave()
{
  DBUG_ENTER("init_slave");

  /* This is called when mysqld starts */

  /*
    This is called when mysqld starts, before client connections are
    accepted. However, bootstrap may conflict with us if it does START SLAVE,
    so it is safer to take the lock.
  */
  pthread_mutex_lock(&LOCK_active_mi);
  /*
    TODO: re-write this to iterate through the list of files
    for multi-master
@@ -160,9 +163,11 @@ int init_slave()
      goto err;
    }
  }
  pthread_mutex_unlock(&LOCK_active_mi);
  DBUG_RETURN(0);

err:
  pthread_mutex_unlock(&LOCK_active_mi);
  DBUG_RETURN(1);
}

@@ -806,7 +811,14 @@ static int end_slave_on_walk(MASTER_INFO* mi, gptr /*unused*/)

void end_slave()
{
  /* This is called when the server terminates, in close_connections(). */
  /*
    This is called when the server terminates, in close_connections().
    It terminates slave threads. However, some administrative command (CHANGE
    MASTER etc.) may still be running at that moment. If a START SLAVE is in
    progress, the mutex lock below makes us wait until the slave threads have
    started and START SLAVE has returned; then we terminate them here.
  */
  pthread_mutex_lock(&LOCK_active_mi);
  if (active_mi)
  {
    /*
@@ -827,6 +839,7 @@ void end_slave()
    delete active_mi;
    active_mi= 0;
  }
  pthread_mutex_unlock(&LOCK_active_mi);
}


+16 −20
Original line number Diff line number Diff line
@@ -27,12 +27,19 @@
/*
  MUTEXES in replication:

  LOCK_active_mi: this is meant for multimaster, when we can switch from a
  master to another. It protects active_mi. We don't care of it for the moment,
  as active_mi never moves (it's created at startup and deleted at shutdown, and
  not changed: it always points to the same MASTER_INFO struct), because we
  don't have multimaster. So for the moment, mi does not move, and mi->rli does
  not either.
  LOCK_active_mi: [note: this was originally meant for multimaster, to switch
  from a master to another, to protect active_mi] It is used to SERIALIZE ALL
  administrative commands of replication: START SLAVE, STOP SLAVE, CHANGE
  MASTER, RESET SLAVE, end_slave() (when mysqld stops) [init_slave() is called
  early, before client connections are accepted, but takes it too, to be safe
  against bootstrap]. Any of these commands holds the mutex from the
  start till the end. This thus protects us against a handful of deadlocks
  (consider start_slave_thread() which, when starting the I/O thread, releases
  mi->run_lock, keeps rli->run_lock, and tries to re-acquire mi->run_lock).

  Currently active_mi never moves (it's created at startup and deleted at
  shutdown, and not changed: it always points to the same MASTER_INFO struct),
  because we don't have multimaster. So for the moment, mi does not move, and
  mi->rli does not either.

  In MASTER_INFO: run_lock, data_lock
  run_lock protects all information about the run state: slave_running, and the
@@ -43,6 +50,9 @@
  In RELAY_LOG_INFO: run_lock, data_lock
  see MASTER_INFO
  
  Order of acquisition: if you want to have LOCK_active_mi and a run_lock, you
  must acquire LOCK_active_mi first.

  In MYSQL_LOG: LOCK_log, LOCK_index of the binlog and the relay log
  LOCK_log: when you write to it. LOCK_index: when you create/delete a binlog
  (so that you have to update the .index file).
@@ -64,19 +74,6 @@ enum enum_binlog_formats {
  BINLOG_FORMAT_323_LESS_57, 
  BINLOG_FORMAT_323_GEQ_57 };

/*
  TODO: this needs to be redone, but for now it does not matter since
  we do not have multi-master yet.
*/

#define LOCK_ACTIVE_MI { pthread_mutex_lock(&LOCK_active_mi); \
 ++active_mi_in_use; \
 pthread_mutex_unlock(&LOCK_active_mi);}

#define UNLOCK_ACTIVE_MI { pthread_mutex_lock(&LOCK_active_mi); \
 --active_mi_in_use; \
 pthread_mutex_unlock(&LOCK_active_mi); }

/*
  st_relay_log_info contains information on the current relay log and
  relay log offset, and master log name and log sequence corresponding to the
@@ -441,7 +438,6 @@ extern "C" pthread_handler_decl(handle_slave_io,arg);
extern "C" pthread_handler_decl(handle_slave_sql,arg);
extern bool volatile abort_loop;
extern MASTER_INFO main_mi, *active_mi; /* active_mi for multi-master */
extern volatile int active_mi_in_use;
extern LIST master_list;
extern HASH replicate_do_table, replicate_ignore_table;
extern DYNAMIC_ARRAY  replicate_wild_do_table, replicate_wild_ignore_table;
Loading