Commit 45baf2b5 authored by unknown's avatar unknown
Browse files

Merge heikki@bk-internal.mysql.com:/home/bk/mysql-5.0

into hundin.mysql.fi:/home/heikki/mysql-5.0


sql/mysqld.cc:
  Auto merged
parents 79b88ac4 19429826
Loading
Loading
Loading
Loading
+281 −6
Original line number Diff line number Diff line
@@ -45,10 +45,58 @@ have disables the InnoDB inlining in this file. */

#include "ha_innodb.h"

pthread_mutex_t innobase_share_mutex, // to protect innobase_open_files
                prepare_commit_mutex; // to force correct commit order in binlog
pthread_mutex_t innobase_share_mutex, /* to protect innobase_open_files */
                prepare_commit_mutex; /* to force correct commit order in
				      binlog */
bool innodb_inited= 0;

/*-----------------------------------------------------------------*/
/* These variables are used to implement (semi-)synchronous MySQL binlog
replication for InnoDB tables. */

pthread_cond_t  innobase_repl_cond;             /* Posix cond variable;
                                                this variable is signaled
                                                when enough binlog has been
                                                sent to slave, so that a
                                                waiting trx can return the
                                                'ok' message to the client
                                                for a commit */
pthread_mutex_t innobase_repl_cond_mutex;       /* Posix cond variable mutex
                                                that also protects the next
                                                innobase_repl_... variables */
uint            innobase_repl_state;            /* 1 if synchronous replication
                                                is switched on and is working
                                                ok; else 0 */
uint            innobase_repl_file_name_inited  = 0; /* This is set to 1 when
                                                innobase_repl_file_name
                                                contains meaningful data */
char*           innobase_repl_file_name;        /* The binlog name up to which
                                                we have sent some binlog to
                                                the slave */
my_off_t        innobase_repl_pos;              /* The position in that file
                                                up to which we have sent the
                                                binlog to the slave */
uint            innobase_repl_n_wait_threads    = 0; /* This tells how many
                                                transactions currently are
                                                waiting for the binlog to be
                                                sent to the client */
uint            innobase_repl_wait_file_name_inited = 0; /* This is set to 1
                                                when we know the 'smallest'
                                                wait position */
char*           innobase_repl_wait_file_name;   /* NULL, or the 'smallest'
                                                innobase_repl_file_name that
                                                a transaction is waiting for */
my_off_t        innobase_repl_wait_pos;         /* The smallest position in
                                                that file that a trx is
                                                waiting for: the trx can
                                                proceed and send an 'ok' to
                                                the client when MySQL has sent
                                                the binlog up to this position
                                                to the slave */
/*-----------------------------------------------------------------*/



/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */
@@ -1531,10 +1579,10 @@ innobase_commit(
	DBUG_RETURN(0);
}

/* The following defined-out code will be enabled later when we put the
/* TODO: put the
MySQL-4.1 functionality back to 5.0. This is needed to get InnoDB Hot Backup
to work. */
#if 0

/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
@@ -1563,6 +1611,24 @@ innobase_report_binlog_offset_and_commit(
	trx->mysql_log_file_name = log_file_name;
	trx->mysql_log_offset = (ib_longlong)end_offset;

        if (thd->variables.sync_replication) {
                /* Let us store the binlog file name and the position, so that
                we know how long to wait for the binlog to the replicated to
                the slave in synchronous replication. */

                if (trx->repl_wait_binlog_name == NULL) {

                        trx->repl_wait_binlog_name =
                                        (char*)mem_alloc(FN_REFLEN + 100);
                }

                ut_a(strlen(log_file_name) <= FN_REFLEN + 100);

                strcpy(trx->repl_wait_binlog_name, log_file_name);

                trx->repl_wait_binlog_pos = (ib_longlong)end_offset;
        }

	trx->flush_log_later = TRUE;

	innobase_commit(thd, trx_handle);
@@ -1572,6 +1638,7 @@ innobase_report_binlog_offset_and_commit(
	return(0);
}

#if 0
/***********************************************************************
This function stores the binlog offset and flushes logs. */

@@ -1602,7 +1669,6 @@ innobase_store_binlog_offset_and_flush_log(
	/* Syncronous flush of the log buffer to disk */
	log_buffer_flush_to_disk();
}

#endif

/*********************************************************************
@@ -1615,7 +1681,10 @@ innobase_commit_complete(
                                /* out: 0 */
        THD*    thd)            /* in: user thread */
{
	struct timespec abstime;
	trx_t*	trx;
	int	cmp;
	int	ret;

        trx = (trx_t*) thd->ha_data[innobase_hton.slot];

@@ -1631,9 +1700,215 @@ innobase_commit_complete(
                trx_commit_complete_for_mysql(trx);
        }

        printf("Wait binlog name %s, repl state %lu\n",
                        trx->repl_wait_binlog_name,
                        (uint)innobase_repl_state);

        if (thd->variables.sync_replication
            && trx->repl_wait_binlog_name
            && innobase_repl_state != 0) {

                /* In synchronous replication, let us wait until the MySQL
                replication has sent the relevant binlog segment to the
                replication slave. */

/* TODO: Make sure MySQL uses some way (TCP_NODELAY?) to ensure that the data
has been received in the slave! */

                pthread_mutex_lock(&innobase_repl_cond_mutex);
try_again:
                if (innobase_repl_state == 0) {

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

                        return(0);
                }

                cmp = strcmp(innobase_repl_file_name,
                                        trx->repl_wait_binlog_name);
                if (cmp > 0
                    || (cmp == 0 && innobase_repl_pos
                                    >= (my_off_t)trx->repl_wait_binlog_pos)) {
                        /* We have already sent the relevant binlog to the
                        slave: no need to wait here */

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

/*                      printf("Binlog now sent\n"); */

                        return(0);
                }

                /* Let us update the info about the minimum binlog position
                of waiting threads in the innobase_repl_... variables */

                if (innobase_repl_wait_file_name_inited != 0) {
                        cmp = strcmp(trx->repl_wait_binlog_name,
                                        innobase_repl_wait_file_name);
                        if (cmp < 0
                            || (cmp == 0 && (my_off_t)trx->repl_wait_binlog_pos
                                         <= innobase_repl_wait_pos)) {
                                /* This thd has an even lower position, let
                                us update the minimum info */

                                strcpy(innobase_repl_wait_file_name,
                                        trx->repl_wait_binlog_name);

                                innobase_repl_wait_pos =
                                        trx->repl_wait_binlog_pos;
                        }
                } else {
                        strcpy(innobase_repl_wait_file_name,
                                                trx->repl_wait_binlog_name);

                        innobase_repl_wait_pos = trx->repl_wait_binlog_pos;

                        innobase_repl_wait_file_name_inited = 1;
                }
                set_timespec(abstime, thd->variables.sync_replication_timeout);

                /* Let us suspend this thread to wait on the condition;
                when replication has progressed far enough, we will release
                these waiting threads. The following call
                pthread_cond_timedwait also atomically unlocks
                innobase_repl_cond_mutex. */

                innobase_repl_n_wait_threads++;

/*              printf("Waiting for binlog to be sent\n"); */

                ret = pthread_cond_timedwait(&innobase_repl_cond,
                                        &innobase_repl_cond_mutex, &abstime);
                innobase_repl_n_wait_threads--;

                if (ret != 0) {
                        ut_print_timestamp(stderr);

                        fprintf(stderr,
"  InnoDB: Error: MySQL synchronous replication\n"
"InnoDB: was not able to send the binlog to the slave within the\n"
"InnoDB: timeout %lu. We assume that the slave has become inaccessible,\n"
"InnoDB: and switch off synchronous replication until the communication.\n"
"InnoDB: to the slave works again.\n",
				thd->variables.sync_replication_timeout);
                        fprintf(stderr,
"InnoDB: MySQL synchronous replication has sent binlog\n"
"InnoDB: to the slave up to file %s, position %lu\n", innobase_repl_file_name,
                                        (ulong)innobase_repl_pos);
                        fprintf(stderr,
"InnoDB: This transaction needs it to be sent up to\n"
"InnoDB: file %s, position %lu\n", trx->repl_wait_binlog_name,
                                        (uint)trx->repl_wait_binlog_pos);

                        innobase_repl_state = 0;

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

                        return(0);
                }

                goto try_again;
        }

	return(0);
}

/*********************************************************************
In synchronous replication, reports to InnoDB up to which binlog position
we have sent the binlog to the slave. Note that replication is synchronous
for one slave only. For other slaves, we do nothing in this function. This
function is used in a replication master. */

int
innobase_repl_report_sent_binlog(
/*=============================*/
                                /* out: 0 */
        THD*    thd,            /* in: thread doing the binlog communication to
                                the slave */
        char*   log_file_name,  /* in: binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file up to
                                which we sent the contents to the slave */
{
        int     cmp;
        ibool   can_release_threads     = 0;

        /* If synchronous replication is not switched on, or this thd is
        sending binlog to a slave where we do not need synchronous replication,
        then return immediately */

        if (thd->server_id != thd->variables.sync_replication_slave_id) {

                /* Do nothing */

                return(0);
        }

        pthread_mutex_lock(&innobase_repl_cond_mutex);

        if (innobase_repl_state == 0) {

                ut_print_timestamp(stderr);
                fprintf(stderr,
"  InnoDB: Switching MySQL synchronous replication on again at\n"
"InnoDB: binlog file %s, position %lu\n", log_file_name, (ulong)end_offset);

                innobase_repl_state = 1;
        }

        /* The position should increase monotonically, since just one thread
        is sending the binlog to the slave for which we want synchronous
        replication. Let us check this, and print an error to the .err log
        if that is not the case. */

        if (innobase_repl_file_name_inited) {
                cmp = strcmp(log_file_name, innobase_repl_file_name);

                if (cmp < 0
                    || (cmp == 0 && end_offset < innobase_repl_pos)) {

                        ut_print_timestamp(stderr);
                        fprintf(stderr,
"  InnoDB: Error: MySQL synchronous replication has sent binlog\n"
"InnoDB: to the slave up to file %s, position %lu\n", innobase_repl_file_name,
                                        (ulong)innobase_repl_pos);
                        fprintf(stderr,
"InnoDB: but now MySQL reports that it sent the binlog only up to\n"
"InnoDB: file %s, position %lu\n", log_file_name, (ulong)end_offset);

                }
        }

        strcpy(innobase_repl_file_name, log_file_name);
        innobase_repl_pos = end_offset;
        innobase_repl_file_name_inited = 1;

        if (innobase_repl_n_wait_threads > 0) {
                /* Let us check if some of the waiting threads doing a trx
                commit can now proceed */

                cmp = strcmp(innobase_repl_file_name,
                                        innobase_repl_wait_file_name);
                if (cmp > 0
                    || (cmp == 0 && innobase_repl_pos
                                    >= innobase_repl_wait_pos)) {

                        /* Yes, at least one waiting thread can now proceed:
                        let us release all waiting threads with a broadcast */

                        can_release_threads = 1;

                        innobase_repl_wait_file_name_inited = 0;
                }
        }

        pthread_mutex_unlock(&innobase_repl_cond_mutex);

        if (can_release_threads) {

                pthread_cond_broadcast(&innobase_repl_cond);
        }
}

/*********************************************************************
Rolls back a transaction or the latest SQL statement. */

+2 −0
Original line number Diff line number Diff line
@@ -321,3 +321,5 @@ int innobase_rollback_by_xid(
int innobase_xa_end(THD *thd);


int innobase_repl_report_sent_binlog(THD *thd, char *log_file_name,
                               my_off_t end_offset);
+57 −0
Original line number Diff line number Diff line
@@ -2411,3 +2411,60 @@ TYPELIB *ha_known_exts(void)
  }
  return &known_extensions;
}

/*
  Reports to table handlers up to which position we have sent the binlog
  to a slave in replication

  SYNOPSIS
    ha_repl_report_sent_binlog()

  NOTES
    Only works for InnoDB at the moment

  RETURN VALUE
    Always 0 (= success)  

  PARAMETERS
    THD    *thd             in: thread doing the binlog communication to
                                the slave
    char   *log_file_name   in: binlog file name
    my_off_t end_offset     in: the offset in the binlog file up to
                                which we sent the contents to the slave
*/

int ha_repl_report_sent_binlog(THD *thd, char *log_file_name,
                               my_off_t end_offset)
{
#ifdef HAVE_INNOBASE_DB
  return innobase_repl_report_sent_binlog(thd,log_file_name,end_offset);
#else
  /* remove warnings about unused parameters */
  thd=thd; log_file_name=log_file_name; end_offset=end_offset;
  return 0;
#endif
}

/*
  Reports to table handlers that we stop replication to a specific slave

  SYNOPSIS
    ha_repl_report_replication_stop()

  NOTES
    Does nothing at the moment

  RETURN VALUE
    Always 0 (= success)  

  PARAMETERS
    THD    *thd             in: thread doing the binlog communication to
                                the slave
*/

int ha_repl_report_replication_stop(THD *thd)
{
  thd = thd;

  return 0;
}
+5 −1
Original line number Diff line number Diff line
@@ -843,7 +843,7 @@ int ha_change_key_cache_param(KEY_CACHE *key_cache);
int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache);
int ha_end_key_cache(KEY_CACHE *key_cache);

/* weird stuff */
/* report to InnoDB that control passes to the client */
int ha_release_temporary_latches(THD *thd);

/* transactions: interface to handlerton functions */
@@ -875,3 +875,7 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht);
#define trans_need_2pc(thd, all)                   ((total_ha_2pc > 1) && \
        !((all ? &thd->transaction.all : &thd->transaction.stmt)->no_2pc))

/* semi-synchronous replication */
int ha_repl_report_sent_binlog(THD *thd, char *log_file_name,
                               my_off_t end_offset);
int ha_repl_report_replication_stop(THD *thd);
+0 −2
Original line number Diff line number Diff line
@@ -5495,7 +5495,6 @@ The minimum value for this variable is 4096.",
  {"sync-frm", OPT_SYNC_FRM, "Sync .frm to disk on create. Enabled by default.",
   (gptr*) &opt_sync_frm, (gptr*) &opt_sync_frm, 0, GET_BOOL, NO_ARG, 1, 0,
   0, 0, 0, 0},
#ifdef DOES_NOTHING_YET
  {"sync-replication", OPT_SYNC_REPLICATION,
   "Enable synchronous replication.",
   (gptr*) &global_system_variables.sync_replication,
@@ -5511,7 +5510,6 @@ The minimum value for this variable is 4096.",
   (gptr*) &global_system_variables.sync_replication_timeout,
   (gptr*) &global_system_variables.sync_replication_timeout,
   0, GET_ULONG, REQUIRED_ARG, 10, 0, ~0L, 0, 1, 0},
#endif
  {"table_cache", OPT_TABLE_CACHE,
   "The number of open tables for all threads.", (gptr*) &table_cache_size,
   (gptr*) &table_cache_size, 0, GET_ULONG, REQUIRED_ARG, 64, 1, 512*1024L,
Loading