Bug#37721: ORDER BY when WHERE contains non-partitioned (44630e09) · Commits · Software / OSDI20 Artifacts / mariadb

mysql-test/r/partition_innodb.result

+65 −0

Original line number	Diff line number	Diff line
		# Bug#37721, test of ORDER BY on PK and WHERE on INDEX
		CREATE TABLE t1 (
		a INT,
		b INT,
		PRIMARY KEY (a),
		INDEX (b))
		ENGINE InnoDB
		PARTITION BY HASH(a)
		PARTITIONS 3;
		INSERT INTO t1 VALUES (0,0),(4,0),(2,0);
		SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
		a
		0
		2
		4
		SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
		a
		4
		2
		0
		ALTER TABLE t1 DROP INDEX b;
		SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
		a
		0
		2
		4
		SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
		a
		4
		2
		0
		DROP TABLE t1;
		CREATE TABLE t1 (
		a VARCHAR(600),
		b VARCHAR(600),
		PRIMARY KEY (a),
		INDEX (b))
		ENGINE InnoDB
		PARTITION BY KEY(a)
		PARTITIONS 3;
		INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'1'),repeat('0',257));
		INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'3'),repeat('0',257));
		INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'2'),repeat('0',257));
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
		right(a,1)
		1
		2
		3
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
		right(a,1)
		3
		2
		1
		ALTER TABLE t1 DROP INDEX b;
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
		right(a,1)
		1
		2
		3
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
		right(a,1)
		3
		2
		1
		DROP TABLE t1;
		# Bug#32948
		CREATE TABLE t1 (c1 INT, PRIMARY KEY (c1)) ENGINE=INNODB;
		CREATE TABLE t2 (c1 INT, PRIMARY KEY (c1),

mysql-test/t/partition_innodb.test

+40 −0

Original line number	Diff line number	Diff line
		--source include/have_partition.inc
		--source include/have_innodb.inc

		#
		# Bug37721: ORDER BY when WHERE contains non-partitioned index column
		# wrong order since it did not use pk as second compare
		--echo # Bug#37721, test of ORDER BY on PK and WHERE on INDEX
		CREATE TABLE t1 (
		a INT,
		b INT,
		PRIMARY KEY (a),
		INDEX (b))
		ENGINE InnoDB
		PARTITION BY HASH(a)
		PARTITIONS 3;
		# This will give the middle partition the highest value
		INSERT INTO t1 VALUES (0,0),(4,0),(2,0);
		SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
		SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
		ALTER TABLE t1 DROP INDEX b;
		SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
		SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
		DROP TABLE t1;
		CREATE TABLE t1 (
		a VARCHAR(600),
		b VARCHAR(600),
		PRIMARY KEY (a),
		INDEX (b))
		ENGINE InnoDB
		PARTITION BY KEY(a)
		PARTITIONS 3;
		# This will give the middle partition the highest value
		INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'1'),repeat('0',257));
		INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'3'),repeat('0',257));
		INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'2'),repeat('0',257));
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
		ALTER TABLE t1 DROP INDEX b;
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
		SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
		DROP TABLE t1;

		#
		# Bug#32948 - FKs allowed to reference partitioned table
		#
		-- echo # Bug#32948

sql/ha_partition.cc

+40 −13

Original line number	Diff line number	Diff line
		@@ -239,7 +239,8 @@ void ha_partition::init_handler_variables()
		m_rec_length= 0;
		m_last_part= 0;
		m_rec0= 0;
		m_curr_key_info= 0;
		m_curr_key_info[0]= NULL;
		m_curr_key_info[1]= NULL;
		/*
		this allows blackhole to work properly
		*/
		@@ -3604,11 +3605,24 @@ int ha_partition::index_init(uint inx, bool sorted)
		handler **file;
		DBUG_ENTER("ha_partition::index_init");

		DBUG_PRINT("info", ("inx %u sorted %u", inx, sorted));
		active_index= inx;
		m_part_spec.start_part= NO_CURRENT_PART_ID;
		m_start_key.length= 0;
		m_ordered= sorted;
		m_curr_key_info= table->key_info+inx;
		m_curr_key_info[0]= table->key_info+inx;
		if (m_pkey_is_clustered && table->s->primary_key != MAX_KEY)
		{
		/*
		if PK is clustered, then the key cmp must use the pk to
		differentiate between equal key in given index.
		*/
		DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
		m_curr_key_info[1]= table->key_info+table->s->primary_key;
		m_curr_key_info[2]= NULL;
		}
		else
		m_curr_key_info[1]= NULL;
		/*
		Some handlers only read fields as specified by the bitmap for the
		read set. For partitioned handlers we always require that the
		@@ -3633,9 +3647,13 @@ int ha_partition::index_init(uint inx, bool sorted)
		TODO: handle COUNT(*) queries via unordered scan.
		*/
		uint i;
		for (i= 0; i < m_curr_key_info->key_parts; i++)
		KEY **key_info= m_curr_key_info;
		do
		{
		for (i= 0; i < (*key_info)->key_parts; i++)
		bitmap_set_bit(table->read_set,
		m_curr_key_info->key_part[i].field->field_index);
		(*key_info)->key_part[i].field->field_index);
		} while (*(++key_info));
		}
		file= m_file;
		do
		@@ -3692,10 +3710,10 @@ int ha_partition::index_end()
		Read one record in an index scan and start an index scan

		SYNOPSIS
		index_read()
		index_read_map()
		buf Read row in MySQL Row Format
		key Key parts in consecutive order
		key_len Total length of key parts
		keypart_map Which part of key is used
		find_flag What type of key condition is used

		RETURN VALUE
		@@ -3703,12 +3721,12 @@ int ha_partition::index_end()
		0 Success

		DESCRIPTION
		index_read starts a new index scan using a start key. The MySQL Server
		index_read_map starts a new index scan using a start key. The MySQL Server
		will check the end key on its own. Thus to function properly the
		partitioned handler need to ensure that it delivers records in the sort
		order of the MySQL Server.
		index_read can be restarted without calling index_end on the previous
		index scan and without calling index_init. In this case the index_read
		index_read_map can be restarted without calling index_end on the previous
		index scan and without calling index_init. In this case the index_read_map
		is on the same index as the previous index_scan. This is particularly
		used in conjuntion with multi read ranges.
		*/
		@@ -3765,11 +3783,15 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
		DBUG_ENTER("ha_partition::common_index_read");
		LINT_INIT(key_len); /* used if have_start_key==TRUE */

		DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u have_start_key %u",
		m_ordered, m_ordered_scan_ongoing, have_start_key));

		if (have_start_key)
		{
		m_start_key.length= key_len= calculate_key_len(table, active_index,
		m_start_key.key,
		m_start_key.keypart_map);
		DBUG_ASSERT(key_len);
		}
		if ((error= partition_scan_set_up(buf, have_start_key)))
		{
		@@ -3784,9 +3806,12 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
		reverse_order= TRUE;
		m_ordered_scan_ongoing= TRUE;
		}
		DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
		m_ordered, m_ordered_scan_ongoing, have_start_key));
		if (!m_ordered_scan_ongoing \|\|
		(have_start_key && m_start_key.flag == HA_READ_KEY_EXACT &&
		(key_len >= m_curr_key_info->key_length \|\| key_len == 0)))
		!m_pkey_is_clustered &&
		key_len >= m_curr_key_info[0]->key_length))
		{
		/*
		We use unordered index scan either when read_range is used and flag
		@@ -3799,6 +3824,8 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
		Need to set unordered scan ongoing since we can come here even when
		it isn't set.
		*/
		DBUG_PRINT("info", ("key_len %lu (%lu), doing unordered scan",
		key_len, m_curr_key_info[0]->key_length));
		m_ordered_scan_ongoing= FALSE;
		error= handle_unordered_scan_next_partition(buf);
		}
		@@ -3900,7 +3927,7 @@ int ha_partition::common_first_last(uchar *buf)
		Read last using key

		SYNOPSIS
		index_read_last()
		index_read_last_map()
		buf Read row in MySQL Row Format
		key Key
		keypart_map Which part of key is used
		@@ -4057,7 +4084,7 @@ int ha_partition::read_range_first(const key_range *start_key,
		(end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
		}

		range_key_part= m_curr_key_info->key_part;
		range_key_part= m_curr_key_info[0]->key_part;
		if (start_key)
		m_start_key= *start_key;
		else

sql/ha_partition.h

+10 −3

Original line number	Diff line number	Diff line
		@@ -75,7 +75,14 @@ class ha_partition :public handler
		partition_info *m_part_info; // local reference to partition
		Field **m_part_field_array; // Part field array locally to save acc
		uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
		KEY *m_curr_key_info; // Current index
		/*
		Current index.
		When used in key_rec_cmp: If clustered pk, index compare
		must compare pk if given index is same for two rows.
		So normally m_curr_key_info[0]= current index and m_curr_key[1]= NULL,
		and if clustered pk, [0]= current index, [1]= pk, [2]= NULL
		*/
		KEY *m_curr_key_info[3]; // Current index
		uchar *m_rec0; // table->record[0]
		QUEUE m_queue; // Prio queue used by sorted read
		/*

sql/key.cc

+82 −62

Original line number	Diff line number	Diff line
		@@ -448,16 +448,9 @@ int key_cmp(KEY_PART_INFO key_part, const uchar key, uint key_length)
		}


		/*
		Compare two records in index order
		SYNOPSIS
		key_rec_cmp()
		key Index information
		rec0 Pointer to table->record[0]
		first_rec Pointer to record compare with
		second_rec Pointer to record compare against first_rec
		/**
		Compare two records in index order.

		DESCRIPTION
		This method is set-up such that it can be called directly from the
		priority queue and it is attempted to be optimised as much as possible
		since this will be called O(N * log N) times while performing a merge
		@@ -471,21 +464,43 @@ int key_cmp(KEY_PART_INFO key_part, const uchar key, uint key_length)
		We first check for NULL values, if there are no NULL values we use
		a compare method that gets two field pointers and a max length
		and return the result of the comparison.

		key is a null terminated array, since in some cases (clustered
		primary key) it must compare more than one index.

		@param key Null terminated array of index information
		@param first_rec Pointer to record compare with
		@param second_rec Pointer to record compare against first_rec

		@return Return value is SIGN(first_rec - second_rec)
		@retval 0 Keys are equal
		@retval -1 second_rec is greater than first_rec
		@retval +1 first_rec is greater than second_rec
		*/

		int key_rec_cmp(void key, uchar first_rec, uchar *second_rec)
		int key_rec_cmp(void key_p, uchar first_rec, uchar *second_rec)
		{
		KEY key_info= (KEY)key;
		uint key_parts= key_info->key_parts, i= 0;
		KEY key= (KEY) key_p;
		KEY key_info= (key++); // Start with first key
		uint key_parts, key_part_num;
		KEY_PART_INFO *key_part= key_info->key_part;
		uchar *rec0= key_part->field->ptr - key_part->offset;
		my_ptrdiff_t first_diff= first_rec - rec0, sec_diff= second_rec - rec0;
		int result= 0;
		Field *field;
		DBUG_ENTER("key_rec_cmp");

		/* loop over all given keys */
		do
		{
		Field *field= key_part->field;
		key_parts= key_info->key_parts;
		key_part= key_info->key_part;
		key_part_num= 0;

		/* loop over every key part */
		do
		{
		field= key_part->field;

		if (key_part->null_bit)
		{
		@@ -522,10 +537,15 @@ int key_rec_cmp(void key, uchar first_rec, uchar *second_rec)
		max length. The exceptions are the BLOB and VARCHAR field types
		that take the max length into account.
		*/
		result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff,
		key_part->length);
		if ((result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff,
		key_part->length)))
		DBUG_RETURN(result);
		next_loop:
		key_part++;
		} while (!result && ++i < key_parts);
		DBUG_RETURN(result);
		key_part_num++;
		} while (key_part_num < key_parts); /* this key is done */

		key_info= *(key++);
		} while (key_info); /* no more keys to test */
		DBUG_RETURN(0);
		}