Commit baae7a97 authored by unknown's avatar unknown
Browse files

BUG#18068: SELECT DISTINCT (with duplicates and covering index)

When DISTINCT is converted to GROUP BY and columns that come from the
covering index are listed twice in the SELECT list, the optimizer creates an
improper processing sequence. This happens because the columns of the
covering index are not recognized as such and are treated as non-index
columns.

Generally speaking, duplicate columns can safely be removed from the GROUP
BY/DISTINCT list because doing so neither adds rows to nor removes rows from
the result set. Duplicates can be removed even if they are not consecutive
(unlike ORDER BY, where duplicate columns can be removed only if they are
consecutive).

So we can safely transform "SELECT DISTINCT a,a FROM ... ORDER BY a" to
"SELECT a,a FROM ... GROUP BY a ORDER BY a" instead of
"SELECT a,a FROM ... GROUP BY a,a ORDER BY a". We can even transform
"SELECT DISTINCT a,b,a FROM ... ORDER BY a,b" to
"SELECT a,b,a FROM ... GROUP BY a,b ORDER BY a,b".

The fix for this bug checks for duplicate columns in the SELECT list while
constructing the GROUP BY list during the DISTINCT-to-GROUP BY
transformation, and skips any column that is already in that list.


mysql-test/r/distinct.result:
  test case for the bug without loose index scan
mysql-test/r/group_min_max.result:
  test case for the bug
mysql-test/t/distinct.test:
  test case for the bug without loose index scan
mysql-test/t/group_min_max.test:
  test case for the bug
sql/sql_select.cc:
  duplicates check and removal
parent 33417297
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -533,3 +533,15 @@ select count(distinct concat(x,y)) from t1;
count(distinct concat(x,y))
2
drop table t1;
CREATE TABLE t1 (a INT, b INT, PRIMARY KEY (a,b));
INSERT INTO t1 VALUES (1, 101);
INSERT INTO t1 SELECT a + 1, a + 101 FROM t1;
INSERT INTO t1 SELECT a + 2, a + 102 FROM t1;
INSERT INTO t1 SELECT a + 4, a + 104 FROM t1;
INSERT INTO t1 SELECT a + 8, a + 108 FROM t1;
EXPLAIN SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t1	index	NULL	PRIMARY	8	NULL	16	Using where; Using index
SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
a	a
DROP TABLE t1;
+22 −0
Original line number Diff line number Diff line
@@ -2116,3 +2116,25 @@ COUNT(DISTINCT a)
1
DROP TABLE t1;
DROP PROCEDURE a;
CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a));
INSERT INTO t1 (a) VALUES 
(''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t1	range	NULL	PRIMARY	66	NULL	12	Using index for group-by
SELECT DISTINCT a,a FROM t1 ORDER BY a;
a	a
	
CENTRAL	CENTRAL
EASTERN	EASTERN
GREATER LONDON	GREATER LONDON
NORTH CENTRAL	NORTH CENTRAL
NORTH EAST	NORTH EAST
NORTH WEST	NORTH WEST
SCOTLAND	SCOTLAND
SOUTH EAST	SOUTH EAST
SOUTH WEST	SOUTH WEST
WESTERN	WESTERN
DROP TABLE t1;
+16 −0
Original line number Diff line number Diff line
@@ -382,3 +382,19 @@ INSERT INTO t1 VALUES
select count(distinct x,y) from t1;
select count(distinct concat(x,y)) from t1;
drop table t1;

#
# Bug #18068: SELECT DISTINCT (with duplicates and covering index)
#
# The same column is listed twice in the DISTINCT select list, and every
# column the query references (a, b) belongs to the PRIMARY KEY (a,b).
# When DISTINCT is converted to GROUP BY, the duplicate column must be
# added to the GROUP BY list only once.
#
CREATE TABLE t1 (a INT, b INT, PRIMARY KEY (a,b));

# Populate 16 rows by doubling the table four times:
# a = 1..16 and b = a + 100 (i.e. b = 101..116).
INSERT INTO t1 VALUES (1, 101);
INSERT INTO t1 SELECT a + 1, a + 101 FROM t1;
INSERT INTO t1 SELECT a + 2, a + 102 FROM t1;
INSERT INTO t1 SELECT a + 4, a + 104 FROM t1;
INSERT INTO t1 SELECT a + 8, a + 108 FROM t1;

# NOTE(review): b ranges over 101..116, so "b < 12" matches no rows and the
# SELECT below returns an empty set; only the EXPLAIN plan is really
# exercised here -- confirm this is intended.
EXPLAIN SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;

DROP TABLE t1;
+16 −0
Original line number Diff line number Diff line
@@ -782,3 +782,19 @@ SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0;

DROP TABLE t1;
DROP PROCEDURE a;

#
# Bug #18068: SELECT DISTINCT (with duplicates and covering index)
#
# Variant of the bug where the same primary-key column is listed twice in
# the DISTINCT select list and the expected plan is "Using index for
# group-by". The duplicate column must not prevent that plan, and each
# distinct value must be returned exactly once, in ORDER BY order.
#

CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a));

# 11 distinct key values, including the empty string.
INSERT INTO t1 (a) VALUES 
  (''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
  ('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
  ('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');

# Check the plan first, then the result set: both output columns must carry
# the same value on every row.
EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;  
SELECT DISTINCT a,a FROM t1 ORDER BY a;  

DROP TABLE t1;
+11 −0
Original line number Diff line number Diff line
@@ -12698,6 +12698,17 @@ create_distinct_group(THD *thd, Item **ref_pointer_array,
  {
    if (!item->const_item() && !item->with_sum_func && !item->marker)
    {
      /* 
        Don't put duplicate columns from the SELECT list into the 
        GROUP BY list.
      */
      ORDER *ord_iter;
      for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
        if ((*ord_iter->item)->eq(item, 1))
          break;
      if (ord_iter)
        continue;
      
      ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER));
      if (!ord)
	return 0;