Commit 9cf4750e authored by unknown's avatar unknown
Browse files

BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)"

In the fix for BUG#15872, a condition of the form "t.key NOT IN (c1, ..., cN)"
with N>1000 was incorrectly converted to
  (-inf < X < c_min) OR (c_max < X)
This conversion has now been removed; we don't produce any range lists for
such conditions.


mysql-test/r/range.result:
  BUG#21282: Testcase
mysql-test/t/range.test:
  BUG#21282: Testcase
sql/opt_range.cc:
  BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)"
  In the fix for BUG#15872, a condition of the form "t.key NOT IN (c1, ..., cN)"
  with N>1000 was incorrectly converted to
    (-inf < X < c_min) OR (c_max < X)
  This conversion has now been removed; we don't produce any range lists for
  such conditions.
parent 2f5ae7c5
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
@@ -838,3 +838,25 @@ select a, hex(filler) from t1 where a not between 'b' and 'b';
a	hex(filler)
a	0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
drop table t1,t2,t3;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
count(*)
1000
set @a=concat(@a, ')');
insert into t2 values (11),(13),(15);
set @b= concat("explain ", @a);
prepare stmt1 from @b;
execute stmt1;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t2	index	a	a	5	NULL	1003	Using where; Using index
prepare stmt1 from @a;
execute stmt1;
a
11
13
15
drop table t1, t2;
+25 −0
Original line number Diff line number Diff line
@@ -656,3 +656,28 @@ explain select * from t1 where a not between 'b' and 'b';
select a, hex(filler) from t1 where a not between 'b' and 'b'; 

drop table t1,t2,t3;

#
# BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)"
#
# Builds a NOT IN list with more than 1000 constants at run time and checks
# that rows whose key value is absent from the list are still returned.
#

# t1 holds the digits 0..9; the three-way self-join below uses it to
# generate 1000 distinct numbers.
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
# t2 is the indexed table under test; it receives the 1000 even values
# 0, 2, 4, ..., 1998.
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;

# Assemble the statement text in @a: start the NOT IN list with 0, append
# every value currently in t2 (one per row, hence count(*) = 1000), then
# close the parenthesis. The finished list has 1001 literals.
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
set @a=concat(@a, ')');

# Add three odd values that do not appear in the NOT IN list; these are the
# only rows the query should return.
insert into t2 values (11),(13),(15);

# @b is the same statement prefixed with EXPLAIN.
set @b= concat("explain ", @a);

# Show the access plan chosen for the big NOT IN list.
prepare stmt1 from @b;
execute stmt1;

# Run the query itself; the expected result is exactly 11, 13, 15.
prepare stmt1 from @a;
execute stmt1;

drop table t1, t2;
# End of 5.0 tests
+46 −61
Original line number Diff line number Diff line
@@ -3608,41 +3608,33 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
      if (func->array && func->cmp_type != ROW_RESULT)
      {
        /*
          We get here for conditions in form "t.key NOT IN (c1, c2, ...)" 
          (where c{i} are constants).
          Our goal is to produce a SEL_ARG graph that represents intervals:
          We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
          where c{i} are constants. Our goal is to produce a SEL_TREE that 
          represents intervals:
          
          ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)
          
          where $MIN is either "-inf" or NULL.
          
          The most straightforward way to handle NOT IN would be to convert
          it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range
          optimizer to build SEL_ARG graph from that. However that will cause
          the range optimizer to use O(N^2) memory (it's a bug, not filed),
          and people do use big NOT IN lists (see BUG#15872). Also, for big          
          NOT IN lists constructing/using graph (*) does not make the query
          faster.
          
          So, we will handle NOT IN manually in the following way:
          * if the number of entries in the NOT IN list is less then 
            NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*)
            manually.
          * Otherwise, we will construct a smaller graph: for 
            "t.key NOT IN (c1,...cN)" we construct a graph representing 
            ($MIN < t.key) OR (cN < t.key)  // here sequence of c_i is
                                            // ordered.

          A note about partially-covering indexes: for those (e.g. for 
          "a CHAR(10), KEY(a(5))") the handling is correct (albeit not very
          efficient):
          Instead of "t.key < c1" we get "t.key <= prefix-val(c1)".
          Combining the intervals in (*) together, we get:
          (-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ...
          i.e. actually we get intervals combined into one interval:
          (-inf<=t.key<=+inf). This doesn't make much sense but it doesn't
          cause any problems.
          The most straightforward way to produce it is to convert NOT IN
          into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
          analyzer build a SEL_TREE from that. The problem is that the
          range analyzer will use O(N^2) memory (which is probably a bug),
          so people who use big NOT IN lists (e.g. see BUG#15872, BUG#21282)
          will run out of memory.

          Another problem with big lists like (*) is that a big list is
          unlikely to produce a good "range" access, while evaluating that
          range access requires expensive CPU calculations (and, for
          MyISAM, even index accesses). In short, big NOT IN lists are rarely
          worth analyzing.

          Considering the above, we'll handle NOT IN as follows:
          * if the number of entries in the NOT IN list is less than
            NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
          * Otherwise, don't produce a SEL_TREE.
        */
#define NOT_IN_IGNORE_THRESHOLD 1000
        MEM_ROOT *tmp_root= param->mem_root;
        param->thd->mem_root= param->old_root;
        /* 
@@ -3656,7 +3648,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
        Item *value_item= func->array->create_item();
        param->thd->mem_root= tmp_root;

        if (!value_item)
        if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
          break;

        /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
@@ -3677,10 +3669,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
          tree= NULL;
          break;
        }
#define NOT_IN_IGNORE_THRESHOLD 1000        
        SEL_TREE *tree2;
        if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
        {
        for (; i < func->array->count; i++)
        {
          if (func->array->compare_elems(i, i-1))
@@ -3713,9 +3702,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
            tree= tree_or(param, tree, tree2);
          }
        }
        }
        else
          func->array->value_to_item(func->array->count - 1, value_item);
        
        if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
        {
@@ -3780,7 +3766,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
  }

  DBUG_RETURN(tree);

}

	/* make a select tree of all keys in condition */