Commit 31169b80 authored by unknown's avatar unknown
Browse files

Merge spetrunia@bk-internal.mysql.com:/home/bk/mysql-5.0

into mysql.com:/home/psergey/mysql-5.0-bug19618

parents 6513fda8 3b168d1d
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -326,3 +326,20 @@ deallocate prepare s;
set @str=NULL;
drop table t2;
drop table t1;
create table t1 (
some_id smallint(5) unsigned,
key (some_id)
);
insert into t1 values (1),(2);
select some_id from t1 where some_id not in(2,-1);
some_id
1
select some_id from t1 where some_id not in(-4,-1,-4);
some_id
1
2
select some_id from t1 where some_id not in(-4,-1,3423534,2342342);
some_id
1
2
drop table t1;
+12 −0
Original line number Diff line number Diff line
@@ -220,3 +220,15 @@ set @str=NULL;
drop table t2;
drop table t1;

# BUG#19618: Crash in range optimizer for 
#   "unsigned_keypart NOT IN(negative_number,...)" 
#   (introduced in fix BUG#15872) 
create table t1 (
  some_id smallint(5) unsigned,
  key (some_id)
);
insert into t1 values (1),(2);
select some_id from t1 where some_id not in(2,-1);
select some_id from t1 where some_id not in(-4,-1,-4);
select some_id from t1 where some_id not in(-4,-1,3423534,2342342);
drop table t1;
+80 −29
Original line number Diff line number Diff line
@@ -3503,17 +3503,46 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,

    if (inv)
    {
      /*
        We get here for conditions like "t.keypart NOT IN (....)".
        
        If the IN-list contains only constants (and func->array is an ordered
        array of them), we construct the appropriate SEL_ARG tree manually, 
        because constructing it using the range analyzer (as 
        AND_i( t.keypart != c_i)) will cause lots of memory to be consumed
        (see BUG#15872). 
      */
      if (func->array && func->cmp_type != ROW_RESULT)
      {
        /*
          We get here for conditions in form "t.key NOT IN (c1, c2, ...)" 
          (where c{i} are constants).
          Our goal is to produce a SEL_ARG graph that represents intervals:
          
          ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)
          
          where $MIN is either "-inf" or NULL.
          
          The most straightforward way to handle NOT IN would be to convert
          it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range
          optimizer to build SEL_ARG graph from that. However that will cause
          the range optimizer to use O(N^2) memory (it's a bug, not filed),
          and people do use big NOT IN lists (see BUG#15872). Also, for big          
          NOT IN lists constructing/using graph (*) does not make the query
          faster.
          
          So, we will handle NOT IN manually in the following way:
          * if the number of entries in the NOT IN list is less then 
            NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*)
            manually.
          * Otherwise, we will construct a smaller graph: for 
            "t.key NOT IN (c1,...cN)" we construct a graph representing 
            ($MIN < t.key) OR (cN < t.key)  // here sequence of c_i is
                                            // ordered.

          A note about partially-covering indexes: for those (e.g. for 
          "a CHAR(10), KEY(a(5))") the handling is correct (albeit not very
          efficient):
          Instead of "t.key < c1" we get "t.key <= prefix-val(c1)".
          Combining the intervals in (*) together, we get:
          (-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ...
          i.e. actually we get intervals combined into one interval:
          (-inf<=t.key<=+inf). This doesn't make much sense but it doesn't
          cause any problems.
        */
        MEM_ROOT *tmp_root= param->mem_root;
        param->thd->mem_root= param->old_root;
        /* 
          Create one Item_type constant object. We'll need it as
          get_mm_parts only accepts constant values wrapped in Item_Type
@@ -3522,25 +3551,35 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
          per-statement mem_root (while thd->mem_root is currently pointing
          to mem_root local to range optimizer).
        */
        MEM_ROOT *tmp_root= param->mem_root;
        param->thd->mem_root= param->old_root;
        Item *value_item= func->array->create_item();
        param->thd->mem_root= tmp_root;

        if (!value_item)
          break;
        
        /* Get a SEL_TREE for "-inf < X < c_0" interval */
        func->array->value_to_item(0, value_item);
        /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
        uint i=0;
        do 
        {
          func->array->value_to_item(i, value_item);
          tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
                             value_item, cmp_type);
          if (!tree)
            break;
          i++;
        } while (i < func->array->count && tree->type == SEL_TREE::IMPOSSIBLE);

        if (!tree || tree->type == SEL_TREE::IMPOSSIBLE)
        {
          /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3) */
          tree= NULL;
          break;
        }
#define NOT_IN_IGNORE_THRESHOLD 1000        
        SEL_TREE *tree2;
        if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
        {
          for (uint i=1; i < func->array->count; i++)
          for (; i < func->array->count; i++)
          {
            if (func->array->compare_elems(i, i-1))
            {
@@ -3548,18 +3587,27 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
              func->array->value_to_item(i, value_item);
              tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
                                  value_item, cmp_type);
              if (!tree2)
              {
                tree= NULL;
                break;
              }

              /* Change all intervals to be "c_{i-1} < X < c_i" */
              for (uint idx= 0; idx < param->keys; idx++)
              {
                SEL_ARG *new_interval;
                if ((new_interval=  tree2->keys[idx]))
                SEL_ARG *new_interval, *last_val;
                if (((new_interval= tree2->keys[idx])) && 
                    ((last_val= tree->keys[idx]->last())))
                {
                  SEL_ARG *last_val= tree->keys[idx]->last();
                  new_interval->min_value= last_val->max_value;
                  new_interval->min_flag= NEAR_MIN;
                }
              }
              /* 
                The following doesn't try to allocate memory so no need to
                check for NULL.
              */
              tree= tree_or(param, tree, tree2);
            }
          }
@@ -3567,6 +3615,8 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
        else
          func->array->value_to_item(func->array->count - 1, value_item);
        
        if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
        {
          /* 
            Get the SEL_TREE for the last "c_last < X < +inf" interval 
            (value_item cotains c_last already)
@@ -3575,6 +3625,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
                              value_item, cmp_type);
          tree= tree_or(param, tree, tree2);
        }
      }
      else
      {
        tree= get_ne_mm_tree(param, cond_func, field,