Commit bcd98d51 authored by unknown's avatar unknown
Browse files

BUG#18198: Fixes to handle VARCHAR strings properly

New methods to handle VARCHAR strings and CHAR's which are not
using a binary collation.
Indentation fixes
Now strings are run through strnxfrm before they are processed
by the partition function
We do not allow collations where strnxfrm expands the string since
we want the resulting string to fit in the same value range as
the original.


mysql-test/r/partition_range.result:
  New test cases
mysql-test/t/partition_range.test:
  New test cases
sql/partition_info.h:
  New methods to handle VARCHAR strings and CHAR's which are not
  using a binary collation.
sql/sql_partition.cc:
  New methods to handle VARCHAR strings and CHAR's which are not
  using a binary collation.
  Indentation fixes
  Now strings are run through strnxfrm before they are processed
  by the partition function
  We do not allow collations where strnxfrm expands the string since
  we want the resulting string to fit in the same value range as
  the original.
parent 04a70beb
Loading
Loading
Loading
Loading
+42 −0
Original line number Diff line number Diff line
@@ -709,3 +709,45 @@ WHERE (a >= '2004-07-01' AND a <= '2004-09-30') OR
id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t1	p407,p408,p409,p507,p508,p509	ALL	NULL	NULL	NULL	NULL	18	Using where
DROP TABLE t1;
create table t1 (a varchar(20))
partition by range (crc32(md5(a)))
(partition p0 values less than (100),
partition p1 values less than maxvalue);
insert into t1 values ("12345678901234567890");
insert into t1 values ("A2345678901234567890");
insert into t1 values ("B2345678901234567890");
insert into t1 values ("1234567890123456789");
insert into t1 values ("1234567890123456");
select * from t1;
a
12345678901234567890
A2345678901234567890
B2345678901234567890
1234567890123456789
1234567890123456
explain partitions select * from t1 where a = "12345678901234567890";
id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t1	p1	ALL	NULL	NULL	NULL	NULL	5	Using where
explain partitions select * from t1 where a = "12345678901234567890" OR
a = "A2345678901234567890" OR
a = "B2345678901234567890" OR
a = "C2345678901234567890";
id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t1	p1	ALL	NULL	NULL	NULL	NULL	5	Using where
explain partitions select * from t1 where a = "01234567890123456";
id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t1	p1	ALL	NULL	NULL	NULL	NULL	5	Using where
select * from t1 where a = "01234567890123456";
a
select * from t1 where a = "12345678901234567890" OR
a = "A2345678901234567890" OR
a = "B2345678901234567890" OR
a = "C2345678901234567890";
a
12345678901234567890
A2345678901234567890
B2345678901234567890
select * from t1 where a = "12345678901234567890";
a
12345678901234567890
drop table t1;
+30 −0
Original line number Diff line number Diff line
@@ -686,3 +686,33 @@ EXPLAIN PARTITIONS SELECT * from t1
WHERE (a >= '2004-07-01' AND a <= '2004-09-30') OR
      (a >= '2005-07-01' AND a <= '2005-09-30');
DROP TABLE t1;

#
# Bug 18198: Try with a couple of cases using VARCHAR fields in
#            partition function.
create table t1 (a varchar(20))
partition by range (crc32(md5(a)))
(partition p0 values less than (100),
 partition p1 values less than maxvalue);

insert into t1 values ("12345678901234567890");
insert into t1 values ("A2345678901234567890");
insert into t1 values ("B2345678901234567890");
insert into t1 values ("1234567890123456789");
insert into t1 values ("1234567890123456");
select * from t1;
explain partitions select * from t1 where a = "12345678901234567890";
explain partitions select * from t1 where a = "12345678901234567890" OR
                                          a = "A2345678901234567890" OR
                                          a = "B2345678901234567890" OR
                                          a = "C2345678901234567890";
explain partitions select * from t1 where a = "01234567890123456";
select * from t1 where a = "01234567890123456";
select * from t1 where a = "12345678901234567890" OR
                       a = "A2345678901234567890" OR
                       a = "B2345678901234567890" OR
                       a = "C2345678901234567890";
select * from t1 where a = "12345678901234567890";


drop table t1;
+27 −2
Original line number Diff line number Diff line
@@ -61,6 +61,16 @@ class partition_info : public Sql_alloc
  */
  get_subpart_id_func get_subpartition_id;

  /*
    When we have various string fields we might need some preparation
    before and clean-up after calling the get_part_id_func's. We need
    one such method for get_partition_id and one for
    get_part_partition_id and one for get_subpartition_id.
  */
  get_part_id_func get_partition_id_charset;
  get_part_id_func get_part_partition_id_charset;
  get_subpart_id_func get_subpartition_id_charset;

  /* NULL-terminated array of fields used in partitioned expression */
  Field **part_field_array;
  /* NULL-terminated array of fields used in subpartitioned expression */
@@ -72,6 +82,16 @@ class partition_info : public Sql_alloc
  */
  Field **full_part_field_array;

  /*
    When we have a field that requires transformation before calling the
    partition functions we must allocate a field buffer for each of
    the fields in the partition function.
  */
  char **part_field_buffers;
  char **subpart_field_buffers;
  char **restore_part_field_ptrs;
  char **restore_subpart_field_ptrs;

  Item *part_expr;
  Item *subpart_expr;

@@ -188,6 +208,8 @@ class partition_info : public Sql_alloc
  bool is_auto_partitioned;
  bool from_openfrm;
  bool has_null_value;
  bool includes_charset_field_part;
  bool includes_charset_field_subpart;


  partition_info()
@@ -195,6 +217,8 @@ class partition_info : public Sql_alloc
    get_subpartition_id(NULL),
    part_field_array(NULL), subpart_field_array(NULL),
    full_part_field_array(NULL),
    part_field_buffers(NULL), subpart_field_buffers(NULL),
    restore_part_field_ptrs(NULL), restore_subpart_field_ptrs(NULL),
    part_expr(NULL), subpart_expr(NULL), item_free_list(NULL),
    first_log_entry(NULL), exec_log_entry(NULL), frm_log_entry(NULL),
    list_array(NULL),
@@ -217,7 +241,8 @@ class partition_info : public Sql_alloc
    list_of_part_fields(FALSE), list_of_subpart_fields(FALSE),
    linear_hash_ind(FALSE), fixed(FALSE),
    is_auto_partitioned(FALSE), from_openfrm(FALSE),
    has_null_value(FALSE)
    has_null_value(FALSE), includes_charset_field_part(FALSE),
    includes_charset_field_subpart(FALSE)
  {
    all_fields_in_PF.clear_all();
    all_fields_in_PPF.clear_all();
+387 −30
Original line number Diff line number Diff line
@@ -62,6 +62,22 @@ static const char *end_paren_str= ")";
static const char *begin_paren_str= "(";
static const char *comma_str= ",";

/*
  Forward declarations of the wrappers that prepare string fields
  (copy them into field buffers) before calling the real partition id
  functions and restore the field pointers afterwards. They are
  installed by set_up_partition_func_pointers() when the partition
  and/or subpartition function includes charset fields.
*/
static int get_part_id_charset_func_all(partition_info *part_info,
                                        uint32 *part_id,
                                        longlong *func_value);
static int get_part_id_charset_func_part(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value);
static int get_part_id_charset_func_subpart(partition_info *part_info,
                                            uint32 *part_id,
                                            longlong *func_value);
static int get_part_part_id_charset_func(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value);
static uint32 get_subpart_id_charset_func(partition_info *part_info);
int get_partition_id_list(partition_info *part_info,
                          uint32 *part_id,
                          longlong *func_value);
int get_partition_id_list(partition_info *part_info,
                          uint32 *part_id,
                          longlong *func_value);
@@ -1311,6 +1327,34 @@ static void set_up_partition_func_pointers(partition_info *part_info)
      }
    }
  }
  if (part_info->includes_charset_field_part ||
      part_info->includes_charset_field_subpart)
  {
    DBUG_ASSERT(part_info->get_partition_id);
    part_info->get_partition_id_charset= part_info->get_partition_id;
    if (part_info->includes_charset_field_part &&
        part_info->includes_charset_field_subpart)
      part_info->get_partition_id= get_part_id_charset_func_all;
    else if (part_info->includes_charset_field_part)
      part_info->get_partition_id= get_part_id_charset_func_part;
    else
      part_info->get_partition_id= get_part_id_charset_func_subpart;
  }
  if (part_info->includes_charset_field_part &&
      part_info->is_sub_partitioned())
  {
    DBUG_ASSERT(part_info->get_part_partition_id);
    part_info->get_part_partition_id_charset=
          part_info->get_part_partition_id;
    part_info->get_part_partition_id= get_part_part_id_charset_func;
  }
  if (part_info->includes_charset_field_subpart)
  {
    DBUG_ASSERT(part_info->get_subpartition_id);
    part_info->get_subpartition_id_charset=
          part_info->get_subpartition_id;
    part_info->get_subpartition_id= get_subpart_id_charset_func;
  }
  DBUG_VOID_RETURN;
}

@@ -1377,16 +1421,24 @@ static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask,
  character sets and collations.
  SYNOPSIS
    check_part_func_fields()
    part_info                           Partition info
    ptr                                 Array of Field pointers
    ok_with_charsets                    Will we report allowed charset
                                        fields as ok
  RETURN VALUES
    FALSE                               Success
    TRUE                                Error
  DESCRIPTION
    We will check in this routine that the fields of the partition functions
    do not contain unallowed parts. It can also be used to check if there
    are fields that require special care by calling my_strnxfrm before
    calling the functions to calculate partition id.
*/

static bool check_part_func_fields(Field **ptr)
static bool check_part_func_fields(Field **ptr, bool ok_with_charsets)
{
  Field *field;
  DBUG_ENTER("check_part_func_field");

  while ((field= *(ptr++)))
  {
    /*
@@ -1400,13 +1452,121 @@ static bool check_part_func_fields(Field **ptr)
      CHARSET_INFO *cs= ((Field_str*)field)->charset();
      if (field->type() == MYSQL_TYPE_STRING &&
          cs->state & MY_CS_BINSORT)
        return FALSE;
      return TRUE;
      {
        DBUG_RETURN(FALSE);
      }
      if (!ok_with_charsets ||
          cs->mbmaxlen > 1 ||
          cs->strxfrm_multiply > 1)
      {
        DBUG_RETURN(TRUE);
      }
      DBUG_RETURN(FALSE);
    }
  return FALSE;
  }
  DBUG_RETURN(FALSE);
}

/*
  Set up buffers and arrays for fields requiring preparation
  SYNOPSIS
    set_up_charset_field_preps()
    part_info                        Partition info object
  RETURN VALUES
    TRUE                             Memory Allocation error
    FALSE                            Success
  DESCRIPTION
    Set up arrays and buffers for fields that require special care for
    calculation of partition id. This is used for string fields with
    variable length or string fields with fixed length that isn't using
    the binary collation.

    For the partition fields and (if the table is subpartitioned) the
    subpartition fields we allocate:
      - an array with one buffer pointer per field (xxx_field_buffers)
      - an array with one slot per field used to save the original
        field pointer (restore_xxx_field_ptrs)
      - one buffer of pack_length() bytes per field
    A subpartition field that is also a partition field reuses the
    buffer already allocated for the partition field.
*/

static bool set_up_charset_field_preps(partition_info *part_info)
{
  Field *field, **ptr;
  char *field_buf;
  char **char_ptrs;
  unsigned i;
  size_t size;

  DBUG_ENTER("set_up_charset_field_preps");
  if (check_part_func_fields(part_info->part_field_array, FALSE))
  {
    ptr= part_info->part_field_array;
    part_info->includes_charset_field_part= TRUE;
    /*
      Set up arrays and buffers for those fields
    */
    i= 0;
    while ((field= *(ptr++)))
      i++;
    size= i * sizeof(char*);

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->part_field_buffers= char_ptrs;

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->restore_part_field_ptrs= char_ptrs;

    ptr= part_info->part_field_array;
    i= 0;
    while ((field= *(ptr++)))
    {
      size= field->pack_length();
      if (!(field_buf= sql_calloc(size)))
        goto error;
      part_info->part_field_buffers[i++]= field_buf;
    }
  }
  if (part_info->is_sub_partitioned() &&
      check_part_func_fields(part_info->subpart_field_array, FALSE))
  {
    /*
      Set up arrays and buffers for those fields
    */
    part_info->includes_charset_field_subpart= TRUE;

    /*
      Count the fields first: both pointer arrays must exist before
      any of their slots is written.
    */
    ptr= part_info->subpart_field_array;
    i= 0;
    while ((field= *(ptr++)))
      i++;
    size= i * sizeof(char*);

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->subpart_field_buffers= char_ptrs;

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->restore_subpart_field_ptrs= char_ptrs;

    ptr= part_info->subpart_field_array;
    i= 0;
    while ((field= *(ptr++)))
    {
      field_buf= NULL;
      /*
        part_field_buffers is only allocated when the partition
        function itself needed preparation, so it can be NULL here.
      */
      if (part_info->part_field_buffers)
      {
        unsigned j= 0;
        Field *part_field;
        Field **part_ptr= part_info->part_field_array;

        while ((part_field= *(part_ptr++)))
        {
          if (field == part_field)
          {
            /* Same field in both functions, share the buffer */
            field_buf= part_info->part_field_buffers[j];
            break;
          }
          j++;
        }
      }
      if (!field_buf)
      {
        size= field->pack_length();
        if (!(field_buf= sql_calloc(size)))
          goto error;
      }
      part_info->subpart_field_buffers[i++]= field_buf;
    }
  }
  DBUG_RETURN(FALSE);
error:
  /* size holds the size of the allocation that failed */
  mem_alloc_error(size);
  DBUG_RETURN(TRUE);
}

/*
  fix partition functions
@@ -1555,10 +1715,10 @@ bool fix_partition_func(THD *thd, TABLE *table,
  }
  if (((part_info->part_type != HASH_PARTITION ||
      part_info->list_of_part_fields == FALSE) &&
      check_part_func_fields(part_info->part_field_array)) ||
      check_part_func_fields(part_info->part_field_array, TRUE)) ||
      (part_info->list_of_part_fields == FALSE &&
       part_info->is_sub_partitioned() &&
       check_part_func_fields(part_info->subpart_field_array)))
       check_part_func_fields(part_info->subpart_field_array, TRUE)))
  {
    my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0));
    goto end;
@@ -1573,6 +1733,11 @@ bool fix_partition_func(THD *thd, TABLE *table,
    goto end;
  if (unlikely(set_up_partition_bitmap(thd, part_info)))
    goto end;
  if (unlikely(set_up_charset_field_preps(part_info)))
  {
    my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0));
    goto end;
  }
  check_range_capable_PF(table);
  set_up_partition_key_maps(table, part_info);
  set_up_partition_func_pointers(part_info);
@@ -2289,6 +2454,86 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
                                           no_parts));
}

/*
  Copy to field buffers and set up field pointers
  SYNOPSIS
    copy_to_part_field_buffers()
    ptr                          NULL-terminated array of fields to copy
    field_bufs                   Array of field buffers, one per field
                                 in ptr (same order)
    restore_ptr                  Array where the original field pointer
                                 of each field in ptr is saved
  RETURN VALUES
    NONE
  DESCRIPTION
    This routine is used to take the data from field pointer, convert
    it to a standard format and store this format in a field buffer
    allocated for this purpose. Next the field pointers are moved to
    point to the field buffers. There is a separate routine,
    restore_part_field_pointers, to restore the field pointers after
    this call.

    The original pointer is saved for every field, also for those
    that are left untouched, so that the restore routine can be run
    unconditionally over the same array.
*/

static void copy_to_part_field_buffers(Field **ptr,
                                       char **field_bufs,
                                       char **restore_ptr)
{
  Field *field;
  while ((field= *(ptr++)))
  {
    *restore_ptr= field->ptr;
    restore_ptr++;
    /*
       We only use the field buffer for VARCHAR and CHAR strings
       which isn't of a binary collation. We also only use the
       field buffer for fields which are not currently NULL.
       The NULL check must apply to VARCHAR as well as CHAR, hence
       the parentheses around the type test.
     */
    if ((field->type() == MYSQL_TYPE_VARCHAR ||
         (field->type() == MYSQL_TYPE_STRING &&
          !(((Field_str*)field)->charset()->state & MY_CS_BINSORT))) &&
        (!field->maybe_null() || !field->is_null()))
    {
      CHARSET_INFO *cs= ((Field_str*)field)->charset();
      uint len= field->pack_length();
      char *field_buf= *field_bufs;
      /*
         The field buffer will store a normalised string. We use
         the strnxfrm method to normalise the string.
       */
      if (field->type() == MYSQL_TYPE_VARCHAR)
      {
        uint len_bytes= ((Field_varstring*)field)->length_bytes;
        /*
           Only the bytes covered by the stored length belong to the
           value; what follows them in the record is not part of the
           string and must not influence the normalised result.
         */
        uint data_len= (len_bytes == 1) ? (uint) (uchar) *field->ptr :
                                          uint2korr(field->ptr);
        if (data_len > field->field_length)
          data_len= field->field_length;
        my_strnxfrm(cs, (uchar*)(field_buf + len_bytes), (len - len_bytes),
                    (uchar*)(field->ptr + len_bytes), data_len);
        if (len_bytes == 1)
          *field_buf= (uchar)data_len;
        else
          int2store(field_buf, data_len);
      }
      else
      {
        my_strnxfrm(cs, (uchar*)field_buf, len,
                    (uchar*)field->ptr, field->field_length);
      }
      field->ptr= field_buf;
    }
    field_bufs++;
  }
  return;
}

/*
  Restore field pointers
  SYNOPSIS
    restore_part_field_pointers()
    ptr                            NULL-terminated array of fields to
                                   restore
    restore_ptr                    Array holding the pointer to put back
                                   into each field of ptr (same order)
  RETURN VALUES
    NONE
  DESCRIPTION
    Every field in ptr gets its ptr member overwritten with the
    corresponding entry of restore_ptr.
*/

static void restore_part_field_pointers(Field **ptr, char **restore_ptr)
{
  for (; *ptr; ptr++, restore_ptr++)
    (*ptr)->ptr= *restore_ptr;
}
/*
  This function is used to calculate the partition id where all partition
  fields have been prepared to point to a record where the partition field
@@ -2299,6 +2544,7 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    out:part_id         The partition id is returned through this pointer
    out: func_value     Value of partition function (longlong)

  RETURN VALUE
    part_id                     Partition id of partition that would contain
@@ -2342,6 +2588,7 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    out:part_id         The partition id is returned through this pointer
    out: func_value     The value calculated by partition function

  RETURN VALUE
    part_id                     Partition id of partition that would contain
@@ -2363,6 +2610,78 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
    get_partition_id_linear_key_nosub
*/

/*
  Calculate partition id when only the subpartition fields need
  preparation
  SYNOPSIS
    get_part_id_charset_func_subpart()
    part_info           Partition info
    part_id             Handed on unchanged to the wrapped function
    func_value          Handed on unchanged to the wrapped function
  RETURN VALUE
    The return value of part_info->get_partition_id_charset
  DESCRIPTION
    Copies the subpartition fields to their field buffers, calls the
    function saved in get_partition_id_charset and then puts the
    field pointers back.
*/
static int get_part_id_charset_func_subpart(partition_info *part_info,
                                            uint32 *part_id,
                                            longlong *func_value)
{
  Field **subpart_fields= part_info->subpart_field_array;
  char **saved_ptrs= part_info->restore_subpart_field_ptrs;
  int result;

  copy_to_part_field_buffers(subpart_fields,
                             part_info->subpart_field_buffers,
                             saved_ptrs);
  result= part_info->get_partition_id_charset(part_info, part_id,
                                              func_value);
  restore_part_field_pointers(subpart_fields, saved_ptrs);
  return result;
}
/*
  Calculate partition id when only the partition fields need
  preparation
  SYNOPSIS
    get_part_id_charset_func_part()
    part_info           Partition info
    part_id             Handed on unchanged to the wrapped function
    func_value          Handed on unchanged to the wrapped function
  RETURN VALUE
    The return value of part_info->get_partition_id_charset
  DESCRIPTION
    Copies the partition fields to their field buffers, calls the
    function saved in get_partition_id_charset and then puts the
    field pointers back.
*/
static int get_part_id_charset_func_part(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value)
{
  Field **part_fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  int result;

  copy_to_part_field_buffers(part_fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  result= part_info->get_partition_id_charset(part_info, part_id,
                                              func_value);
  restore_part_field_pointers(part_fields, saved_ptrs);
  return result;
}

/*
  Calculate partition id when both the partition fields and the
  subpartition fields need preparation
  SYNOPSIS
    get_part_id_charset_func_all()
    part_info           Partition info
    part_id             Handed on unchanged to the wrapped function
    func_value          Handed on unchanged to the wrapped function
  RETURN VALUE
    The return value of part_info->get_partition_id_charset
  DESCRIPTION
    Copies first the partition fields and then the subpartition fields
    to their field buffers, calls the function saved in
    get_partition_id_charset and then restores the field pointers.

    The restore is done in the reverse order of the copy. A field can
    be part of both arrays; for such a field the pointer saved by the
    second copy already points to the field buffer set up by the first
    copy. Restoring the subpartition fields first and the partition
    fields last guarantees that the last pointer written into such a
    field is its original one.
*/
static int get_part_id_charset_func_all(partition_info *part_info,
                                        uint32 *part_id,
                                        longlong *func_value)
{
  int res;
  copy_to_part_field_buffers(part_info->part_field_array,
                             part_info->part_field_buffers,
                             part_info->restore_part_field_ptrs);
  copy_to_part_field_buffers(part_info->subpart_field_array,
                             part_info->subpart_field_buffers,
                             part_info->restore_subpart_field_ptrs);
  res= part_info->get_partition_id_charset(part_info, part_id, func_value);
  /* Undo in reverse order of the copies above */
  restore_part_field_pointers(part_info->subpart_field_array,
                              part_info->restore_subpart_field_ptrs);
  restore_part_field_pointers(part_info->part_field_array,
                              part_info->restore_part_field_ptrs);
  return res;
}

/*
  Calculate the partition id (not considering subpartitions) when the
  partition fields need preparation
  SYNOPSIS
    get_part_part_id_charset_func()
    part_info           Partition info
    part_id             Handed on unchanged to the wrapped function
    func_value          Handed on unchanged to the wrapped function
  RETURN VALUE
    The return value of part_info->get_part_partition_id_charset
  DESCRIPTION
    Copies the partition fields to their field buffers, calls the
    function saved in get_part_partition_id_charset and then puts the
    field pointers back.
*/
static int get_part_part_id_charset_func(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value)
{
  Field **part_fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  int result;

  copy_to_part_field_buffers(part_fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  result= part_info->get_part_partition_id_charset(part_info, part_id,
                                                   func_value);
  restore_part_field_pointers(part_fields, saved_ptrs);
  return result;
}

/*
  Calculate the subpartition id when the subpartition fields need
  preparation
  SYNOPSIS
    get_subpart_id_charset_func()
    part_info           Partition info
  RETURN VALUE
    The return value of part_info->get_subpartition_id_charset
  DESCRIPTION
    Copies the subpartition fields to their field buffers, calls the
    function saved in get_subpartition_id_charset and then puts the
    field pointers back.
*/
static uint32 get_subpart_id_charset_func(partition_info *part_info)
{
  /* Same type as the return type, no detour over a signed int */
  uint32 res;
  copy_to_part_field_buffers(part_info->subpart_field_array,
                             part_info->subpart_field_buffers,
                             part_info->restore_subpart_field_ptrs);
  res= part_info->get_subpartition_id_charset(part_info);
  restore_part_field_pointers(part_info->subpart_field_array,
                              part_info->restore_subpart_field_ptrs);
  return res;
}

int get_partition_id_list(partition_info *part_info,
                          uint32 *part_id,
@@ -2451,6 +2770,21 @@ int get_partition_id_list(partition_info *part_info,
    The edge of corresponding sub-array of part_info->list_array
*/

/*
  Variant of get_list_array_idx_for_endpoint used when the partition
  fields need preparation
  SYNOPSIS
    get_list_array_idx_for_endpoint_charset()
    part_info           Partition info
    left_endpoint       Handed on unchanged to the wrapped function
    include_endpoint    Handed on unchanged to the wrapped function
  RETURN VALUE
    The return value of get_list_array_idx_for_endpoint
  DESCRIPTION
    Copies the partition fields to their field buffers, calls
    get_list_array_idx_for_endpoint and then puts the field pointers
    back.
*/
uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint)
{
  Field **part_fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  uint32 list_idx;

  copy_to_part_field_buffers(part_fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  list_idx= get_list_array_idx_for_endpoint(part_info, left_endpoint,
                                            include_endpoint);
  restore_part_field_pointers(part_fields, saved_ptrs);
  return list_idx;
}

uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
                                       bool left_endpoint,
                                       bool include_endpoint)
@@ -2580,6 +2914,22 @@ int get_partition_id_range(partition_info *part_info,
    The edge of corresponding part_info->range_int_array sub-array.
*/

/*
  Variant of get_partition_id_range_for_endpoint used when the
  partition fields need preparation
  SYNOPSIS
    get_partition_id_range_for_endpoint_charset()
    part_info           Partition info
    left_endpoint       Handed on unchanged to the wrapped function
    include_endpoint    Handed on unchanged to the wrapped function
  RETURN VALUE
    The return value of get_partition_id_range_for_endpoint
  DESCRIPTION
    Copies the partition fields to their field buffers, calls
    get_partition_id_range_for_endpoint and then puts the field
    pointers back.
*/
static uint32
get_partition_id_range_for_endpoint_charset(partition_info *part_info,
                                            bool left_endpoint,
                                            bool include_endpoint)
{
  Field **part_fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  uint32 range_idx;

  copy_to_part_field_buffers(part_fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  range_idx= get_partition_id_range_for_endpoint(part_info, left_endpoint,
                                                 include_endpoint);
  restore_part_field_pointers(part_fields, saved_ptrs);
  return range_idx;
}

uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
                                           bool left_endpoint,
                                           bool include_endpoint)
@@ -6420,12 +6770,19 @@ int get_part_iter_for_interval_via_mapping(partition_info *part_info,

  if (part_info->part_type == RANGE_PARTITION)
  {
    if (part_info->includes_charset_field_part)
      get_endpoint=        get_partition_id_range_for_endpoint_charset;
    else
      get_endpoint=        get_partition_id_range_for_endpoint;
    max_endpoint_val=    part_info->no_parts;
    part_iter->get_next= get_next_partition_id_range;
  }
  else if (part_info->part_type == LIST_PARTITION)
  {

    if (part_info->includes_charset_field_part)
      get_endpoint=        get_list_array_idx_for_endpoint_charset;
    else
      get_endpoint=        get_list_array_idx_for_endpoint;
    max_endpoint_val=    part_info->no_list_values;
    part_iter->get_next= get_next_partition_id_list;