Commit fc04692c authored by unknown's avatar unknown
Browse files

Many files:

  Allow mixing of different character sets for more SQL functions.
item_func.h:
  Allow mixing of different character sets for more SQL functions.


sql/item_cmpfunc.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item_func.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item_func.h:
  Allow mixing of different character sets for more SQL functions.
sql/item_strfunc.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item.h:
  Allow mixing of different character sets for more SQL functions.
mysql-test/t/ctype_recoding.test:
  Allow mixing of different character sets for more SQL functions.
mysql-test/r/ctype_recoding.result:
  Allow mixing of different character sets for more SQL functions.
parent 0d92f0c7
Loading
Loading
Loading
Loading
+54 −0
Original line number Diff line number Diff line
@@ -186,3 +186,57 @@ select * from t1 where a=_latin1'
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation '='
drop table t1;
set names latin1;
set names koi8r;
create table t1 (c1 char(10) character set cp1251);
insert into t1 values ('');
select c1 from t1 where c1 between '' and '';
c1

select ifnull(c1,''), ifnull(null,c1) from t1;
ifnull(c1,'')	ifnull(null,c1)
	
select if(1,c1,''), if(0,c1,'') from t1;
if(1,c1,'')	if(0,c1,'')
	
select coalesce('',c1), coalesce(null,c1) from t1;
coalesce('',c1)	coalesce(null,c1)
	
select least(c1,''), greatest(c1,'') from t1;
least(c1,'')	greatest(c1,'')
	
select locate(c1,''), locate('',c1) from t1;
locate(c1,'')	locate('',c1)
1	1
select field(c1,''),field('',c1) from t1;
field(c1,'')	field('',c1)
1	1
select concat(c1,''), concat('',c1) from t1;
concat(c1,'')	concat('',c1)
	
select concat_ws(c1,'',''), concat_ws('',c1,'') from t1;
concat_ws(c1,'','')	concat_ws('',c1,'')
	
select replace(c1,'',''), replace('',c1,'') from t1;
replace(c1,'','')	replace('',c1,'')
	
select substring_index(c1,'',2) from t1;
substring_index(c1,'',2)

select elt(1,c1,''),elt(1,'',c1) from t1;
elt(1,c1,'')	elt(1,'',c1)
	
select make_set(3,c1,''), make_set(3,'',c1) from t1;
make_set(3,c1,'')	make_set(3,'',c1)
,	,
select insert(c1,1,2,''),insert('',1,2,c1) from t1;
insert(c1,1,2,'')	insert('',1,2,c1)
	
select trim(c1 from ''),trim('' from c1) from t1;
trim(c1 from '')	trim('' from c1)
	
select lpad(c1,3,''), lpad('',3,c1) from t1;
lpad(c1,3,'')	lpad('',3,c1)
	
select rpad(c1,3,''), rpad('',3,c1) from t1;
rpad(c1,3,'')	rpad('',3,c1)
	
+26 −0
Original line number Diff line number Diff line
@@ -153,3 +153,29 @@ select * from t1 where a=_latin1'
drop table t1;
set names latin1;

#
# Check more automatic conversion
#
set names koi8r;
create table t1 (c1 char(10) character set cp1251);
insert into t1 values ('');
select c1 from t1 where c1 between '' and '';
select ifnull(c1,''), ifnull(null,c1) from t1;
select if(1,c1,''), if(0,c1,'') from t1;
select coalesce('',c1), coalesce(null,c1) from t1;
select least(c1,''), greatest(c1,'') from t1;
select locate(c1,''), locate('',c1) from t1;
select field(c1,''),field('',c1) from t1;
select concat(c1,''), concat('',c1) from t1;
select concat_ws(c1,'',''), concat_ws('',c1,'') from t1;
select replace(c1,'',''), replace('',c1,'') from t1;
select substring_index(c1,'',2) from t1;
select elt(1,c1,''),elt(1,'',c1) from t1;
select make_set(3,c1,''), make_set(3,'',c1) from t1;
select insert(c1,1,2,''),insert('',1,2,c1) from t1;
select trim(c1 from ''),trim('' from c1) from t1;
select lpad(c1,3,''), lpad('',3,c1) from t1;
select rpad(c1,3,''), rpad('',3,c1) from t1;
# TODO
#select case c1 when '' then '' when '' then '' else 'c' end from t1;
#select export_set(5,c1,''), export_set(5,'',c1) from t1;
+41 −0
Original line number Diff line number Diff line
@@ -205,6 +205,41 @@ bool Item::eq(const Item *item, bool binary_cmp) const
}


Item *Item::safe_charset_converter(CHARSET_INFO *tocs)
{
  /*
    Default implementation: wrap this item into a charset conversion
    function item. This is done only when the target character set is
    flagged as Unicode; automatic conversion into any other character
    set is refused because it potentially loses data.

    Returns the new conversion item, or NULL when safe conversion
    is not possible.
  */
  const bool unicode_target= (tocs->state & MY_CS_UNICODE) != 0;
  if (unicode_target)
    return new Item_func_conv_charset(this, tocs);
  return NULL;
}


Item *Item_string::safe_charset_converter(CHARSET_INFO *tocs)
{
  /*
    Convert this string constant into the character set 'tocs' and
    return a new Item_string holding the converted value with the same
    collation derivation as this item.

    Returns NULL when safe conversion is not possible:
    - the converted copy could not be produced (out of memory),
    - the target charset does not cover all the characters of the
      string, so the conversion would lose data,
    - the new item or its private string buffer could not be allocated.
  */
  Item_string *conv;
  /*
    Start from 0 so the value is defined even if the copy below fails
    before it gets a chance to report the number of errors.
  */
  uint conv_errors= 0;
  String tmp, cstr, *ostr= val_str(&tmp);
  if (cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs,
                &conv_errors) ||
      conv_errors ||
      !(conv= new Item_string(cstr.ptr(), cstr.length(),
                              cstr.charset(),
                              collation.derivation)))
    return NULL;
  /*
    'cstr' is a local and goes away on return, so the new item must own
    its value (presumably str_value still refers to cstr's buffer at
    this point -- verify against the Item_string constructor). If the
    private copy cannot be allocated the item is not usable.
  */
  if (conv->str_value.copy())
    return NULL;
  return conv;
}


bool Item_string::eq(const Item *item, bool binary_cmp) const
{
  if (type() == item->type())
@@ -723,6 +758,12 @@ String *Item_null::val_str(String *str)
}


Item *Item_null::safe_charset_converter(CHARSET_INFO *tocs)
{
  /*
    No new item is created here: the collation of this very item is
    switched to the requested character set and the item itself is
    handed back to the caller.
  */
  Item *converted= this;
  converted->collation.set(tocs);
  return converted;
}

/*********************** Item_param related ******************************/

/* 
+15 −3
Original line number Diff line number Diff line
@@ -39,13 +39,22 @@ enum Derivation

/*
  Flags for collation aggregation modes:
  allow conversion to a superset
  allow conversion of a coercible value (i.e. constant).
  MY_COLL_ALLOW_SUPERSET_CONV  - allow conversion to a superset
  MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
                                 (i.e. constant).
  MY_COLL_ALLOW_CONV           - allow any kind of conversion
                                 (combination of the above two)
  MY_COLL_DISALLOW_NONE        - don't allow return DERIVATION_NONE
                                 (e.g. when aggregating for comparison)
  MY_COLL_CMP_CONV             - combination of MY_COLL_ALLOW_CONV
                                 and MY_COLL_DISALLOW_NONE
*/

/* Elementary flags: each one occupies its own bit. */
#define MY_COLL_ALLOW_SUPERSET_CONV   1
#define MY_COLL_ALLOW_COERCIBLE_CONV  2
#define MY_COLL_DISALLOW_NONE         4

/* Combined flags, spelled out as the OR of their components. */
#define MY_COLL_ALLOW_CONV \
  (MY_COLL_ALLOW_SUPERSET_CONV | MY_COLL_ALLOW_COERCIBLE_CONV)
#define MY_COLL_CMP_CONV \
  (MY_COLL_ALLOW_CONV | MY_COLL_DISALLOW_NONE)

class DTCollation {
public:
@@ -302,6 +311,7 @@ class Item {
  Field *tmp_table_field_from_field_type(TABLE *table);

  virtual Item *neg_transformer(THD *thd) { return NULL; }
  virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
  void delete_self()
  {
    cleanup();
@@ -447,6 +457,7 @@ class Item_null :public Item
  Item *new_item() { return new Item_null(name); }
  bool is_null() { return 1; }
  void print(String *str) { str->append("NULL", 4); }
  Item *safe_charset_converter(CHARSET_INFO *tocs);
};


@@ -717,6 +728,7 @@ class Item_string :public Item
    return new Item_string(name, str_value.ptr(), 
    			   str_value.length(), &my_charset_bin);
  }
  Item *safe_charset_converter(CHARSET_INFO *tocs);
  String *const_string() { return &str_value; }
  inline void append(char *str, uint length) { str_value.append(str, length); }
  void print(String *str);
+9 −87
Original line number Diff line number Diff line
@@ -173,89 +173,11 @@ void Item_bool_func2::fix_length_and_dec()
  if (!args[0] || !args[1])
    return;

  /* 
    We allow to apply automatic character set conversion in some cases.
    The conditions when conversion is possible are:
    - arguments A and B have different charsets
    - A wins according to coercibility rules
      (i.e. a column is stronger than a string constant,
       an explicit COLLATE clause is stronger than a column)
    - character set of A is either superset for character set of B,
      or B is a string constant which can be converted into the
      character set of A without data loss.
    
    If all of the above is true, then it's possible to convert
    B into the character set of A, and then compare according
    to the collation of A.
  */

  uint32 dummy_offset;
  DTCollation coll;

  if (args[0]->result_type() == STRING_RESULT &&
      args[1]->result_type() == STRING_RESULT &&
      String::needs_conversion(0, args[0]->collation.collation,
                                  args[1]->collation.collation,
                                  &dummy_offset) &&
      !coll.set(args[0]->collation, args[1]->collation,
                MY_COLL_ALLOW_SUPERSET_CONV | 
                MY_COLL_ALLOW_COERCIBLE_CONV))
  {
    Item* conv= 0;
    Item_arena *arena= thd->current_arena, backup;
    uint strong= coll.strong;
    uint weak= strong ? 0 : 1;
    /*
      In case we're in statement prepare, create conversion item
      in its memory: it will be reused on each execute.
    */
    if (arena->is_stmt_prepare())
        thd->set_n_backup_item_arena(arena, &backup);
    if (args[weak]->type() == STRING_ITEM)
    {
      uint conv_errors; 
      String tmp, cstr, *ostr= args[weak]->val_str(&tmp);
      cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), 
                args[strong]->collation.collation, &conv_errors);
      if (conv_errors)
      {
        /* 
          We could not convert a string into the character set
          of the stronger side of the operation without data loss.
          It can happen if we tried to combine a column with a string
          constant, and the column charset does not cover all the
          characters from the string. Operation cannot be done
          correctly. Return an error.
        */
        my_coll_agg_error(args[0]->collation, args[1]->collation,
                          func_name());
      agg_arg_charsets(coll, args, 2, MY_COLL_CMP_CONV))
    return;
      }
      conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
                            args[weak]->collation.derivation);
      ((Item_string*)conv)->str_value.copy();
    }
    else
    {
      if (!(coll.collation->state & MY_CS_UNICODE))
      {
        /*
          Don't allow automatic conversion to non-Unicode charsets,
          as it potentially loses data.
        */
        my_coll_agg_error(args[0]->collation, args[1]->collation,
                          func_name());
        return;
      }
      conv= new Item_func_conv_charset(args[weak],
                                       args[strong]->collation.collation);
      conv->collation.set(args[weak]->collation.derivation);
      conv->fix_fields(thd, 0, &conv);
    }
    if (arena->is_stmt_prepare())
      thd->restore_backup_item_arena(arena, &backup);
    args[weak]= conv ? conv : args[weak];
  }
  
  // Make a special case of compare with fields to get nicer DATE comparisons

@@ -871,7 +793,7 @@ void Item_func_between::fix_length_and_dec()
    return;
  agg_cmp_type(&cmp_type, args, 3);
  if (cmp_type == STRING_RESULT &&
      agg_arg_collations_for_comparison(cmp_collation, args, 3))
      agg_arg_charsets(cmp_collation, args, 3, MY_COLL_CMP_CONV))
    return;

  /*
@@ -987,7 +909,7 @@ Item_func_ifnull::fix_length_and_dec()
  decimals=max(args[0]->decimals,args[1]->decimals);
  agg_result_type(&cached_result_type, args, 2);
  if (cached_result_type == STRING_RESULT)
    agg_arg_collations(collation, args, arg_count);
    agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV);
  else if (cached_result_type != REAL_RESULT)
    decimals= 0;
  
@@ -1083,7 +1005,7 @@ Item_func_if::fix_length_and_dec()
    agg_result_type(&cached_result_type, args+1, 2);
    if (cached_result_type == STRING_RESULT)
    {
      if (agg_arg_collations(collation, args+1, 2))
      if (agg_arg_charsets(collation, args+1, 2, MY_COLL_ALLOW_CONV))
      return;
    }
    else
@@ -1354,7 +1276,7 @@ void Item_func_case::fix_length_and_dec()
  
  agg_result_type(&cached_result_type, agg, nagg);
  if ((cached_result_type == STRING_RESULT) &&
      agg_arg_collations(collation, agg, nagg))
      agg_arg_charsets(collation, agg, nagg, MY_COLL_ALLOW_CONV))
    return;
  
  
@@ -1370,7 +1292,7 @@ void Item_func_case::fix_length_and_dec()
    nagg++;
    agg_cmp_type(&cmp_type, agg, nagg);
    if ((cmp_type == STRING_RESULT) &&
        agg_arg_collations_for_comparison(cmp_collation, agg, nagg))
        agg_arg_charsets(cmp_collation, agg, nagg, MY_COLL_CMP_CONV))
    return;
  }
  
@@ -1477,7 +1399,7 @@ void Item_func_coalesce::fix_length_and_dec()
    set_if_bigger(decimals,args[i]->decimals);
  }
  if (cached_result_type == STRING_RESULT)
    agg_arg_collations(collation, args, arg_count);
    agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV);
  else if (cached_result_type != REAL_RESULT)
    decimals= 0;
}
@@ -2423,7 +2345,7 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
  max_length= 1;
  decimals= 0;

  if (agg_arg_collations(cmp_collation, args, 2))
  if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV))
    return 1;

  used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
Loading