Commit a739f2d6 authored by unknown's avatar unknown
Browse files

Allow to convert to non-Unicode charset when mixing a string

constant with a column. The string is converted into the column
character set. If the conversion doesn't lose data, then the operation
is possible. Otherwise, give an error, as it was earlier.


sql/item.h:
  Change bool argument to uint flags:
  we now have two different flags.
parent facda8f3
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -174,3 +174,15 @@ Warnings:
Warning	1265	Data truncated for column 'a' at row 1
Warning	1265	Data truncated for column 'b' at row 1
drop table t1;
set names koi8r;
create table t1 (a char(10) character set cp1251);
insert into t1 values (_koi8r'');
select * from t1 where a=_koi8r'';
a

select * from t1 where a=concat(_koi8r'');
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (koi8r_general_ci,COERCIBLE) for operation '='
select * from t1 where a=_latin1'';
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation '='
drop table t1;
set names latin1;
+22 −0
Original line number Diff line number Diff line
@@ -131,3 +131,25 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
insert into t1 values ('test','test');
insert into t1 values ('','');
drop table t1;

#
# Try to apply an automatic conversion in some cases:
# E.g. when mixing a column with a string, the string
# is converted into the column character set.
# If conversion loses data, then error. Otherwise,
# the string is replaced by its converted representation
#
set names koi8r;
create table t1 (a char(10) character set cp1251);
insert into t1 values (_koi8r'');
# this is possible:
select * from t1 where a=_koi8r'';
# this is not possible, because we have a function, not just a constant:
--error 1267
select * from t1 where a=concat(_koi8r'');
# this is not possible, cannot convert _latin1'' into cp1251:
--error 1267
select * from t1 where a=_latin1'';
drop table t1;
set names latin1;
+63 −18
Original line number Diff line number Diff line
@@ -259,7 +259,43 @@ CHARSET_INFO *Item::default_charset()
  return current_thd->variables.collation_connection;
}

bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion)

/*
   Aggregate two collations together taking
   into account their coercibility (aka derivation):

   0 == DERIVATION_EXPLICIT  - an explicitly written COLLATE clause
   1 == DERIVATION_NONE      - a mix of two different collations
   2 == DERIVATION_IMPLICIT  - a column
   3 == DERIVATION_COERCIBLE - a string constant

   The most important rules are:

   1. If collations are the same:
      choose this collation, and the strongest derivation.

   2. If collations are different:
     - Character sets may differ, but only if conversion without
       data loss is possible. The caller provides flags whether
       character set conversion attempts should be done. If no
       flags are supplied, then the character sets must be the same.
       Currently processed flags are:
         MY_COLL_ALLOW_SUPERSET_CONV  - allow conversion to a superset
         MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
     - two EXPLICIT collations produce an error, e.g. this is wrong:
       CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci)
     - the side with smaller derivation value wins,
       i.e. a column is stronger than a string constant,
       an explicit COLLATE clause is stronger than a column.
     - if derivations are the same, we have DERIVATION_NONE,
       we'll wait for an explicit COLLATE clause which possibly can
       come from another argument later: for example, this is valid,
       but we don't know yet when collecting the first two arguments:
         CONCAT(latin1_swedish_ci_column,
                latin1_german1_ci_column,
                expr COLLATE latin1_german2_ci)
*/
bool DTCollation::aggregate(DTCollation &dt, uint flags)
{
  nagg++;
  if (!my_charset_same(collation, dt.collation))
@@ -290,26 +326,35 @@ bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion)
      else
       ; // Do nothing
    }
    else if (superset_conversion)
    {
      if (derivation < dt.derivation &&
    else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
             derivation < dt.derivation &&
             collation->state & MY_CS_UNICODE)
        ; // Do nothing
      else if (dt.derivation < derivation &&
    {
      // Do nothing
    }
    else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
             dt.derivation < derivation &&
             dt.collation->state & MY_CS_UNICODE)
    {
      set(dt);
      strong= nagg;
    }
      else
    else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
             derivation < dt.derivation &&
             dt.derivation == DERIVATION_COERCIBLE)
    {
        // Cannot convert to superset
        set(0, DERIVATION_NONE);
        return 1;
      // Do nothing;
    }
    else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
             dt.derivation < derivation &&
             derivation == DERIVATION_COERCIBLE)
    {
      set(dt);
      strong= nagg;
    }
    else
    {
      // Cannot apply conversion
      set(0, DERIVATION_NONE);
      return 1;
    }
+13 −3
Original line number Diff line number Diff line
@@ -37,6 +37,16 @@ enum Derivation
  DERIVATION_EXPLICIT= 0
};

/*
  Flags for collation aggregation modes:
    MY_COLL_ALLOW_SUPERSET_CONV  - allow conversion to a superset
    MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
                                   (i.e. a string constant)
*/

#define MY_COLL_ALLOW_SUPERSET_CONV   1
#define MY_COLL_ALLOW_COERCIBLE_CONV  2


class DTCollation {
public:
  CHARSET_INFO     *collation;
@@ -72,9 +82,9 @@ class DTCollation {
  { collation= collation_arg; }
  void set(Derivation derivation_arg)
  { derivation= derivation_arg; }
  bool aggregate(DTCollation &dt, bool superset_conversion= FALSE);
  bool set(DTCollation &dt1, DTCollation &dt2, bool superset_conversion= FALSE)
  { set(dt1); return aggregate(dt2, superset_conversion); }
  bool aggregate(DTCollation &dt, uint flags= 0);
  bool set(DTCollation &dt1, DTCollation &dt2, uint flags= 0)
  { set(dt1); return aggregate(dt2, flags); }
  const char *derivation_name() const
  {
    switch(derivation)
+70 −46
Original line number Diff line number Diff line
@@ -174,21 +174,21 @@ void Item_bool_func2::fix_length_and_dec()
    return;

  /* 
    We allow to convert to Unicode character sets in some cases.
    We allow to apply automatic character set conversion in some cases.
    The conditions when conversion is possible are:
    - arguments A and B have different charsets
    - A wins according to coercibility rules
    - character set of A is superset for character set of B
      (i.e. a column is stronger than a string constant,
       an explicit COLLATE clause is stronger than a column)
    - character set of A is either superset for character set of B,
      or B is a string constant which can be converted into the
      character set of A without data loss.
    
    If all of the above is true, then it's possible to convert
    B into the character set of A, and then compare according
    to the collation of A.
  */

  if (args[0] && args[1])
  {
    uint strong= 0;
    uint weak= 0;
  uint32 dummy_offset;
  DTCollation coll;

@@ -197,12 +197,14 @@ void Item_bool_func2::fix_length_and_dec()
      String::needs_conversion(0, args[0]->collation.collation,
                                  args[1]->collation.collation,
                                  &dummy_offset) &&
        !coll.set(args[0]->collation, args[1]->collation, TRUE))
      !coll.set(args[0]->collation, args[1]->collation,
                MY_COLL_ALLOW_SUPERSET_CONV | 
                MY_COLL_ALLOW_COERCIBLE_CONV))
  {
    Item* conv= 0;
    Item_arena *arena= thd->current_arena, backup;
      strong= coll.strong;
      weak= strong ? 0 : 1;
    uint strong= coll.strong;
    uint weak= strong ? 0 : 1;
    /*
      In case we're in statement prepare, create conversion item
      in its memory: it will be reused on each execute.
@@ -211,16 +213,40 @@ void Item_bool_func2::fix_length_and_dec()
        thd->set_n_backup_item_arena(arena, &backup);
    if (args[weak]->type() == STRING_ITEM)
    {
        String tmp, cstr;
        String *ostr= args[weak]->val_str(&tmp);
      uint conv_errors; 
      String tmp, cstr, *ostr= args[weak]->val_str(&tmp);
      cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), 
		  args[strong]->collation.collation);
                args[strong]->collation.collation, &conv_errors);
      if (conv_errors)
      {
        /* 
          We could not convert a string into the character set
          of the stronger side of the operation without data loss.
          It can happen if we tried to combine a column with a string
          constant, and the column charset does not cover all the
          characters from the string. Operation cannot be done
          correctly. Return an error.
        */
        my_coll_agg_error(args[0]->collation, args[1]->collation,
                          func_name());
        return;
      }
      conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
                            args[weak]->collation.derivation);
      ((Item_string*)conv)->str_value.copy();
    }
    else
    {
      if (!(coll.collation->state & MY_CS_UNICODE))
      {
        /*
          Don't allow automatic conversion to non-Unicode charsets,
          as it potentially loses data.
        */
        my_coll_agg_error(args[0]->collation, args[1]->collation,
                          func_name());
        return;
      }
      conv= new Item_func_conv_charset(args[weak],
                                       args[strong]->collation.collation);
      conv->collation.set(args[weak]->collation.derivation);
@@ -230,7 +256,6 @@ void Item_bool_func2::fix_length_and_dec()
      thd->restore_backup_item_arena(arena, &backup);
    args[weak]= conv ? conv : args[weak];
  }
  }
  
  // Make a special case of compare with fields to get nicer DATE comparisons

@@ -1782,14 +1807,13 @@ void Item_func_in::fix_length_and_dec()
      via creating Item_func_conv_charset().
    */

    if (agg_arg_collations_for_comparison(cmp_collation,
                                          args, arg_count, TRUE))
    if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count,
                                          MY_COLL_ALLOW_SUPERSET_CONV))
      return;
    if ((!my_charset_same(args[0]->collation.collation, 
                          cmp_collation.collation) || !const_itm))
    {
      if (agg_arg_collations_for_comparison(cmp_collation,
                                            args, arg_count, FALSE))
      if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count))
        return;
    }
    else
Loading