Commit 5d1ccce5 authored by svoj@mysql.com/june.mysql.com's avatar svoj@mysql.com/june.mysql.com
Browse files

BUG#31159 - fulltext search on ucs2 column crashes server

ucs2 doesn't provide the ctype array required by fulltext. The crash
happens because fulltext attempts to use the uninitialized ctype
array.

Fixed by converting ucs2 fields to their compatible utf8 analogue.
parent ab95dad7
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -784,6 +784,8 @@ extern CHARSET_INFO *get_charset(uint cs_number, myf flags);
extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags);
extern CHARSET_INFO *get_charset_by_csname(const char *cs_name,
					   uint cs_flags, myf my_flags);
/*
  Find a character set that is compatible with original_cs and has a
  ctype array. Returns NULL when no such character set is found.
*/
extern CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO
                                                       *original_cs);
extern void free_charsets(void);
extern char *get_charsets_dir(char *buf);
extern my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2);
+6 −0
Original line number Diff line number Diff line
@@ -803,4 +803,10 @@ quote(name)
????????
????????????????
drop table bug20536;
CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
INSERT INTO t1 VALUES('abcd');
SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
a
abcd
DROP TABLE t1;
End of 4.1 tests
+8 −0
Original line number Diff line number Diff line
@@ -535,4 +535,12 @@ select quote(name) from bug20536;

drop table bug20536;

#
# BUG#31159 - fulltext search on ucs2 column crashes server
#
# ucs2 does not provide the ctype array that fulltext requires. The
# boolean-mode MATCH below must return the inserted row instead of
# crashing the server.
#
CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
INSERT INTO t1 VALUES('abcd');
SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
DROP TABLE t1;

--echo End of 4.1 tests
+40 −0
Original line number Diff line number Diff line
@@ -673,3 +673,43 @@ CHARSET_INFO *fs_character_set()
  return fs_cset_cache;
}
#endif


/**
  @brief Look up a counterpart of a ucs2 collation that has a ctype array.

  Only character sets whose csname is "ucs2" are considered. The candidate
  is fetched with get_charset() using the original collation number plus 64.
  It is accepted only if it has a ctype array and its name equals the
  original name once the first four characters of each are skipped.

  @param[in] original_cs Character set to find a counterpart for

  @note
    Collation pairs (ucs2 number and object -> utf8 number and object):
    128 my_charset_ucs2_general_uca      ->192 my_charset_utf8_general_uca_ci
    129 my_charset_ucs2_icelandic_uca_ci ->193 my_charset_utf8_icelandic_uca_ci
    130 my_charset_ucs2_latvian_uca_ci   ->194 my_charset_utf8_latvian_uca_ci
    131 my_charset_ucs2_romanian_uca_ci  ->195 my_charset_utf8_romanian_uca_ci
    132 my_charset_ucs2_slovenian_uca_ci ->196 my_charset_utf8_slovenian_uca_ci
    133 my_charset_ucs2_polish_uca_ci    ->197 my_charset_utf8_polish_uca_ci
    134 my_charset_ucs2_estonian_uca_ci  ->198 my_charset_utf8_estonian_uca_ci
    135 my_charset_ucs2_spanish_uca_ci   ->199 my_charset_utf8_spanish_uca_ci
    136 my_charset_ucs2_swedish_uca_ci   ->200 my_charset_utf8_swedish_uca_ci
    137 my_charset_ucs2_turkish_uca_ci   ->201 my_charset_utf8_turkish_uca_ci
    138 my_charset_ucs2_czech_uca_ci     ->202 my_charset_utf8_czech_uca_ci
    139 my_charset_ucs2_danish_uca_ci    ->203 my_charset_utf8_danish_uca_ci
    140 my_charset_ucs2_lithuanian_uca_ci->204 my_charset_utf8_lithuanian_uca_ci
    141 my_charset_ucs2_slovak_uca_ci    ->205 my_charset_utf8_slovak_uca_ci
    142 my_charset_ucs2_spanish2_uca_ci  ->206 my_charset_utf8_spanish2_uca_ci
    143 my_charset_ucs2_roman_uca_ci     ->207 my_charset_utf8_roman_uca_ci
    144 my_charset_ucs2_persian_uca_ci   ->208 my_charset_utf8_persian_uca_ci

  @return Counterpart character set, or NULL if none qualifies.
*/

CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO *original_cs)
{
  CHARSET_INFO *candidate;
  DBUG_ENTER("get_compatible_charset_with_ctype");

  /* Character sets other than ucs2 have no counterpart to offer. */
  if (strcmp(original_cs->csname, "ucs2") != 0)
    DBUG_RETURN(NULL);

  /* The counterpart is expected 64 collation numbers further on. */
  candidate= get_charset(original_cs->number + 64, MYF(0));
  if (candidate == NULL)
    DBUG_RETURN(NULL);

  /* A candidate without a ctype array is no better than the original. */
  if (candidate->ctype == NULL)
    DBUG_RETURN(NULL);

  /*
    Compare the names past their first four characters, so that only the
    part following the leading character set name has to match.
  */
  if (strcmp(original_cs->name + 4, candidate->name + 4) != 0)
    DBUG_RETURN(NULL);

  DBUG_RETURN(candidate);
}
+32 −1
Original line number Diff line number Diff line
@@ -3135,13 +3135,44 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
    my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
    return 1;
  }
  table=((Item_field *)item)->field->table;
  /*
    With prepared statements Item_func_match::fix_fields is called twice.
    When it is called first time we have original item tree here and add
    conversion layer for character sets that do not have ctype array a few
    lines below. When it is called second time, we already have conversion
    layer in item tree.
  */
  table= (item->type() == Item::FIELD_ITEM) ?
         ((Item_field *)item)->field->table :
         ((Item_field *)((Item_func_conv *)item)->key_item())->field->table;
  if (!(table->file->table_flags() & HA_CAN_FULLTEXT))
  {
    my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0));
    return 1;
  }
  table->fulltext_searched=1;
  /* A workaround for ucs2 character set */
  if (!args[1]->collation.collation->ctype)
  {
    CHARSET_INFO *compatible_cs=
      get_compatible_charset_with_ctype(args[1]->collation.collation);
    bool rc= 1;
    if (compatible_cs)
    {
      Item_string *conv_item= new Item_string("", 0, compatible_cs,
                                              DERIVATION_EXPLICIT);
      item= args[0];
      args[0]= conv_item;
      rc= agg_item_charsets(cmp_collation, func_name(), args, arg_count,
                            MY_COLL_ALLOW_SUPERSET_CONV |
                            MY_COLL_ALLOW_COERCIBLE_CONV |
                            MY_COLL_DISALLOW_NONE);
      args[0]= item;
    }
    else
      my_error(ER_WRONG_ARGUMENTS, MYF(0), "MATCH");
    return rc;
  }
  return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1);
}