BUG#31159 - fulltext search on ucs2 column crashes server (5d1ccce5) · Commits · Software / OSDI20 Artifacts / mariadb

include/my_sys.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -784,6 +784,8 @@ extern CHARSET_INFO *get_charset(uint cs_number, myf flags);
		extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags);
		extern CHARSET_INFO *get_charset_by_csname(const char *cs_name,
		                                           uint cs_flags, myf my_flags);
		/*
		  Find a character set that is compatible with original_cs and has a
		  ctype array; returns NULL when there is none (see mysys/charset.c).
		*/
		extern CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO
		                                                       *original_cs);
		extern void free_charsets(void);
		extern char *get_charsets_dir(char *buf);
		extern my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2);

mysql-test/r/ctype_ucs.result

+6 −0

Original line number	Diff line number	Diff line
		@@ -803,4 +803,10 @@ quote(name)
		????????
		????????????????
		drop table bug20536;
		CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
		INSERT INTO t1 VALUES('abcd');
		SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
		a
		abcd
		DROP TABLE t1;
		End of 4.1 tests

mysql-test/t/ctype_ucs.test

+8 −0

Original line number	Diff line number	Diff line
		@@ -535,4 +535,12 @@ select quote(name) from bug20536;

		drop table bug20536;

		#
		# BUG#31159 - fulltext search on ucs2 column crashes server
		#
		# Regression test: a boolean-mode MATCH ... AGAINST on a TEXT column
		# that uses a ucs2 collation must complete and return the inserted row
		# ('abcd') instead of crashing the server.
		#
		CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
		INSERT INTO t1 VALUES('abcd');
		SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
		DROP TABLE t1;

		--echo End of 4.1 tests

mysys/charset.c

+40 −0

Original line number	Diff line number	Diff line
		@@ -673,3 +673,43 @@ CHARSET_INFO *fs_character_set()
		return fs_cset_cache;
		}
		#endif


		/**
		  @brief Find compatible character set with ctype.

		  Only character sets whose csname is "ucs2" are handled. The candidate
		  is the character set numbered original_cs->number + 64 (see the table
		  below). The candidate is accepted only when it has a ctype array and
		  its name equals the original name once the first four characters of
		  both are skipped (presumably the "ucs2" / "utf8" prefix).

		  @param[in] original_cs Original character set

		  @note
		    128 my_charset_ucs2_general_uca      ->192 my_charset_utf8_general_uca_ci
		    129 my_charset_ucs2_icelandic_uca_ci ->193 my_charset_utf8_icelandic_uca_ci
		    130 my_charset_ucs2_latvian_uca_ci   ->194 my_charset_utf8_latvian_uca_ci
		    131 my_charset_ucs2_romanian_uca_ci  ->195 my_charset_utf8_romanian_uca_ci
		    132 my_charset_ucs2_slovenian_uca_ci ->196 my_charset_utf8_slovenian_uca_ci
		    133 my_charset_ucs2_polish_uca_ci    ->197 my_charset_utf8_polish_uca_ci
		    134 my_charset_ucs2_estonian_uca_ci  ->198 my_charset_utf8_estonian_uca_ci
		    135 my_charset_ucs2_spanish_uca_ci   ->199 my_charset_utf8_spanish_uca_ci
		    136 my_charset_ucs2_swedish_uca_ci   ->200 my_charset_utf8_swedish_uca_ci
		    137 my_charset_ucs2_turkish_uca_ci   ->201 my_charset_utf8_turkish_uca_ci
		    138 my_charset_ucs2_czech_uca_ci     ->202 my_charset_utf8_czech_uca_ci
		    139 my_charset_ucs2_danish_uca_ci    ->203 my_charset_utf8_danish_uca_ci
		    140 my_charset_ucs2_lithuanian_uca_ci->204 my_charset_utf8_lithuanian_uca_ci
		    141 my_charset_ucs2_slovak_uca_ci    ->205 my_charset_utf8_slovak_uca_ci
		    142 my_charset_ucs2_spanish2_uca_ci  ->206 my_charset_utf8_spanish2_uca_ci
		    143 my_charset_ucs2_roman_uca_ci     ->207 my_charset_utf8_roman_uca_ci
		    144 my_charset_ucs2_persian_uca_ci   ->208 my_charset_utf8_persian_uca_ci

		  @return Compatible character set or NULL.
		*/

		CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO *original_cs)
		{
		  CHARSET_INFO *compatible_cs= 0;
		  DBUG_ENTER("get_compatible_charset_with_ctype");
		  /*
		    The assignment inside the condition is intentional: the candidate is
		    looked up only for ucs2, and it is reset to NULL when it lacks a
		    ctype array or its name differs from the original past the prefix.
		  */
		  if (!strcmp(original_cs->csname, "ucs2") &&
		      (compatible_cs= get_charset(original_cs->number + 64, MYF(0))) &&
		      (!compatible_cs->ctype ||
		       strcmp(original_cs->name + 4, compatible_cs->name + 4)))
		    compatible_cs= 0;
		  DBUG_RETURN(compatible_cs);
		}

sql/item_func.cc

+32 −1

Original line number	Diff line number	Diff line
		@@ -3135,13 +3135,44 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
		my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
		return 1;
		}
		table=((Item_field *)item)->field->table;
		/*
		With prepared statements Item_func_match::fix_fields is called twice.
		When it is called first time we have original item tree here and add
		conversion layer for character sets that do not have ctype array a few
		lines below. When it is called second time, we already have conversion
		layer in item tree.
		*/
		table= (item->type() == Item::FIELD_ITEM) ?
		((Item_field *)item)->field->table :
		((Item_field *)((Item_func_conv *)item)->key_item())->field->table;
		if (!(table->file->table_flags() & HA_CAN_FULLTEXT))
		{
		my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0));
		return 1;
		}
		table->fulltext_searched=1;
		/* A workaround for ucs2 character set */
		if (!args[1]->collation.collation->ctype)
		{
		CHARSET_INFO *compatible_cs=
		get_compatible_charset_with_ctype(args[1]->collation.collation);
		bool rc= 1;
		if (compatible_cs)
		{
		Item_string *conv_item= new Item_string("", 0, compatible_cs,
		DERIVATION_EXPLICIT);
		item= args[0];
		args[0]= conv_item;
		rc= agg_item_charsets(cmp_collation, func_name(), args, arg_count,
		MY_COLL_ALLOW_SUPERSET_CONV |
		MY_COLL_ALLOW_COERCIBLE_CONV |
		MY_COLL_DISALLOW_NONE);
		args[0]= item;
		}
		else
		my_error(ER_WRONG_ARGUMENTS, MYF(0), "MATCH");
		return rc;
		}
		return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1);
		}