Bug#20471 LIKE search fails with indexed utf8 char column (d2f7fe35) · Commits · Software / OSDI20 Artifacts / mariadb

include/m_ctype.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -108,6 +108,8 @@ enum my_lex_states

		struct charset_info_st;


		/* See strings/CHARSET_INFO.txt about information on this structure */
		typedef struct my_collation_handler_st
		{
		my_bool (init)(struct charset_info_st , void (alloc)(uint));
		@@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
		extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;


		/* See strings/CHARSET_INFO.txt about information on this structure */
		typedef struct my_charset_handler_st
		{
		my_bool (init)(struct charset_info_st , void (alloc)(uint));
		@@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler;
		extern MY_CHARSET_HANDLER my_charset_ucs2_handler;


		/* See strings/CHARSET_INFO.txt about information on this structure */
		typedef struct charset_info_st
		{
		uint number;

mysql-test/r/ctype_utf8.result

+75 −0

Original line number	Diff line number	Diff line
		@@ -1124,6 +1124,81 @@ check table t1;
		Table Op Msg_type Msg_text
		test.t1 check status OK
		drop table t1;
		set names utf8;
		create table t1 (s1 char(5) character set utf8);
		insert into t1 values
		('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
		create index it1 on t1 (s1);
		select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
		before_delete_general_ci
		ペテルグル
		delete from t1 where s1 = 'Y';
		select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
		after_delete_general_ci
		ペテルグル
		drop table t1;
		set names utf8;
		create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
		insert into t1 values
		('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
		create index it1 on t1 (s1);
		select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
		before_delete_unicode_ci
		ペテルグル
		delete from t1 where s1 = 'Y';
		select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
		after_delete_unicode_ci
		ペテルグル
		drop table t1;
		set names utf8;
		create table t1 (s1 char(5) character set utf8 collate utf8_bin);
		insert into t1 values
		('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
		create index it1 on t1 (s1);
		select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
		before_delete_bin
		ペテルグル
		delete from t1 where s1 = 'Y';
		select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
		after_delete_bin
		ペテルグル
		drop table t1;
		set names utf8;
		create table t1 (a varchar(30) not null primary key)
		engine=innodb default character set utf8 collate utf8_general_ci;
		insert into t1 values ('あいうえおかきくけこさしすせそ');
		insert into t1 values ('さしすせそかきくけこあいうえお');
		select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
		gci1
		さしすせそかきくけこあいうえお
		select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
		gci2
		あいうえおかきくけこさしすせそ
		drop table t1;
		set names utf8;
		create table t1 (a varchar(30) not null primary key)
		engine=innodb default character set utf8 collate utf8_unicode_ci;
		insert into t1 values ('あいうえおかきくけこさしすせそ');
		insert into t1 values ('さしすせそかきくけこあいうえお');
		select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
		uci1
		さしすせそかきくけこあいうえお
		select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
		uci2
		あいうえおかきくけこさしすせそ
		drop table t1;
		set names utf8;
		create table t1 (a varchar(30) not null primary key)
		engine=innodb default character set utf8 collate utf8_bin;
		insert into t1 values ('あいうえおかきくけこさしすせそ');
		insert into t1 values ('さしすせそかきくけこあいうえお');
		select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
		bin1
		さしすせそかきくけこあいうえお
		select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
		bin2
		あいうえおかきくけこさしすせそ
		drop table t1;
		SET NAMES utf8;
		CREATE TABLE t1 (id int PRIMARY KEY,
		a varchar(16) collate utf8_unicode_ci NOT NULL default '',

mysql-test/t/ctype_utf8.test

+70 −0

Original line number	Diff line number	Diff line
		@@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
		check table t1;
		drop table t1;

		#
		# Bug#20471 LIKE search fails with indexed utf8 char column
		#
		# Case 1: utf8 CHAR(5) column declared without an explicit collation
		# (the result aliases label this case "general_ci").
		# The index is created after the rows are inserted; the same LIKE
		# prefix query on the multi-byte value is then run twice, once before
		# and once after deleting the 'Y' row.
		set names utf8;
		create table t1 (s1 char(5) character set utf8);
		insert into t1 values
		('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
		create index it1 on t1 (s1);
		select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
		delete from t1 where s1 = 'Y';
		select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
		drop table t1;

		# Case 2: same data and statements, with the column collated as
		# utf8_unicode_ci. The LIKE prefix query is run before and after
		# deleting the 'Y' row.
		set names utf8;
		create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
		insert into t1 values
		('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
		create index it1 on t1 (s1);
		select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
		delete from t1 where s1 = 'Y';
		select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
		drop table t1;

		# Case 3: same data and statements, with the column collated as
		# utf8_bin. The LIKE prefix query is run before and after deleting
		# the 'Y' row.
		set names utf8;
		create table t1 (s1 char(5) character set utf8 collate utf8_bin);
		insert into t1 values
		('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
		create index it1 on t1 (s1);
		select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
		delete from t1 where s1 = 'Y';
		select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
		drop table t1;

		# Additional tests from the duplicate Bug#20744 "MySQL return no result"

		# InnoDB table whose primary key is a utf8_general_ci VARCHAR(30).
		# Warnings are suppressed only around the CREATE TABLE statement.
		# Two 15-character rows are inserted; gci1 matches one of them with a
		# pattern that is the full value followed by '%', gci2 matches the
		# other with a pattern that contains no wildcard.
		set names utf8;
		--disable_warnings
		create table t1 (a varchar(30) not null primary key)
		engine=innodb default character set utf8 collate utf8_general_ci;
		--enable_warnings
		insert into t1 values ('あいうえおかきくけこさしすせそ');
		insert into t1 values ('さしすせそかきくけこあいうえお');
		select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
		select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
		drop table t1;

		# Same scenario with the table collated as utf8_unicode_ci:
		# uci1 uses the full value followed by '%', uci2 uses a pattern
		# without wildcards. Warnings are suppressed only around CREATE TABLE.
		set names utf8;
		--disable_warnings
		create table t1 (a varchar(30) not null primary key)
		engine=innodb default character set utf8 collate utf8_unicode_ci;
		--enable_warnings
		insert into t1 values ('あいうえおかきくけこさしすせそ');
		insert into t1 values ('さしすせそかきくけこあいうえお');
		select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
		select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
		drop table t1;

		# Same scenario with the table collated as utf8_bin:
		# bin1 uses the full value followed by '%', bin2 uses a pattern
		# without wildcards. Warnings are suppressed only around CREATE TABLE.
		set names utf8;
		--disable_warnings
		create table t1 (a varchar(30) not null primary key)
		engine=innodb default character set utf8 collate utf8_bin;
		--enable_warnings
		insert into t1 values ('あいうえおかきくけこさしすせそ');
		insert into t1 values ('さしすせそかきくけこあいうえお');
		select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
		select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
		drop table t1;



		#
		# Bug#14896: Comparison with a key in a partial index over mb character field
		#

strings/CHARSET_INFO.txt

+10 −2

Original line number	Diff line number	Diff line
		@@ -33,7 +33,7 @@ typedef struct charset_info_st
		uint strxfrm_multiply;
		uint mbminlen;
		uint mbmaxlen;
		char max_sort_char; /* For LIKE optimization */
		uint16 max_sort_char; /* For LIKE optimization */

		MY_CHARSET_HANDLER *cset;
		MY_COLLATION_HANDLER *coll;
		@@ -134,7 +134,15 @@ Misc fields
		mbmaxlen - maximum multibyte sequence length.
		1 for 8bit charsets. Can be also 2 or 3.


		max_sort_char - for LIKE range:
		in case of 8bit character sets - the native code
		of the maximum character (the max_str pad byte);
		in case of UTF8 and UCS2 - the Unicode code of the maximum
		possible character (usually U+FFFF). This code is
		converted to its multibyte representation (0xEFBFBF for
		U+FFFF in UTF8) and then used as the pad sequence for max_str;
		in case of other multibyte character sets -
		the max_str pad byte (usually 0xFF).

		MY_CHARSET_HANDLER
		==================

strings/ctype-mb.c

+17 −4

Original line number	Diff line number	Diff line
		@@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),


		/*
		Write max key: create a buffer with multibyte
		Write max key:
		- for non-Unicode character sets:
		just set to 255.
		- for Unicode character set (utf-8):
		create a buffer with multibyte
		representation of the max_sort_char character,
		and copy it into max_str in a loop.
		*/
		static void pad_max_char(CHARSET_INFO cs, char str, char *end)
		{
		char buf[10];
		char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
		char buflen;

		if (!(cs->state & MY_CS_UNICODE))
		{
		bfill(str, end - str, 255);
		return;
		}

		buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
		(uchar*) buf + sizeof(buf));

		DBUG_ASSERT(buflen > 0);
		do
		{
		@@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
		my_strnncoll_mb_bin,
		my_strnncollsp_mb_bin,
		my_strnxfrm_mb_bin,
		my_like_range_simple,
		my_like_range_mb,
		my_wildcmp_mb_bin,
		my_strcasecmp_mb_bin,
		my_instr_mb,