Bug #5324 Bug in UCA collations with LIKE comparisons and INDEX (7e7dfccc) · Commits · Software / OSDI20 Artifacts / mariadb

include/m_ctype.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -312,6 +312,13 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
		char min_str, char max_str,
		uint min_length, uint max_length);

		/*
		  LIKE range builder for multi-byte character sets.
		  min_str/max_str are output buffers of res_length bytes;
		  min_length/max_length receive the significant lengths.
		  Returns 0 if ok, 1 if the LIKE string can't be optimized.
		*/
		my_bool my_like_range_mb(CHARSET_INFO *cs,
		                         const char *ptr, uint ptr_length,
		                         pbool escape, pbool w_one, pbool w_many,
		                         uint res_length,
		                         char *min_str, char *max_str,
		                         uint *min_length, uint *max_length);

		my_bool my_like_range_ucs2(CHARSET_INFO *cs,
		const char *ptr, uint ptr_length,
		pbool escape, pbool w_one, pbool w_many,

mysql-test/r/ctype_uca.result

+39 −0

Original line number	Diff line number	Diff line
		@@ -1872,3 +1872,42 @@ Z,z,Ź,ź,Ż,ż,Ž,ž
		ǁ
		ǂ
		ǃ
		drop table t1;
		SET NAMES utf8;
		CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
		COLLATE utf8_general_ci;
		c
		Μωδαί̈
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
		COLLATE utf8_general_ci ORDER BY c;
		c
		Μωδ
		Μωδαί̈
		DROP TABLE t1;
		CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
		INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
		SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
		c
		Μωδαί̈
		INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
		SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
		COLLATE ucs2_unicode_ci ORDER BY c;
		c
		Μωδ
		Μωδαί̈
		DROP TABLE t1;
		CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
		c
		Μωδαί̈
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
		COLLATE utf8_unicode_ci ORDER BY c;
		c
		Μωδ
		Μωδαί̈
		DROP TABLE t1;

mysql-test/t/ctype_uca.test

+37 −0

Original line number	Diff line number	Diff line
		@@ -180,3 +180,40 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_slovak_ci;
		select group_concat(c1 order by c1) from t1 group by c1 collate utf8_spanish2_ci;
		select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci;

		drop table t1;

		#
		# Bug#5324: UCA collations with LIKE comparisons and INDEX
		#
		# Each case creates an indexed column, inserts rows that start with
		# U+039C and selects them with the pattern U+039C followed by '%'
		# (_ucs2 0x039C0025).
		#
		SET NAMES utf8;
		#test1: utf8_general_ci
		CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
		#Check one row
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
		COLLATE utf8_general_ci;
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
		#Check two rows
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
		COLLATE utf8_general_ci ORDER BY c;
		DROP TABLE t1;
		#test2: ucs2_unicode_ci
		CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
		INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
		#Check one row
		SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
		INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
		#Check two rows
		SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
		COLLATE ucs2_unicode_ci ORDER BY c;
		DROP TABLE t1;
		#test3: utf8_unicode_ci
		CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
		#Check one row
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
		INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
		#Check two rows
		SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
		COLLATE utf8_unicode_ci ORDER BY c;
		DROP TABLE t1;

strings/ctype-mb.c

+86 −0

Original line number	Diff line number	Diff line
		@@ -458,6 +458,92 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
		}
		}

		/*
		** Calculate min_str and max_str that range a LIKE string
		** (multi-byte character set version).
		**
		** Arguments:
		** cs          Character set; supplies min_sort_char, max_sort_char
		**             and the wc_mb() encoder used for the max key.
		** ptr         Pointer to LIKE string.
		** ptr_length  Length of LIKE string in bytes.
		** escape      Escape character in LIKE. (Normally '\').
		**             All escape characters are removed from min_str and max_str.
		** w_one       Wildcard matching one character ('_' in SQL).
		** w_many      Wildcard matching many characters ('%' in SQL).
		** res_length  Length of the min_str and max_str buffers.
		** min_str     Out: smallest string that ranges LIKE. After a wildcard
		**             it is filled with min_sort_char; without a wildcard
		**             it is space padded to res_length.
		** max_str     Out: largest string that ranges LIKE. After a wildcard
		**             it is filled with the multi-byte form of max_sort_char
		**             (spaces where a whole character no longer fits);
		**             without a wildcard it is space padded to res_length.
		** min_length  Out: length of the literal prefix in min_str.
		** max_length  Out: res_length if a wildcard was found, otherwise
		**             the same value as min_length.
		**
		** The function returns 0 if ok and 1 if the LIKE string can't be
		** optimized (here: max_sort_char could not be encoded).
		*/
		my_bool my_like_range_mb(CHARSET_INFO *cs,
		                         const char *ptr, uint ptr_length,
		                         pbool escape, pbool w_one, pbool w_many,
		                         uint res_length,
		                         char *min_str, char *max_str,
		                         uint *min_length, uint *max_length)
		{
		  const char *end= ptr + ptr_length;
		  char *min_org= min_str;
		  char *min_end= min_str + res_length;
		  char *max_end= max_str + res_length;
		
		  for (; ptr != end && min_str != min_end; ptr++)
		  {
		    if (*ptr == escape && ptr + 1 != end)
		    {
		      ptr++;                                  /* Skip escape */
		      *min_str++= *max_str++= *ptr;
		      continue;
		    }
		    if (*ptr == w_one || *ptr == w_many)      /* '_' and '%' in SQL */
		    {
		      char buf[10];
		      int buflen;
		
		      /*
		        Create a buffer with the multibyte representation of the
		        max_sort_char character. Done before any output is written,
		        and kept signed, so that an encoding failure is reported
		        instead of being used as a huge unsigned length.
		      */
		      buflen= cs->cset->wc_mb(cs, cs->max_sort_char, buf, buf + sizeof(buf));
		      DBUG_ASSERT(buflen > 0);
		      if (buflen <= 0)
		        return 1;
		
		      /* Write min key */
		      *min_length= (uint) (min_str - min_org);
		      *max_length= res_length;
		      do
		      {
		        *min_str++= (char) cs->min_sort_char;
		      } while (min_str != min_end);
		
		      /* Write max key: copy the encoded character into max_str in a loop. */
		      do
		      {
		        /* Compare remaining space, never form a pointer past max_end */
		        if ((uint) (max_end - max_str) >= (uint) buflen)
		        {
		          /* Enough space for max character */
		          memcpy(max_str, buf, (uint) buflen);
		          max_str+= buflen;
		        }
		        else
		        {
		          /*
		            There is no space for whole multibyte
		            character, then add trailing spaces.
		          */
		          *max_str++= ' ';
		        }
		      } while (max_str != max_end);
		      return 0;
		    }
		    *min_str++= *max_str++= *ptr;
		  }
		  *min_length= *max_length= (uint) (min_str - min_org);
		
		  while (min_str != min_end)
		    *min_str++= *max_str++= ' ';              /* Because of key compression */
		  return 0;
		}

		static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
		const char str,const char str_end,
		const char wildstr,const char wildend,

strings/ctype-uca.c

+4 −3

Original line number	Diff line number	Diff line
		@@ -6876,7 +6876,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
		int mblen;

		if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc,
		scanner->sbeg, scanner->send)) < 0))
		scanner->sbeg,
		scanner->send)) <= 0))
		return -1;

		scanner->page= wc >> 8;
		@@ -7918,7 +7919,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
		my_strnncoll_ucs2_uca,
		my_strnncollsp_ucs2_uca,
		my_strnxfrm_ucs2_uca,
		my_like_range_simple,
		my_like_range_ucs2,
		my_wildcmp_uca,
		NULL,
		my_instr_mb,
		@@ -8369,7 +8370,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
		my_strnncoll_any_uca,
		my_strnncollsp_any_uca,
		my_strnxfrm_any_uca,
		my_like_range_simple,
		my_like_range_mb,
		my_wildcmp_uca,
		NULL,
		my_instr_mb,