Commit 2a664ff6 authored by bar@mysql.com/bar.intranet.mysql.r18.ru's avatar bar@mysql.com/bar.intranet.mysql.r18.ru
Browse files

Bug#20471 LIKE search fails with indexed utf8 char column

The main problem was already fixed by Igor under terms of 16674.
Adding some additional minor fixes and tests.
parent 2eacb14d
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -108,6 +108,8 @@ enum my_lex_states

struct charset_info_st;


/* See strings/CHARSET_INFO.txt about information on this structure  */
typedef struct my_collation_handler_st
{
  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;


/* See strings/CHARSET_INFO.txt about information on this structure  */
typedef struct my_charset_handler_st
{
  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler;
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;


/* See strings/CHARSET_INFO.txt about information on this structure  */
typedef struct charset_info_st
{
  uint      number;
+75 −0
Original line number Diff line number Diff line
@@ -1124,6 +1124,81 @@ check table t1;
Table	Op	Msg_type	Msg_text
test.t1	check	status	OK
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
before_delete_general_ci
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
after_delete_general_ci
ペテルグル
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
before_delete_unicode_ci
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
after_delete_unicode_ci
ペテルグル
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
before_delete_bin
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
after_delete_bin
ペテルグル
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb  default character set utf8 collate utf8_general_ci;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
gci1
さしすせそかきくけこあいうえお
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
gci2
あいうえおかきくけこさしすせそ
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_unicode_ci;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
uci1
さしすせそかきくけこあいうえお
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
uci2
あいうえおかきくけこさしすせそ
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_bin;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
bin1
さしすせそかきくけこあいうえお
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
bin2
あいうえおかきくけこさしすせそ
drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (id int PRIMARY KEY,
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
+70 −0
Original line number Diff line number Diff line
@@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
check table t1;
drop table t1;

#
# Bug#20471 LIKE search fails with indexed utf8 char column
#
# The three cases below are identical except for the collation of s1.
# Each one fills a char(5) utf8 column with ASCII, accented, katakana
# and NULL values, adds an index on it, and then runs the same LIKE
# prefix search twice: once before and once after deleting a row that
# does not match the pattern. Both searches are expected to return the
# katakana row.
#

# Case 1: no explicit collation on the column
# (the result columns are labelled general_ci)
set names utf8;
create table t1 (s1 char(5) character set utf8);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
drop table t1;

# Case 2: utf8_unicode_ci
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
drop table t1;

# Case 3: utf8_bin
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
drop table t1;

# additional tests from duplicate bug#20744 MySQL return no result
#
# The three cases below are identical except for the table default
# collation. Each one creates a table whose primary key is a utf8
# varchar(30), inserts two 15-character hiragana strings, and looks
# each of them up with LIKE: the first lookup uses the full value
# followed by a trailing % wildcard, the second uses the full value
# with no wildcard. Each lookup is expected to return exactly the
# matching row.
#
# Warnings are disabled around every create table statement.
# NOTE(review): presumably so the recorded result does not depend on
# whether the InnoDB engine is available - confirm.

# Case 1: utf8_general_ci
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb  default character set utf8 collate utf8_general_ci;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;

# Case 2: utf8_unicode_ci
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_unicode_ci;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;

# Case 3: utf8_bin
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_bin;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;



#
# Bug#14896: Comparison with a key in a partial index over mb character field
#
+10 −2
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ typedef struct charset_info_st
  uint      strxfrm_multiply;
  uint      mbminlen;
  uint      mbmaxlen;
  char      max_sort_char; /* For LIKE optimization */
  uint16    max_sort_char; /* For LIKE optimization */

  MY_CHARSET_HANDLER *cset;
  MY_COLLATION_HANDLER *coll;
@@ -134,7 +134,15 @@ Misc fields
  mbmaxlen         - maximum multibyte sequence length.
                     1 for 8bit charsets. Can be also 2 or 3.


  max_sort_char    - for LIKE range:
                     in case of 8bit character sets - native code
                     of maximum character (max_str pad byte);
                     in case of UTF8 and UCS2 - Unicode code of the maximum
                     possible character (usually U+FFFF). This code is
                     converted to multibyte representation (usually
                     0xEFBFBF for UTF8) and then used as a pad sequence
                     for max_str;
                     in case of other multibyte character sets -
                     max_str pad byte (usually 0xFF).

MY_CHARSET_HANDLER
==================
+17 −4
Original line number Diff line number Diff line
@@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),


/* 
  Write max key: create a buffer with multibyte
  Write max key:
- for non-Unicode character sets:
  just set to 255.
- for Unicode character set (utf-8):
  create a buffer with multibyte
  representation of the max_sort_char character,
  and copy it into max_str in a loop. 
*/
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
{
  char buf[10];
  char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
  char buflen;
  
  if (!(cs->state & MY_CS_UNICODE))
  {
    bfill(str, end - str, 255);
    return;
  }
  
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
                          (uchar*) buf + sizeof(buf));
  
  DBUG_ASSERT(buflen > 0);
  do
  {
@@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
    my_strnncoll_mb_bin,
    my_strnncollsp_mb_bin,
    my_strnxfrm_mb_bin,
    my_like_range_simple,
    my_like_range_mb,
    my_wildcmp_mb_bin,
    my_strcasecmp_mb_bin,
    my_instr_mb,
Loading