Loading include/m_ctype.h +4 −0 Original line number Diff line number Diff line Loading @@ -108,6 +108,8 @@ enum my_lex_states struct charset_info_st; /* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct my_collation_handler_st { my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); Loading Loading @@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; /* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct my_charset_handler_st { my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); Loading Loading @@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler; extern MY_CHARSET_HANDLER my_charset_ucs2_handler; /* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct charset_info_st { uint number; Loading mysql-test/r/ctype_utf8.result +75 −0 Original line number Diff line number Diff line Loading @@ -1124,6 +1124,81 @@ check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; before_delete_general_ci ペテルグル delete from t1 where s1 = 'Y'; select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; after_delete_general_ci ペテルグル drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; before_delete_unicode_ci ペテルグル delete from t1 where s1 = 'Y'; select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; after_delete_unicode_ci ペテルグル drop table t1; set names utf8; create table t1 (s1 char(5) character 
set utf8 collate utf8_bin); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; before_delete_bin ペテルグル delete from t1 where s1 = 'Y'; select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; after_delete_bin ペテルグル drop table t1; set names utf8; create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_general_ci; insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; gci1 さしすせそかきくけこあいうえお select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; gci2 あいうえおかきくけこさしすせそ drop table t1; set names utf8; create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_unicode_ci; insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; uci1 さしすせそかきくけこあいうえお select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; uci2 あいうえおかきくけこさしすせそ drop table t1; set names utf8; create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_bin; insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%'; bin1 さしすせそかきくけこあいうえお select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ'; bin2 あいうえおかきくけこさしすせそ drop table t1; SET NAMES utf8; CREATE TABLE t1 (id int PRIMARY KEY, a varchar(16) collate utf8_unicode_ci NOT NULL default '', Loading mysql-test/t/ctype_utf8.test +70 −0 Original line number Diff line number Diff line Loading @@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb check table t1; drop table t1; # # Bug#20471 LIKE search fails with indexed utf8 char column # set names utf8; create table t1 (s1 char(5) character set utf8); insert into t1 values 
('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; delete from t1 where s1 = 'Y'; select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; delete from t1 where s1 = 'Y'; select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8 collate utf8_bin); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; delete from t1 where s1 = 'Y'; select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; drop table t1; # additional tests from duplicate bug#20744 MySQL return no result set names utf8; --disable_warnings create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_general_ci; --enable_warnings insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; drop table t1; set names utf8; --disable_warnings create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_unicode_ci; --enable_warnings insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; drop table t1; set names utf8; --disable_warnings create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_bin; --enable_warnings insert into t1 values 
('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%'; select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ'; drop table t1; # # Bug#14896: Comparison with a key in a partial index over mb character field # Loading strings/CHARSET_INFO.txt +10 −2 Original line number Diff line number Diff line Loading @@ -33,7 +33,7 @@ typedef struct charset_info_st uint strxfrm_multiply; uint mbminlen; uint mbmaxlen; char max_sort_char; /* For LIKE optimization */ uint16 max_sort_char; /* For LIKE optimization */ MY_CHARSET_HANDLER *cset; MY_COLLATION_HANDLER *coll; Loading Loading @@ -134,7 +134,15 @@ Misc fields mbmaxlen - maximum multibyte sequence length. 1 for 8bit charsets. Can also be 2 or 3. max_sort_char - for LIKE range in case of 8bit character sets - native code of maximum character (max_str pad byte); in case of UTF8 and UCS2 - Unicode code of the maximum possible character (usually U+FFFF). This code is converted to multibyte representation (usually 0xEFBFBF) and then used as a pad sequence for max_str. in case of other multibyte character sets - max_str pad byte (usually 0xFF). MY_CHARSET_HANDLER ================== Loading strings/ctype-mb.c +17 −4 Original line number Diff line number Diff line Loading @@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), /* Write max key: create a buffer with multibyte Write max key: - for non-Unicode character sets: just set to 255. - for Unicode character set (utf-8): create a buffer with multibyte representation of the max_sort_char character, and copy it into max_str in a loop. 
*/ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) { char buf[10]; char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, char buflen; if (!(cs->state & MY_CS_UNICODE)) { bfill(str, end - str, 255); return; } buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, (uchar*) buf + sizeof(buf)); DBUG_ASSERT(buflen > 0); do { Loading Loading @@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler = my_strnncoll_mb_bin, my_strnncollsp_mb_bin, my_strnxfrm_mb_bin, my_like_range_simple, my_like_range_mb, my_wildcmp_mb_bin, my_strcasecmp_mb_bin, my_instr_mb, Loading Loading
include/m_ctype.h +4 −0 Original line number Diff line number Diff line Loading @@ -108,6 +108,8 @@ enum my_lex_states struct charset_info_st; /* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct my_collation_handler_st { my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); Loading Loading @@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; /* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct my_charset_handler_st { my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); Loading Loading @@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler; extern MY_CHARSET_HANDLER my_charset_ucs2_handler; /* See strings/CHARSET_INFO.txt about information on this structure */ typedef struct charset_info_st { uint number; Loading
mysql-test/r/ctype_utf8.result +75 −0 Original line number Diff line number Diff line Loading @@ -1124,6 +1124,81 @@ check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; before_delete_general_ci ペテルグル delete from t1 where s1 = 'Y'; select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; after_delete_general_ci ペテルグル drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; before_delete_unicode_ci ペテルグル delete from t1 where s1 = 'Y'; select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; after_delete_unicode_ci ペテルグル drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8 collate utf8_bin); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; before_delete_bin ペテルグル delete from t1 where s1 = 'Y'; select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; after_delete_bin ペテルグル drop table t1; set names utf8; create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_general_ci; insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; gci1 さしすせそかきくけこあいうえお select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; gci2 あいうえおかきくけこさしすせそ drop table t1; set names utf8; create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_unicode_ci; insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values 
('さしすせそかきくけこあいうえお'); select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; uci1 さしすせそかきくけこあいうえお select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; uci2 あいうえおかきくけこさしすせそ drop table t1; set names utf8; create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_bin; insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%'; bin1 さしすせそかきくけこあいうえお select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ'; bin2 あいうえおかきくけこさしすせそ drop table t1; SET NAMES utf8; CREATE TABLE t1 (id int PRIMARY KEY, a varchar(16) collate utf8_unicode_ci NOT NULL default '', Loading
mysql-test/t/ctype_utf8.test +70 −0 Original line number Diff line number Diff line Loading @@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb check table t1; drop table t1; # # Bug#20471 LIKE search fails with indexed utf8 char column # set names utf8; create table t1 (s1 char(5) character set utf8); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%'; delete from t1 where s1 = 'Y'; select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%'; drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%'; delete from t1 where s1 = 'Y'; select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%'; drop table t1; set names utf8; create table t1 (s1 char(5) character set utf8 collate utf8_bin); insert into t1 values ('a'),('b'),(null),('ペテルグル'),('ü'),('Y'); create index it1 on t1 (s1); select s1 as before_delete_bin from t1 where s1 like 'ペテ%'; delete from t1 where s1 = 'Y'; select s1 as after_delete_bin from t1 where s1 like 'ペテ%'; drop table t1; # additional tests from duplicate bug#20744 MySQL return no result set names utf8; --disable_warnings create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_general_ci; --enable_warnings insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; drop table t1; set names utf8; --disable_warnings create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_unicode_ci; --enable_warnings insert into t1 values ('あいうえおかきくけこさしすせそ'); 
insert into t1 values ('さしすせそかきくけこあいうえお'); select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%'; select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ'; drop table t1; set names utf8; --disable_warnings create table t1 (a varchar(30) not null primary key) engine=innodb default character set utf8 collate utf8_bin; --enable_warnings insert into t1 values ('あいうえおかきくけこさしすせそ'); insert into t1 values ('さしすせそかきくけこあいうえお'); select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%'; select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ'; drop table t1; # # Bug#14896: Comparison with a key in a partial index over mb character field # Loading
strings/CHARSET_INFO.txt +10 −2 Original line number Diff line number Diff line Loading @@ -33,7 +33,7 @@ typedef struct charset_info_st uint strxfrm_multiply; uint mbminlen; uint mbmaxlen; char max_sort_char; /* For LIKE optimization */ uint16 max_sort_char; /* For LIKE optimization */ MY_CHARSET_HANDLER *cset; MY_COLLATION_HANDLER *coll; Loading Loading @@ -134,7 +134,15 @@ Misc fields mbmaxlen - maximum multibyte sequence length. 1 for 8bit charsets. Can also be 2 or 3. max_sort_char - for LIKE range in case of 8bit character sets - native code of maximum character (max_str pad byte); in case of UTF8 and UCS2 - Unicode code of the maximum possible character (usually U+FFFF). This code is converted to multibyte representation (usually 0xEFBFBF) and then used as a pad sequence for max_str. in case of other multibyte character sets - max_str pad byte (usually 0xFF). MY_CHARSET_HANDLER ================== Loading
strings/ctype-mb.c +17 −4 Original line number Diff line number Diff line Loading @@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), /* Write max key: create a buffer with multibyte Write max key: - for non-Unicode character sets: just set to 255. - for Unicode character set (utf-8): create a buffer with multibyte representation of the max_sort_char character, and copy it into max_str in a loop. */ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) { char buf[10]; char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, char buflen; if (!(cs->state & MY_CS_UNICODE)) { bfill(str, end - str, 255); return; } buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, (uchar*) buf + sizeof(buf)); DBUG_ASSERT(buflen > 0); do { Loading Loading @@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler = my_strnncoll_mb_bin, my_strnncollsp_mb_bin, my_strnxfrm_mb_bin, my_like_range_simple, my_like_range_mb, my_wildcmp_mb_bin, my_strcasecmp_mb_bin, my_instr_mb, Loading