Loading mysql-test/r/ctype_uca.result +92 −0 Original line number Diff line number Diff line Loading @@ -2663,3 +2663,95 @@ COUNT(*) c1 1 1 a DROP TABLE IF EXISTS t1; set names utf8; create table t1 ( a varchar(255), key a(a) ) character set utf8 collate utf8_danish_ci; insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa'); select a as like_a from t1 where a like 'a%'; like_a aaaaa select a as like_aa from t1 where a like 'aa%'; like_aa aaaaa select a as like_aaa from t1 where a like 'aaa%'; like_aaa aaaaa select a as like_aaaa from t1 where a like 'aaaa%'; like_aaaa aaaaa select a as like_aaaaa from t1 where a like 'aaaaa%'; like_aaaaa aaaaa alter table t1 convert to character set ucs2 collate ucs2_danish_ci; select a as like_a from t1 where a like 'a%'; like_a aaaaa select a as like_aa from t1 where a like 'aa%'; like_aa aaaaa select a as like_aaa from t1 where a like 'aaa%'; like_aaa aaaaa select a as like_aaaa from t1 where a like 'aaaa%'; like_aaaa aaaaa select a as like_aaaaa from t1 where a like 'aaaaa%'; like_aaaaa aaaaa drop table t1; create table t1 ( a varchar(255), key(a) ) character set utf8 collate utf8_spanish2_ci; insert into t1 values ('aaaaa'),('lllll'),('zzzzz'); select a as like_l from t1 where a like 'l%'; like_l lllll select a as like_ll from t1 where a like 'll%'; like_ll lllll select a as like_lll from t1 where a like 'lll%'; like_lll lllll select a as like_llll from t1 where a like 'llll%'; like_llll lllll select a as like_lllll from t1 where a like 'lllll%'; like_lllll lllll alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci; select a as like_l from t1 where a like 'l%'; like_l lllll select a as like_ll from t1 where a like 'll%'; like_ll lllll select a as like_lll from t1 where a like 'lll%'; like_lll lllll select a as like_llll from t1 where a like 'llll%'; like_llll lllll select a as like_lllll from t1 where a like 'lllll%'; like_lllll lllll drop table t1; create table t1 ( a varchar(255), key a(a) ) character set utf8 
collate utf8_czech_ci; insert into t1 values ('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j'); select * from t1 where a like 'c%'; a c ch alter table t1 convert to character set ucs2 collate ucs2_czech_ci; select * from t1 where a like 'c%'; a c ch drop table t1; End for 5.0 tests mysql-test/t/ctype_uca.test +54 −0 Original line number Diff line number Diff line Loading @@ -485,3 +485,57 @@ CREATE TABLE t1 ( insert into t1 values (''),('a'); SELECT COUNT(*), c1 FROM t1 GROUP BY c1; DROP TABLE IF EXISTS t1; # # Bug#27345 Incorrect data returned when range-read from utf8_danish_ci indexes # set names utf8; create table t1 ( a varchar(255), key a(a) ) character set utf8 collate utf8_danish_ci; insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa'); select a as like_a from t1 where a like 'a%'; select a as like_aa from t1 where a like 'aa%'; select a as like_aaa from t1 where a like 'aaa%'; select a as like_aaaa from t1 where a like 'aaaa%'; select a as like_aaaaa from t1 where a like 'aaaaa%'; alter table t1 convert to character set ucs2 collate ucs2_danish_ci; select a as like_a from t1 where a like 'a%'; select a as like_aa from t1 where a like 'aa%'; select a as like_aaa from t1 where a like 'aaa%'; select a as like_aaaa from t1 where a like 'aaaa%'; select a as like_aaaaa from t1 where a like 'aaaaa%'; drop table t1; create table t1 ( a varchar(255), key(a) ) character set utf8 collate utf8_spanish2_ci; insert into t1 values ('aaaaa'),('lllll'),('zzzzz'); select a as like_l from t1 where a like 'l%'; select a as like_ll from t1 where a like 'll%'; select a as like_lll from t1 where a like 'lll%'; select a as like_llll from t1 where a like 'llll%'; select a as like_lllll from t1 where a like 'lllll%'; alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci; select a as like_l from t1 where a like 'l%'; select a as like_ll from t1 where a like 'll%'; select a as like_lll from t1 where a like 'lll%'; select a as like_llll from t1 where a like 
'llll%'; select a as like_lllll from t1 where a like 'lllll%'; drop table t1; create table t1 ( a varchar(255), key a(a) ) character set utf8 collate utf8_czech_ci; -- In Czech 'ch' is a single letter between 'h' and 'i' insert into t1 values ('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j'); select * from t1 where a like 'c%'; alter table t1 convert to character set ucs2 collate ucs2_czech_ci; select * from t1 where a like 'c%'; drop table t1; -- echo End for 5.0 tests strings/ctype-mb.c +70 −1 Original line number Diff line number Diff line Loading @@ -563,6 +563,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, char *min_end= min_str + res_length; char *max_end= max_str + res_length; uint maxcharlen= res_length / cs->mbmaxlen; const char *contraction_flags= cs->contractions ? ((const char*) cs->contractions) + 0x40*0x40 : NULL; for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--) { Loading @@ -571,6 +573,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, ptr++; /* Skip escape */ else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */ { fill_max_and_min: /* Calculate length of keys: 'a\0\0... is the smallest possible string when we have space expand Loading Loading @@ -602,8 +605,74 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, *min_str++= *max_str++= *ptr++; } else *min_str++= *max_str++= *ptr++; { /* Special case for collations with contractions. For example, in Czech, 'ch' is a separate letter which is sorted between 'h' and 'i'. In the pattern 'abc%', 'c' at the end can mean: - letter 'c' itself, - beginning of the contraction 'ch'. If we simply return this LIKE range: 'abc\min\min\min' and 'abc\max\max\max' then this query: SELECT * FROM t1 WHERE a LIKE 'abc%' will only find values starting from 'abc[^h]', but won't find values starting from 'abch'. We must ignore contraction heads followed by w_one or w_many. 
('Contraction head' means any letter which can be the first letter in a contraction) For example, for Czech 'abc%', we will return LIKE range, which is equal to LIKE range for 'ab%': 'ab\min\min\min\min' and 'ab\max\max\max\max'. */ if (contraction_flags && ptr + 1 < end && contraction_flags[(uchar) *ptr]) { /* Ptr[0] is a contraction head. */ if (ptr[1] == w_one || ptr[1] == w_many) { /* Contraction head followed by a wildcard, quit. */ goto fill_max_and_min; } /* Some letters can be both contraction heads and contraction tails. For example, in Danish 'aa' is a separate single letter which is sorted after 'z'. So 'a' can be both head and tail. If ptr[0]+ptr[1] is a contraction, then put both letters together. If ptr[1] can be a contraction part, but ptr[0]+ptr[1] is not a contraction, then we put only ptr[0], and continue with ptr[1] on the next loop. */ if (contraction_flags[(uchar) ptr[1]] && cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40]) { /* Contraction found */ if (maxcharlen == 1 || min_str + 1 >= min_end) { /* Both contraction parts don't fit, quit */ goto fill_max_and_min; } /* Put contraction head */ *min_str++= *max_str++= *ptr++; maxcharlen--; } } /* Put contraction tail, or a single character */ *min_str++= *max_str++= *ptr++; } } *min_length= *max_length = (uint) (min_str - min_org); Loading strings/ctype-uca.c +10 −1 Original line number Diff line number Diff line Loading @@ -7937,10 +7937,16 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) /* Now process contractions */ if (ncontractions) { uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */ /* 8K for weights for basic latin letter pairs, plus 256 bytes for "is contraction part" flags. 
*/ uint size= 0x40*0x40*sizeof(uint16) + 256; char *contraction_flags; if (!(cs->contractions= (uint16*) (*alloc)(size))) return 1; bzero((void*)cs->contractions, size); contraction_flags= ((char*) cs->contractions) + 0x40*0x40; for (i=0; i < rc; i++) { if (rule[i].curr[1]) Loading @@ -7966,6 +7972,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) /* Copy base weight applying primary difference */ cs->contractions[offsc]= offsb[0] + rule[i].diff[0]; /* Mark both letters as "is contraction part" */ contraction_flags[rule[i].curr[0]]= 1; contraction_flags[rule[i].curr[1]]= 1; } } } Loading strings/ctype-ucs2.c +35 −0 Original line number Diff line number Diff line Loading @@ -1524,6 +1524,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, char *min_org=min_str; char *min_end=min_str+res_length; uint charlen= res_length / cs->mbmaxlen; const char *contraction_flags= cs->contractions ? ((const char*) cs->contractions) + 0x40*0x40 : NULL; for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 ; ptr+=2, charlen--) Loading @@ -1545,6 +1547,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, } if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */ { fill_max_and_min: /* Calculate length of keys: 'a\0\0... is the smallest possible string when we have space expand Loading @@ -1561,6 +1564,38 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, } while (min_str + 1 < min_end); return 0; } if (contraction_flags && ptr + 3 < end && ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]]) { /* Contraction head found */ if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many)) { /* Contraction head followed by a wildcard, quit */ goto fill_max_and_min; } /* Check if the second letter can be contraction part, and if two letters really produce a contraction. 
*/ if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] && cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40]) { /* Contraction found */ if (charlen == 1 || min_str + 2 >= min_end) { /* Full contraction doesn't fit, quit */ goto fill_max_and_min; } /* Put contraction head */ *min_str++= *max_str++= *ptr++; *min_str++= *max_str++= *ptr++; charlen--; } } /* Put contraction tail, or a single character */ *min_str++= *max_str++ = ptr[0]; *min_str++= *max_str++ = ptr[1]; } Loading Loading
mysql-test/r/ctype_uca.result +92 −0 Original line number Diff line number Diff line Loading @@ -2663,3 +2663,95 @@ COUNT(*) c1 1 1 a DROP TABLE IF EXISTS t1; set names utf8; create table t1 ( a varchar(255), key a(a) ) character set utf8 collate utf8_danish_ci; insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa'); select a as like_a from t1 where a like 'a%'; like_a aaaaa select a as like_aa from t1 where a like 'aa%'; like_aa aaaaa select a as like_aaa from t1 where a like 'aaa%'; like_aaa aaaaa select a as like_aaaa from t1 where a like 'aaaa%'; like_aaaa aaaaa select a as like_aaaaa from t1 where a like 'aaaaa%'; like_aaaaa aaaaa alter table t1 convert to character set ucs2 collate ucs2_danish_ci; select a as like_a from t1 where a like 'a%'; like_a aaaaa select a as like_aa from t1 where a like 'aa%'; like_aa aaaaa select a as like_aaa from t1 where a like 'aaa%'; like_aaa aaaaa select a as like_aaaa from t1 where a like 'aaaa%'; like_aaaa aaaaa select a as like_aaaaa from t1 where a like 'aaaaa%'; like_aaaaa aaaaa drop table t1; create table t1 ( a varchar(255), key(a) ) character set utf8 collate utf8_spanish2_ci; insert into t1 values ('aaaaa'),('lllll'),('zzzzz'); select a as like_l from t1 where a like 'l%'; like_l lllll select a as like_ll from t1 where a like 'll%'; like_ll lllll select a as like_lll from t1 where a like 'lll%'; like_lll lllll select a as like_llll from t1 where a like 'llll%'; like_llll lllll select a as like_lllll from t1 where a like 'lllll%'; like_lllll lllll alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci; select a as like_l from t1 where a like 'l%'; like_l lllll select a as like_ll from t1 where a like 'll%'; like_ll lllll select a as like_lll from t1 where a like 'lll%'; like_lll lllll select a as like_llll from t1 where a like 'llll%'; like_llll lllll select a as like_lllll from t1 where a like 'lllll%'; like_lllll lllll drop table t1; create table t1 ( a varchar(255), key a(a) ) character set utf8 collate 
utf8_czech_ci; insert into t1 values ('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j'); select * from t1 where a like 'c%'; a c ch alter table t1 convert to character set ucs2 collate ucs2_czech_ci; select * from t1 where a like 'c%'; a c ch drop table t1; End for 5.0 tests
mysql-test/t/ctype_uca.test +54 −0 Original line number Diff line number Diff line Loading @@ -485,3 +485,57 @@ CREATE TABLE t1 ( insert into t1 values (''),('a'); SELECT COUNT(*), c1 FROM t1 GROUP BY c1; DROP TABLE IF EXISTS t1; # # Bug#27345 Incorrect data returned when range-read from utf8_danish_ci indexes # set names utf8; create table t1 ( a varchar(255), key a(a) ) character set utf8 collate utf8_danish_ci; insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa'); select a as like_a from t1 where a like 'a%'; select a as like_aa from t1 where a like 'aa%'; select a as like_aaa from t1 where a like 'aaa%'; select a as like_aaaa from t1 where a like 'aaaa%'; select a as like_aaaaa from t1 where a like 'aaaaa%'; alter table t1 convert to character set ucs2 collate ucs2_danish_ci; select a as like_a from t1 where a like 'a%'; select a as like_aa from t1 where a like 'aa%'; select a as like_aaa from t1 where a like 'aaa%'; select a as like_aaaa from t1 where a like 'aaaa%'; select a as like_aaaaa from t1 where a like 'aaaaa%'; drop table t1; create table t1 ( a varchar(255), key(a) ) character set utf8 collate utf8_spanish2_ci; insert into t1 values ('aaaaa'),('lllll'),('zzzzz'); select a as like_l from t1 where a like 'l%'; select a as like_ll from t1 where a like 'll%'; select a as like_lll from t1 where a like 'lll%'; select a as like_llll from t1 where a like 'llll%'; select a as like_lllll from t1 where a like 'lllll%'; alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci; select a as like_l from t1 where a like 'l%'; select a as like_ll from t1 where a like 'll%'; select a as like_lll from t1 where a like 'lll%'; select a as like_llll from t1 where a like 'llll%'; select a as like_lllll from t1 where a like 'lllll%'; drop table t1; create table t1 ( a varchar(255), key a(a) ) character set utf8 collate utf8_czech_ci; -- In Czech 'ch' is a single letter between 'h' and 'i' insert into t1 values 
('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j'); select * from t1 where a like 'c%'; alter table t1 convert to character set ucs2 collate ucs2_czech_ci; select * from t1 where a like 'c%'; drop table t1; -- echo End for 5.0 tests
strings/ctype-mb.c +70 −1 Original line number Diff line number Diff line Loading @@ -563,6 +563,8 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, char *min_end= min_str + res_length; char *max_end= max_str + res_length; uint maxcharlen= res_length / cs->mbmaxlen; const char *contraction_flags= cs->contractions ? ((const char*) cs->contractions) + 0x40*0x40 : NULL; for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--) { Loading @@ -571,6 +573,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, ptr++; /* Skip escape */ else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */ { fill_max_and_min: /* Calculate length of keys: 'a\0\0... is the smallest possible string when we have space expand Loading Loading @@ -602,8 +605,74 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, *min_str++= *max_str++= *ptr++; } else *min_str++= *max_str++= *ptr++; { /* Special case for collations with contractions. For example, in Czech, 'ch' is a separate letter which is sorted between 'h' and 'i'. In the pattern 'abc%', 'c' at the end can mean: - letter 'c' itself, - beginning of the contraction 'ch'. If we simply return this LIKE range: 'abc\min\min\min' and 'abc\max\max\max' then this query: SELECT * FROM t1 WHERE a LIKE 'abc%' will only find values starting from 'abc[^h]', but won't find values starting from 'abch'. We must ignore contraction heads followed by w_one or w_many. ('Contraction head' means any letter which can be the first letter in a contraction) For example, for Czech 'abc%', we will return LIKE range, which is equal to LIKE range for 'ab%': 'ab\min\min\min\min' and 'ab\max\max\max\max'. */ if (contraction_flags && ptr + 1 < end && contraction_flags[(uchar) *ptr]) { /* Ptr[0] is a contraction head. */ if (ptr[1] == w_one || ptr[1] == w_many) { /* Contraction head followed by a wildcard, quit. */ goto fill_max_and_min; } /* Some letters can be both contraction heads and contraction tails. 
For example, in Danish 'aa' is a separate single letter which is sorted after 'z'. So 'a' can be both head and tail. If ptr[0]+ptr[1] is a contraction, then put both letters together. If ptr[1] can be a contraction part, but ptr[0]+ptr[1] is not a contraction, then we put only ptr[0], and continue with ptr[1] on the next loop. */ if (contraction_flags[(uchar) ptr[1]] && cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40]) { /* Contraction found */ if (maxcharlen == 1 || min_str + 1 >= min_end) { /* Both contraction parts don't fit, quit */ goto fill_max_and_min; } /* Put contraction head */ *min_str++= *max_str++= *ptr++; maxcharlen--; } } /* Put contraction tail, or a single character */ *min_str++= *max_str++= *ptr++; } } *min_length= *max_length = (uint) (min_str - min_org); Loading
strings/ctype-uca.c +10 −1 Original line number Diff line number Diff line Loading @@ -7937,10 +7937,16 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) /* Now process contractions */ if (ncontractions) { uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */ /* 8K for weights for basic latin letter pairs, plus 256 bytes for "is contraction part" flags. */ uint size= 0x40*0x40*sizeof(uint16) + 256; char *contraction_flags; if (!(cs->contractions= (uint16*) (*alloc)(size))) return 1; bzero((void*)cs->contractions, size); contraction_flags= ((char*) cs->contractions) + 0x40*0x40; for (i=0; i < rc; i++) { if (rule[i].curr[1]) Loading @@ -7966,6 +7972,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) /* Copy base weight applying primary difference */ cs->contractions[offsc]= offsb[0] + rule[i].diff[0]; /* Mark both letters as "is contraction part" */ contraction_flags[rule[i].curr[0]]= 1; contraction_flags[rule[i].curr[1]]= 1; } } } Loading
strings/ctype-ucs2.c +35 −0 Original line number Diff line number Diff line Loading @@ -1524,6 +1524,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, char *min_org=min_str; char *min_end=min_str+res_length; uint charlen= res_length / cs->mbmaxlen; const char *contraction_flags= cs->contractions ? ((const char*) cs->contractions) + 0x40*0x40 : NULL; for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 ; ptr+=2, charlen--) Loading @@ -1545,6 +1547,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, } if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */ { fill_max_and_min: /* Calculate length of keys: 'a\0\0... is the smallest possible string when we have space expand Loading @@ -1561,6 +1564,38 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, } while (min_str + 1 < min_end); return 0; } if (contraction_flags && ptr + 3 < end && ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]]) { /* Contraction head found */ if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many)) { /* Contraction head followed by a wildcard, quit */ goto fill_max_and_min; } /* Check if the second letter can be contraction part, and if two letters really produce a contraction. */ if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] && cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40]) { /* Contraction found */ if (charlen == 1 || min_str + 2 >= min_end) { /* Full contraction doesn't fit, quit */ goto fill_max_and_min; } /* Put contraction head */ *min_str++= *max_str++= *ptr++; *min_str++= *max_str++= *ptr++; charlen--; } } /* Put contraction tail, or a single character */ *min_str++= *max_str++ = ptr[0]; *min_str++= *max_str++ = ptr[1]; } Loading