Loading include/m_ctype.h +5 −0 Original line number Diff line number Diff line Loading @@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *, const char *s, uint s_length, my_match_t *match, uint nmatch); int my_wildcmp_unicode(CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights); extern my_bool my_parse_charset_xml(const char *bug, uint len, int (*add)(CHARSET_INFO *cs)); Loading mysql-test/r/ctype_utf8.result +9 −0 Original line number Diff line number Diff line Loading @@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin; select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%') 1 select convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8); convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8) 1 select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8); CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8) 1 select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8); CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8) 1 SELECT 'a' = 'a '; 'a' = 'a ' 1 Loading mysql-test/t/ctype_utf8.test +8 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,14 @@ select 'A' like 'a'; select 'A' like 'a' collate utf8_bin; select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); # Bug #6040: can't retrieve records with umlaut # characters in case insensitive manner. 
# Case insensitive search LIKE comparison # was broken for multibyte characters: select convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8); select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8); select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8); # # Check the following: # "a" == "a " Loading strings/ctype-ucs2.c +4 −162 Original line number Diff line number Diff line Loading @@ -1231,171 +1231,13 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), } /* ** Compare string against string with wildcard ** 0 if matched ** -1 if not matched with wildcard ** 1 if matched with wildcard */ static int my_wildcmp_ucs2(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights) { int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan, plane; while (wildstr != wildend) { while (1) { scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; } if (w_wc == (my_wc_t)w_many) { result= 1; /* Found an anchor char */ break; } wildstr+= scan; scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); if (scan <=0) return 1; str+= scan; if (w_wc == (my_wc_t)w_one) { result= 1; /* Found an anchor char */ } else { if (weights) { plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? 
weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc != w_wc) return 1; /* No match */ } if (wildstr == wildend) return (str != str_end); /* Match if both are at end */ } if (w_wc == (my_wc_t)w_many) { /* Found w_many */ /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) { scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; if (w_wc == (my_wc_t)w_many) { wildstr+= scan; continue; } if (w_wc == (my_wc_t)w_one) { wildstr+= scan; scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); if (scan <=0) return 1; str+= scan; continue; } break; /* Not a wild character */ } if (wildstr == wildend) return 0; /* Ok if w_many is last */ if (str == str_end) return -1; scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; } while (1) { /* Skip until the first character from wildstr is found */ while (str != str_end) { scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str, (const uchar*)str_end); if (scan <= 0) return 1; if (weights) { plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc == w_wc) break; str+= scan; } if (str == str_end) return -1; result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape, w_one,w_many,weights); if (result <= 0) return result; str+= scan; } } } return (str != str_end ? 
1 : 0); } static int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, escape,w_one,w_many,uni_plane); } Loading @@ -1406,7 +1248,7 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, escape,w_one,w_many,NULL); } Loading strings/ctype-utf8.c +167 −1 Original line number Diff line number Diff line Loading @@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={ }; /* ** Compare string against string with wildcard ** This function is used in UTF8 and UCS2 ** ** 0 if matched ** -1 if not matched with wildcard ** 1 if matched with wildcard */ int my_wildcmp_unicode(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights) { int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan, plane; int (*mb_wc)(struct charset_info_st *cs, my_wc_t *wc, const unsigned char *s,const unsigned char *e); mb_wc= cs->cset->mb_wc; while (wildstr != wildend) { while (1) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; if ((scan= mb_wc(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) return 1; } if (w_wc == (my_wc_t)w_many) { result= 1; /* Found an anchor char */ break; } wildstr+= scan; if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; str+= scan; if (w_wc == (my_wc_t)w_one) { result= 1; /* Found an anchor char */ } else { if (weights) { plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? 
weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc != w_wc) return 1; /* No match */ } if (wildstr == wildend) return (str != str_end); /* Match if both are at end */ } if (w_wc == (my_wc_t)w_many) { /* Found w_many */ /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) return 1; if (w_wc == (my_wc_t)w_many) { wildstr+= scan; continue; } if (w_wc == (my_wc_t)w_one) { wildstr+= scan; if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; str+= scan; continue; } break; /* Not a wild character */ } if (wildstr == wildend) return 0; /* Ok if w_many is last */ if (str == str_end) return -1; if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <=0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <=0) return 1; } while (1) { /* Skip until the first character from wildstr is found */ while (str != str_end) { if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; if (weights) { plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc == w_wc) break; str+= scan; } if (str == str_end) return -1; result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, weights); if (result <= 0) return result; str+= scan; } } } return (str != str_end ? 
1 : 0); } #endif Loading Loading @@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) return my_strncasecmp_utf8(cs, s, t, len); } static int my_wildcmp_utf8(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, escape,w_one,w_many,uni_plane); } static int my_strnxfrm_utf8(CHARSET_INFO *cs, uchar *dst, uint dstlen, const uchar *src, uint srclen) Loading Loading @@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_strnncollsp_utf8, my_strnxfrm_utf8, my_like_range_mb, my_wildcmp_mb, my_wildcmp_utf8, my_strcasecmp_utf8, my_instr_mb, my_hash_sort_utf8 Loading Loading
include/m_ctype.h +5 −0 Original line number Diff line number Diff line Loading @@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *, const char *s, uint s_length, my_match_t *match, uint nmatch); int my_wildcmp_unicode(CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights); extern my_bool my_parse_charset_xml(const char *bug, uint len, int (*add)(CHARSET_INFO *cs)); Loading
mysql-test/r/ctype_utf8.result +9 −0 Original line number Diff line number Diff line Loading @@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin; select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%') 1 select convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8); convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8) 1 select CONVERT(_koi8r'вася' USING utf8) LIKE CONVERT(_koi8r'ВАСЯ' USING utf8); CONVERT(_koi8r'вася' USING utf8) LIKE CONVERT(_koi8r'ВАСЯ' USING utf8) 1 select CONVERT(_koi8r'ВАСЯ' USING utf8) LIKE CONVERT(_koi8r'вася' USING utf8); CONVERT(_koi8r'ВАСЯ' USING utf8) LIKE CONVERT(_koi8r'вася' USING utf8) 1 SELECT 'a' = 'a '; 'a' = 'a ' 1 Loading
mysql-test/t/ctype_utf8.test +8 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,14 @@ select 'A' like 'a'; select 'A' like 'a' collate utf8_bin; select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); # Bug #6040: can't retrieve records with umlaut # characters in case insensitive manner. # Case insensitive search LIKE comparison # was broken for multibyte characters: select convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8); select CONVERT(_koi8r'вася' USING utf8) LIKE CONVERT(_koi8r'ВАСЯ' USING utf8); select CONVERT(_koi8r'ВАСЯ' USING utf8) LIKE CONVERT(_koi8r'вася' USING utf8); # # Check the following: # "a" == "a " Loading
strings/ctype-ucs2.c +4 −162 Original line number Diff line number Diff line Loading @@ -1231,171 +1231,13 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), } /* ** Compare string against a wildcard pattern ** Characters are decoded with my_ucs2_uni. If weights is not NULL, ** both characters are folded through weights[plane][low byte].sort ** before being compared (a NULL weights[plane] leaves the character ** unchanged); a NULL weights compares raw code points. ** ** 0 if matched ** -1 if not matched: str ran out while searching after w_many ** 1 if not matched (also when a character cannot be decoded) */ static int my_wildcmp_ucs2(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights) { int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan, plane; while (wildstr != wildend) { while (1) { scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; } /* NOTE(review): the character read after an escape is still compared with w_many and w_one below, so an escaped wildcard appears to act as a wildcard rather than a literal -- confirm */ if (w_wc == (my_wc_t)w_many) { result= 1; /* Found an anchor char */ break; } wildstr+= scan; scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); if (scan <=0) return 1; str+= scan; if (w_wc == (my_wc_t)w_one) { result= 1; /* Found an anchor char */ } else { if (weights) { /* Fold both characters; plane is bits 8..15 of the code point */ plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? 
weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc != w_wc) return 1; /* No match */ } if (wildstr == wildend) return (str != str_end); /* Match if both are at end */ } if (w_wc == (my_wc_t)w_many) { /* Found w_many */ /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) { scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; if (w_wc == (my_wc_t)w_many) { wildstr+= scan; continue; } if (w_wc == (my_wc_t)w_one) { wildstr+= scan; scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end); if (scan <=0) return 1; str+= scan; continue; } break; /* Not a wild character */ } if (wildstr == wildend) return 0; /* Ok if w_many is last */ if (str == str_end) return -1; scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend); if (scan <= 0) return 1; } while (1) { /* Skip until the first character from wildstr is found */ while (str != str_end) { scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str, (const uchar*)str_end); if (scan <= 0) return 1; if (weights) { plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc == w_wc) break; str+= scan; } if (str == str_end) return -1; /* Try to match the rest of the pattern from this candidate position */ result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape, w_one,w_many,weights); if (result <= 0) return result; /* This candidate failed; step over it and keep searching */ str+= scan; } } } return (str != str_end ? 
1 : 0); } static int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, escape,w_one,w_many,uni_plane); } Loading @@ -1406,7 +1248,7 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, escape,w_one,w_many,NULL); } Loading
strings/ctype-utf8.c +167 −1 Original line number Diff line number Diff line Loading @@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={ }; /* ** Compare string against a wildcard pattern ** This function is used in UTF8 and UCS2 ** ** Characters are decoded with cs->cset->mb_wc. If weights is not NULL, ** both characters are folded through weights[plane][low byte].sort ** before being compared (a NULL weights[plane] leaves the character ** unchanged); a NULL weights compares raw code points. ** ** 0 if matched ** -1 if not matched: str ran out while searching after w_many ** 1 if not matched (also when a character cannot be decoded) */ int my_wildcmp_unicode(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights) { int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; int scan, plane; int (*mb_wc)(struct charset_info_st *cs, my_wc_t *wc, const unsigned char *s,const unsigned char *e); /* Decoder of the character set in use; returns <= 0 when no character can be read */ mb_wc= cs->cset->mb_wc; while (wildstr != wildend) { while (1) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; if ((scan= mb_wc(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) return 1; } /* NOTE(review): the character read after an escape is still compared with w_many and w_one below, so an escaped wildcard appears to act as a wildcard rather than a literal -- confirm */ if (w_wc == (my_wc_t)w_many) { result= 1; /* Found an anchor char */ break; } wildstr+= scan; if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; str+= scan; if (w_wc == (my_wc_t)w_one) { result= 1; /* Found an anchor char */ } else { if (weights) { /* Fold both characters; plane is bits 8..15 of the code point */ plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? 
weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc != w_wc) return 1; /* No match */ } if (wildstr == wildend) return (str != str_end); /* Match if both are at end */ } if (w_wc == (my_wc_t)w_many) { /* Found w_many */ /* Remove any '%' and '_' from the wild search string */ for ( ; wildstr != wildend ; ) { if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <= 0) return 1; if (w_wc == (my_wc_t)w_many) { wildstr+= scan; continue; } if (w_wc == (my_wc_t)w_one) { wildstr+= scan; if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; str+= scan; continue; } break; /* Not a wild character */ } if (wildstr == wildend) return 0; /* Ok if w_many is last */ if (str == str_end) return -1; if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <=0) return 1; if (w_wc == (my_wc_t)escape) { wildstr+= scan; if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, (const uchar*)wildend)) <=0) return 1; } while (1) { /* Skip until the first character from wildstr is found */ while (str != str_end) { if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, (const uchar*)str_end)) <=0) return 1; if (weights) { plane=(s_wc>>8) & 0xFF; s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc; plane=(w_wc>>8) & 0xFF; w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc; } if (s_wc == w_wc) break; str+= scan; } if (str == str_end) return -1; /* Try to match the rest of the pattern from this candidate position */ result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, weights); if (result <= 0) return result; /* This candidate failed; step over it and keep searching */ str+= scan; } } } return (str != str_end ? 
1 : 0); } #endif Loading Loading @@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) return my_strncasecmp_utf8(cs, s, t, len); } /* wildcmp entry for utf8: calls my_wildcmp_unicode with uni_plane as the weights table */ static int my_wildcmp_utf8(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, escape,w_one,w_many,uni_plane); } static int my_strnxfrm_utf8(CHARSET_INFO *cs, uchar *dst, uint dstlen, const uchar *src, uint srclen) Loading Loading @@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = my_strnncollsp_utf8, my_strnxfrm_utf8, my_like_range_mb, my_wildcmp_mb, my_wildcmp_utf8, my_strcasecmp_utf8, my_instr_mb, my_hash_sort_utf8 Loading