Loading mysql-test/r/fulltext2.result +8 −0 Original line number Diff line number Diff line Loading @@ -241,3 +241,11 @@ select * from t1 where match a against('ab c' in boolean mode); a drop table t1; set names latin1; SET NAMES utf8; CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8; INSERT INTO t1 VALUES('„MySQL“'); SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE); a „MySQL“ DROP TABLE t1; SET NAMES latin1; mysql-test/t/fulltext2.test +10 −0 Original line number Diff line number Diff line Loading @@ -221,3 +221,13 @@ drop table t1; set names latin1; # End of 4.1 tests # # BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns # SET NAMES utf8; CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8; INSERT INTO t1 VALUES('„MySQL“'); SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE); DROP TABLE t1; SET NAMES latin1; storage/myisam/ft_parser.c +22 −10 Original line number Diff line number Diff line Loading @@ -111,6 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param) { byte *doc=*start; int ctype; uint mwc, length, mbl; param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); Loading @@ -119,9 +120,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, while (doc<end) { for (;doc<end;doc++) for (; doc < end; doc+= (mbl > 0 ? mbl : 1)) { if (true_word_char(cs,*doc)) break; mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) break; if (*doc == FTB_RQUOT && param->quot) { param->quot=doc; Loading Loading @@ -155,14 +158,16 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, } mwc=length=0; for (word->pos=doc; doc<end; length++, mbl=my_mbcharlen(cs, *(uchar *)doc), doc+=(mbl ? mbl : 1)) if (true_word_char(cs,*doc)) for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) { mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) mwc=0; else if (!misc_word_char(*doc) || mwc) break; else mwc++; } param->prev='A'; /* be sure *prev is true_word_char */ word->len= (uint)(doc-word->pos) - mwc; if ((param->trunc=(doc<end && *doc == FTB_TRUNC))) Loading Loading @@ -197,24 +202,31 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, { byte *doc= *start; uint mwc, length, mbl; int ctype; DBUG_ENTER("ft_simple_get_word"); do { for (;; doc++) for (;; doc+= (mbl > 0 ? mbl : 1)) { if (doc >= end) DBUG_RETURN(0); if (true_word_char(cs, *doc)) break; if (doc >= end) DBUG_RETURN(0); mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) break; } mwc= length= 0; for (word->pos=doc; doc<end; length++, mbl=my_mbcharlen(cs, *(uchar *)doc), doc+=(mbl ? mbl : 1)) if (true_word_char(cs,*doc)) for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) { mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) mwc= 0; else if (!misc_word_char(*doc) || mwc) break; else mwc++; } word->len= (uint)(doc-word->pos) - mwc; Loading storage/myisam/ft_update.c +0 −5 Original line number Diff line number Diff line Loading @@ -174,11 +174,6 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) FT_SEG_ITERATOR ftsi1, ftsi2; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; DBUG_ENTER("_mi_ft_cmp"); #ifndef MYSQL_HAS_TRUE_CTYPE_IMPLEMENTATION if (cs->mbmaxlen > 1) DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); #endif _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1); _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2); Loading storage/myisam/ftdefs.h +3 −2 Original line number Diff line number Diff line Loading @@ -24,9 +24,10 @@ #include <queues.h> #include <mysql/plugin.h> #define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_') #define true_word_char(ctype, character) \ ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \ (character) == '_') #define misc_word_char(X) 0 #define word_char(s,X) (true_word_char(s,X) || misc_word_char(X)) #define FT_MAX_WORD_LEN_FOR_SORT 31 Loading Loading
mysql-test/r/fulltext2.result +8 −0 Original line number Diff line number Diff line Loading @@ -241,3 +241,11 @@ select * from t1 where match a against('ab c' in boolean mode); a drop table t1; set names latin1; SET NAMES utf8; CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8; INSERT INTO t1 VALUES('„MySQL“'); SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE); a „MySQL“ DROP TABLE t1; SET NAMES latin1;
mysql-test/t/fulltext2.test +10 −0 Original line number Diff line number Diff line Loading @@ -221,3 +221,13 @@ drop table t1; set names latin1; # End of 4.1 tests # # BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns # SET NAMES utf8; CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8; INSERT INTO t1 VALUES('„MySQL“'); SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE); DROP TABLE t1; SET NAMES latin1;
storage/myisam/ft_parser.c +22 −10 Original line number Diff line number Diff line Loading @@ -111,6 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param) { byte *doc=*start; int ctype; uint mwc, length, mbl; param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); Loading @@ -119,9 +120,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, while (doc<end) { for (;doc<end;doc++) for (; doc < end; doc+= (mbl > 0 ? mbl : 1)) { if (true_word_char(cs,*doc)) break; mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) break; if (*doc == FTB_RQUOT && param->quot) { param->quot=doc; Loading Loading @@ -155,14 +158,16 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, } mwc=length=0; for (word->pos=doc; doc<end; length++, mbl=my_mbcharlen(cs, *(uchar *)doc), doc+=(mbl ? mbl : 1)) if (true_word_char(cs,*doc)) for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) { mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) mwc=0; else if (!misc_word_char(*doc) || mwc) break; else mwc++; } param->prev='A'; /* be sure *prev is true_word_char */ word->len= (uint)(doc-word->pos) - mwc; if ((param->trunc=(doc<end && *doc == FTB_TRUNC))) Loading Loading @@ -197,24 +202,31 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, { byte *doc= *start; uint mwc, length, mbl; int ctype; DBUG_ENTER("ft_simple_get_word"); do { for (;; doc++) for (;; doc+= (mbl > 0 ? mbl : 1)) { if (doc >= end) DBUG_RETURN(0); if (true_word_char(cs, *doc)) break; if (doc >= end) DBUG_RETURN(0); mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) break; } mwc= length= 0; for (word->pos=doc; doc<end; length++, mbl=my_mbcharlen(cs, *(uchar *)doc), doc+=(mbl ? mbl : 1)) if (true_word_char(cs,*doc)) for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) { mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); if (true_word_char(ctype, *doc)) mwc= 0; else if (!misc_word_char(*doc) || mwc) break; else mwc++; } word->len= (uint)(doc-word->pos) - mwc; Loading
storage/myisam/ft_update.c +0 −5 Original line number Diff line number Diff line Loading @@ -174,11 +174,6 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) FT_SEG_ITERATOR ftsi1, ftsi2; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; DBUG_ENTER("_mi_ft_cmp"); #ifndef MYSQL_HAS_TRUE_CTYPE_IMPLEMENTATION if (cs->mbmaxlen > 1) DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); #endif _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1); _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2); Loading
storage/myisam/ftdefs.h +3 −2 Original line number Diff line number Diff line Loading @@ -24,9 +24,10 @@ #include <queues.h> #include <mysql/plugin.h> #define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_') #define true_word_char(ctype, character) \ ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \ (character) == '_') #define misc_word_char(X) 0 #define word_char(s,X) (true_word_char(s,X) || misc_word_char(X)) #define FT_MAX_WORD_LEN_FOR_SORT 31 Loading