Loading myisam/ft_boolean_search.c +57 −22 Original line number Diff line number Diff line Loading @@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; byte *quot, *qend; float weight; uint flags; my_off_t docid[2]; /* for index search and for scan */ Loading Loading @@ -126,6 +127,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, return; param.prev=' '; param.quot=up->quot; while ((res=ft_get_word(start,end,&w,¶m))) { int r=param.plusminus; Loading @@ -149,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbw->word[0]=w.len; if (param.yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); ftb->with_scan|=param.trunc; ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC); break; case 2: /* left bracket */ ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); Loading @@ -160,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbe->up=up; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; if ((ftbe->quot=param.quot)) ftb->with_scan|=2; if (param.yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1); break; case 3: /* right bracket */ if (up->quot) up->qend=param.quot; return; } } Loading Loading @@ -260,7 +264,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftbe->weight=1; ftbe->flags=FTB_FLAG_YES; ftbe->nos=1; ftbe->up=0; ftbe->quot=ftbe->up=0; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; ftb->root=ftbe; Loading @@ -270,16 +274,39 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); if (ftb->queue.elements<2) ftb->with_scan=0; if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; ftb->state=READY; return ftb; } void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) /* returns 1 if str0 contain str1 */ int _ftb_strstr(const byte *s0, const byte *e0, const byte *s1, const byte *e1, CHARSET_INFO *cs) { const byte *p; while (s0 < e0) { while (s0 < e0 && cs->to_upper[*s0++] != cs->to_upper[*s1]) /* no-op */; if (s0 >= e0) return 0; p=s1+1; while (s0 < e0 && p < e1 && cs->to_upper[*s0++] == cs->to_upper[*p++]) /* no-op */; if (p >= e1) return 1; } return 0; } void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) { FT_SEG_ITERATOR ftsi; FTB_EXPR *ftbe; float weight=ftbw->weight; int yn=ftbw->flags, ythresh; int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0); my_off_t curdoc=ftbw->docid[mode]; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) Loading @@ -300,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) { yn=ftbe->flags; weight=ftbe->cur_weight*ftbe->weight; if (mode && ftbe->quot) { int not_found=1; memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi) && not_found) { if (!ftsi.pos) continue; not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len, ftbe->quot, ftbe->qend, ftb->charset); } if (not_found) break; } /* ftbe->quot */ } else break; Loading Loading @@ -356,7 +397,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) { while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) { _ftb_climb_the_tree(ftbw,0); _ftb_climb_the_tree(ftb, ftbw, 0); /* update queue */ r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, Loading Loading @@ -414,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) FT_WORD word; FTB_WORD *ftbw; FTB_EXPR *ftbe; FT_SEG_ITERATOR ftsi; FT_SEG_ITERATOR ftsi, ftsi2; const byte *end; my_off_t docid=ftb->info->lastpos; Loading @@ -423,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) if (!ftb->queue.elements) return 0; #if NOT_USED if (ftb->state == READY || ftb->state == INDEX_DONE) ftb->state=SCAN; else if (ftb->state != SCAN) return -3.0; #endif if (ftb->keynr==NO_SUCH_KEY) _mi_ft_segiterator_dummy_init(record, length, &ftsi); else _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); memcpy(&ftsi2, &ftsi, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi)) { Loading Loading @@ -464,7 +499,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) if (ftbw->docid[1] == docid) continue; ftbw->docid[1]=docid; _ftb_climb_the_tree(ftbw,1); _ftb_climb_the_tree(ftb, ftbw, &ftsi2); } } } Loading myisam/ft_parser.c +11 −3 Original line number Diff line number Diff line Loading @@ -133,13 +133,20 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) for (;doc<end;doc++) { if (true_word_char(*doc)) break; if (*doc == FTB_LBR || *doc == FTB_RBR) if (*doc == FTB_RQUOT && param->quot) { param->quot=doc-1; *start=doc+1; return 3; /* FTB_RBR */ } if ((*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) && !param->quot) { /* param->prev=' '; */ *start=doc+1; if (*doc == FTB_LQUOT) param->quot=*start; return (*doc == FTB_RBR)+2; } if (param->prev == ' ') if (param->prev == ' ' && !param->quot) { if (*doc == FTB_YES ) { param->yesno=+1; continue; } else if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else Loading @@ -149,7 +156,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; } } param->prev=*doc; param->yesno=param->plusminus=param->pmsign=0; param->yesno=(param->quot != 0); param->plusminus=param->pmsign=0; } mwc=0; Loading myisam/ftdefs.h +4 −1 Original line number Diff line number Diff line Loading @@ -95,6 +95,8 @@ extern ulong collstat; #define FTB_RBR (ft_boolean_syntax[6]) #define FTB_NEG (ft_boolean_syntax[7]) #define FTB_TRUNC (ft_boolean_syntax[8]) #define FTB_LQUOT (ft_boolean_syntax[10]) #define FTB_RQUOT (ft_boolean_syntax[11]) typedef struct st_ft_word { byte * pos; Loading @@ -111,6 +113,7 @@ typedef struct st_ftb_param { int plusminus; bool pmsign; bool trunc; byte *quot; } FTB_PARAM; int is_stopword(char *word, uint len); Loading @@ -132,7 +135,7 @@ uint _mi_ft_segiterator(FT_SEG_ITERATOR *); void ft_parse_init(TREE *, CHARSET_INFO *); int ft_parse(TREE *, byte *, int); FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *); FT_WORD * ft_linearize(TREE *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *); uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record); Loading mysql-test/r/fulltext.result +3 −0 Original line number Diff line number Diff line Loading @@ -67,6 +67,9 @@ Full-text indexes are called collections 1 Only MyISAM tables support collections 2 Function MATCH ... AGAINST() is used to do a search 0 Full-text search in MySQL implements vector space model 0 select * from t1 where MATCH a,b AGAINST ('"Now sUPPort"' IN BOOLEAN MODE); a b MySQL has now support for full-text search select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); a b Full-text search in MySQL implements vector space model Loading mysql-test/t/fulltext.test +2 −1 Original line number Diff line number Diff line Loading @@ -20,7 +20,6 @@ select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); # UNION of fulltext's select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes"); # boolean search select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE); Loading @@ -34,6 +33,8 @@ select * from t1 where MATCH(a,b) AGAINST("+search -(support vector)" IN BOOLEAN select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t1; select *, MATCH(a,b) AGAINST("collections support" IN BOOLEAN MODE) as x from t1; select * from t1 where MATCH a,b AGAINST ('"Now sUPPort"' IN BOOLEAN MODE); # boolean w/o index: select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); Loading Loading
myisam/ft_boolean_search.c +57 −22 Original line number Diff line number Diff line Loading @@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; byte *quot, *qend; float weight; uint flags; my_off_t docid[2]; /* for index search and for scan */ Loading Loading @@ -126,6 +127,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, return; param.prev=' '; param.quot=up->quot; while ((res=ft_get_word(start,end,&w,¶m))) { int r=param.plusminus; Loading @@ -149,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbw->word[0]=w.len; if (param.yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); ftb->with_scan|=param.trunc; ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC); break; case 2: /* left bracket */ ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); Loading @@ -160,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbe->up=up; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; if ((ftbe->quot=param.quot)) ftb->with_scan|=2; if (param.yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1); break; case 3: /* right bracket */ if (up->quot) up->qend=param.quot; return; } } Loading Loading @@ -260,7 +264,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftbe->weight=1; ftbe->flags=FTB_FLAG_YES; ftbe->nos=1; ftbe->up=0; ftbe->quot=ftbe->up=0; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; ftb->root=ftbe; Loading @@ -270,16 +274,39 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); if (ftb->queue.elements<2) ftb->with_scan=0; if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; ftb->state=READY; return ftb; } void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) /* returns 1 if str0 contain str1 */ int _ftb_strstr(const byte *s0, const byte *e0, const byte *s1, const byte *e1, CHARSET_INFO *cs) { const byte *p; while (s0 < e0) { while (s0 < e0 && cs->to_upper[*s0++] != cs->to_upper[*s1]) /* no-op */; if (s0 >= e0) return 0; p=s1+1; while (s0 < e0 && p < e1 && cs->to_upper[*s0++] == cs->to_upper[*p++]) /* no-op */; if (p >= e1) return 1; } return 0; } void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) { FT_SEG_ITERATOR ftsi; FTB_EXPR *ftbe; float weight=ftbw->weight; int yn=ftbw->flags, ythresh; int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0); my_off_t curdoc=ftbw->docid[mode]; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) Loading @@ -300,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) { yn=ftbe->flags; weight=ftbe->cur_weight*ftbe->weight; if (mode && ftbe->quot) { int not_found=1; memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi) && not_found) { if (!ftsi.pos) continue; not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len, ftbe->quot, ftbe->qend, ftb->charset); } if (not_found) break; } /* ftbe->quot */ } else break; Loading Loading @@ -356,7 +397,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) { while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) { _ftb_climb_the_tree(ftbw,0); _ftb_climb_the_tree(ftb, ftbw, 0); /* update queue */ r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, Loading Loading @@ -414,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) FT_WORD word; FTB_WORD *ftbw; FTB_EXPR *ftbe; FT_SEG_ITERATOR ftsi; FT_SEG_ITERATOR ftsi, ftsi2; const byte *end; my_off_t docid=ftb->info->lastpos; Loading @@ -423,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) if (!ftb->queue.elements) return 0; #if NOT_USED if (ftb->state == READY || ftb->state == INDEX_DONE) ftb->state=SCAN; else if (ftb->state != SCAN) return -3.0; #endif if (ftb->keynr==NO_SUCH_KEY) _mi_ft_segiterator_dummy_init(record, length, &ftsi); else _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); memcpy(&ftsi2, &ftsi, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi)) { Loading Loading @@ -464,7 +499,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) if (ftbw->docid[1] == docid) continue; ftbw->docid[1]=docid; _ftb_climb_the_tree(ftbw,1); _ftb_climb_the_tree(ftb, ftbw, &ftsi2); } } } Loading
myisam/ft_parser.c +11 −3 Original line number Diff line number Diff line Loading @@ -133,13 +133,20 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) for (;doc<end;doc++) { if (true_word_char(*doc)) break; if (*doc == FTB_LBR || *doc == FTB_RBR) if (*doc == FTB_RQUOT && param->quot) { param->quot=doc-1; *start=doc+1; return 3; /* FTB_RBR */ } if ((*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) && !param->quot) { /* param->prev=' '; */ *start=doc+1; if (*doc == FTB_LQUOT) param->quot=*start; return (*doc == FTB_RBR)+2; } if (param->prev == ' ') if (param->prev == ' ' && !param->quot) { if (*doc == FTB_YES ) { param->yesno=+1; continue; } else if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else Loading @@ -149,7 +156,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; } } param->prev=*doc; param->yesno=param->plusminus=param->pmsign=0; param->yesno=(param->quot != 0); param->plusminus=param->pmsign=0; } mwc=0; Loading
myisam/ftdefs.h +4 −1 Original line number Diff line number Diff line Loading @@ -95,6 +95,8 @@ extern ulong collstat; #define FTB_RBR (ft_boolean_syntax[6]) #define FTB_NEG (ft_boolean_syntax[7]) #define FTB_TRUNC (ft_boolean_syntax[8]) #define FTB_LQUOT (ft_boolean_syntax[10]) #define FTB_RQUOT (ft_boolean_syntax[11]) typedef struct st_ft_word { byte * pos; Loading @@ -111,6 +113,7 @@ typedef struct st_ftb_param { int plusminus; bool pmsign; bool trunc; byte *quot; } FTB_PARAM; int is_stopword(char *word, uint len); Loading @@ -132,7 +135,7 @@ uint _mi_ft_segiterator(FT_SEG_ITERATOR *); void ft_parse_init(TREE *, CHARSET_INFO *); int ft_parse(TREE *, byte *, int); FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *); FT_WORD * ft_linearize(TREE *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *); uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record); Loading
mysql-test/r/fulltext.result +3 −0 Original line number Diff line number Diff line Loading @@ -67,6 +67,9 @@ Full-text indexes are called collections 1 Only MyISAM tables support collections 2 Function MATCH ... AGAINST() is used to do a search 0 Full-text search in MySQL implements vector space model 0 select * from t1 where MATCH a,b AGAINST ('"Now sUPPort"' IN BOOLEAN MODE); a b MySQL has now support for full-text search select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); a b Full-text search in MySQL implements vector space model Loading
mysql-test/t/fulltext.test +2 −1 Original line number Diff line number Diff line Loading @@ -20,7 +20,6 @@ select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); # UNION of fulltext's select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes"); # boolean search select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE); Loading @@ -34,6 +33,8 @@ select * from t1 where MATCH(a,b) AGAINST("+search -(support vector)" IN BOOLEAN select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t1; select *, MATCH(a,b) AGAINST("collections support" IN BOOLEAN MODE) as x from t1; select * from t1 where MATCH a,b AGAINST ('"Now sUPPort"' IN BOOLEAN MODE); # boolean w/o index: select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); Loading