Loading Docs/manual.texi +11 −4 Original line number Diff line number Diff line Loading @@ -36191,6 +36191,9 @@ others, but will not be excluded altogether, as it would be with the @item * An asterisk is the truncation operator. Unlike the other operators, it should be @strong{appended} to the word, not prepended. @item " The phrase, that is enclosed in double quotes @code{"}, matches only rows that contain this phrase @strong{literally, as it was typed}. @end table And here are some examples: Loading @@ -36199,16 +36202,18 @@ And here are some examples: @item apple banana find rows that contain at least one of these words. @item +apple +juice ... both words ... both words. @item +apple macintosh ... word ``apple'', but rank it higher if it also contain ``macintosh'' ... word ``apple'', but rank it higher if it also contain ``macintosh''. @item +apple -macintosh ... word ``apple'' but not ``macintosh'' ... word ``apple'' but not ``macintosh''. @item +apple +(>pie <strudel) ... ``apple'' and ``pie'', or ``apple'' and ``strudel'' (in any order), but rank ``apple pie'' higher than ``apple strudel''. @item apple* ... ``apple'', ``apples'', ``applesauce'', and ``applet'' ... ``apple'', ``apples'', ``applesauce'', and ``applet''. @item "some words" ... ``some words of wisdom'', but not ``some noise words''. @end table @menu Loading Loading @@ -48928,6 +48933,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item Boolean fulltext search now supports "phrase searches". @item New configure option @code{--without-query-cache}. @item Memory allocation strategy for 'root memory' changed. Block size now grows myisam/ft_boolean_search.c +70 −31 Original line number Diff line number Diff line Loading @@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; byte *quot, *qend; float weight; uint flags; my_off_t docid[2]; /* for index search and for scan */ Loading @@ -84,6 +85,7 @@ typedef struct st_ft_info { struct _ft_vft *please; MI_INFO *info; uint keynr; CHARSET_INFO *charset; enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE /*, SCAN*/ } state; uint with_scan; FTB_EXPR *root; Loading @@ -101,10 +103,10 @@ int FTB_WORD_cmp(void *v __attribute__((unused)), FTB_WORD *a, FTB_WORD *b) return i; } int FTB_WORD_cmp_list(void *v __attribute__((unused)), FTB_WORD **a, FTB_WORD **b) int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ int i=_mi_compare_text(default_charset_info, (*b)->word+1,(*b)->len-1, int i=_mi_compare_text(cs, (*b)->word+1,(*b)->len-1, (*a)->word+1,(*a)->len-1,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); Loading @@ -125,6 +127,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, return; param.prev=' '; param.quot=up->quot; while ((res=ft_get_word(start,end,&w,¶m))) { int r=param.plusminus; Loading @@ -148,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbw->word[0]=w.len; if (param.yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); ftb->with_scan|=param.trunc; ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC); break; case 2: /* left bracket */ ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); Loading @@ -159,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbe->up=up; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; if ((ftbe->quot=param.quot)) ftb->with_scan|=2; if (param.yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1); break; case 3: /* right bracket */ if (up->quot) up->qend=param.quot; return; } } Loading Loading @@ -203,7 +208,7 @@ void _ftb_init_index_search(FT_INFO *ftb) SEARCH_FIND | SEARCH_BIGGER, keyroot); if (!r) { r=_mi_compare_text(default_charset_info, r=_mi_compare_text(ftb->charset, info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), Loading Loading @@ -241,6 +246,9 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftb->state=UNINITIALIZED; ftb->info=info; ftb->keynr=keynr; ftb->charset= ((keynr==NO_SUCH_KEY) ? default_charset_info : info->s->keyinfo[keynr].seg->charset); ftb->with_scan=0; init_alloc_root(&ftb->mem_root, 1024, 1024); Loading @@ -256,26 +264,49 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftbe->weight=1; ftbe->flags=FTB_FLAG_YES; ftbe->nos=1; ftbe->up=0; ftbe->quot=ftbe->up=0; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; ftb->root=ftbe; _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0); ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root, sizeof(FTB_WORD *)*ftb->queue.elements); memcpy(ftb->list, ftb->queue.root, sizeof(FTB_WORD *)*ftb->queue.elements); memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), (qsort2_cmp)FTB_WORD_cmp_list, 0); if (ftb->queue.elements<2) ftb->with_scan=0; (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; ftb->state=READY; return ftb; } void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) /* returns 1 if str0 contain str1 */ int _ftb_strstr(const byte *s0, const byte *e0, const byte *s1, const byte *e1, CHARSET_INFO *cs) { const byte *p; while (s0 < e0) { while (s0 < e0 && cs->to_upper[*s0++] != cs->to_upper[*s1]) /* no-op */; if (s0 >= e0) return 0; p=s1+1; while (s0 < e0 && p < e1 && cs->to_upper[*s0++] == cs->to_upper[*p++]) /* no-op */; if (p >= e1) return 1; } return 0; } void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) { FT_SEG_ITERATOR ftsi; FTB_EXPR *ftbe; float weight=ftbw->weight; int yn=ftbw->flags, ythresh; int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0); my_off_t curdoc=ftbw->docid[mode]; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) Loading @@ -296,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) { yn=ftbe->flags; weight=ftbe->cur_weight*ftbe->weight; if (mode && ftbe->quot) { int not_found=1; memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi) && not_found) { if (!ftsi.pos) continue; not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len, ftbe->quot, ftbe->qend, ftb->charset); } if (not_found) break; } /* ftbe->quot */ } else break; Loading Loading @@ -352,14 +397,14 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) { while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) { _ftb_climb_the_tree(ftbw,0); _ftb_climb_the_tree(ftb, ftbw, 0); /* update queue */ r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, SEARCH_BIGGER , keyroot); if (!r) { r=_mi_compare_text(default_charset_info, r=_mi_compare_text(ftb->charset, info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), Loading Loading @@ -410,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) FT_WORD word; FTB_WORD *ftbw; FTB_EXPR *ftbe; FT_SEG_ITERATOR ftsi; FT_SEG_ITERATOR ftsi, ftsi2; const byte *end; my_off_t docid=ftb->info->lastpos; Loading @@ -419,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) if (!ftb->queue.elements) return 0; #if NOT_USED if (ftb->state == READY || ftb->state == INDEX_DONE) ftb->state=SCAN; else if (ftb->state != SCAN) return -3.0; #endif if (ftb->keynr==NO_SUCH_KEY) _mi_ft_segiterator_dummy_init(record, length, &ftsi); else _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); memcpy(&ftsi2, &ftsi, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi)) { Loading @@ -443,7 +482,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2) { ftbw=(FTB_WORD *)(ftb->list[c]); if (_mi_compare_text(default_charset_info, word.pos,word.len, if (_mi_compare_text(ftb->charset, word.pos,word.len, (uchar*) ftbw->word+1,ftbw->len-1, (ftbw->flags&FTB_FLAG_TRUNC) ) >0) b=c; Loading @@ -453,14 +492,14 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) for (; c>=0; c--) { ftbw=(FTB_WORD *)(ftb->list[c]); if (_mi_compare_text(default_charset_info, word.pos,word.len, if (_mi_compare_text(ftb->charset, word.pos,word.len, (uchar*) ftbw->word+1,ftbw->len-1, (ftbw->flags&FTB_FLAG_TRUNC) )) break; if (ftbw->docid[1] == docid) continue; ftbw->docid[1]=docid; _ftb_climb_the_tree(ftbw,1); _ftb_climb_the_tree(ftb, ftbw, &ftsi2); } } } Loading myisam/ft_nlq_search.c +5 −2 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ struct st_ft_info { typedef struct st_all_in_one { MI_INFO *info; uint keynr; CHARSET_INFO *charset; uchar *keybuff; MI_KEYDEF *keyinfo; my_off_t key_root; Loading Loading @@ -93,7 +94,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) while(!r) { if (_mi_compare_text(default_charset_info, if (_mi_compare_text(aio->charset, aio->info->lastkey,keylen, aio->keybuff,keylen,0)) break; Loading Loading @@ -184,8 +185,9 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, aio.info=info; aio.keynr=keynr; aio.keybuff=info->lastkey+info->s->base.max_key_length; aio.keyinfo=info->s->keyinfo+keynr; aio.charset=aio.keyinfo->seg->charset; aio.keybuff=info->lastkey+info->s->base.max_key_length; aio.key_root=info->s->state.key_root[keynr]; bzero(&allocated_wtree,sizeof(allocated_wtree)); Loading @@ -193,6 +195,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, NULL, NULL); ft_parse_init(&allocated_wtree, aio.charset); if(ft_parse(&allocated_wtree,query,query_len)) goto err; Loading myisam/ft_parser.c +20 −13 Original line number Diff line number Diff line Loading @@ -35,12 +35,10 @@ typedef struct st_ft_docstat { } FT_DOCSTAT; static int FT_WORD_cmp(void* cmp_arg, FT_WORD *w1, FT_WORD *w2) static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { return _mi_compare_text(default_charset_info, (uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, (my_bool) (cmp_arg != 0)); return _mi_compare_text(cs, (uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, 0); } static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) Loading Loading @@ -135,13 +133,20 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) for (;doc<end;doc++) { if (true_word_char(*doc)) break; if (*doc == FTB_LBR || *doc == FTB_RBR) if (*doc == FTB_RQUOT && param->quot) { param->quot=doc-1; *start=doc+1; return 3; /* FTB_RBR */ } if ((*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) && !param->quot) { /* param->prev=' '; */ *start=doc+1; if (*doc == FTB_LQUOT) param->quot=*start; return (*doc == FTB_RBR)+2; } if (param->prev == ' ') if (param->prev == ' ' && !param->quot) { if (*doc == FTB_YES ) { param->yesno=+1; continue; } else if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else Loading @@ -151,7 +156,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; } } param->prev=*doc; param->yesno=param->plusminus=param->pmsign=0; param->yesno=(param->quot != 0); param->plusminus=param->pmsign=0; } mwc=0; Loading Loading @@ -207,16 +213,17 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word) return 0; } void ft_parse_init(TREE *wtree, CHARSET_INFO *cs) { if (!is_tree_inited(wtree)) init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs); } int ft_parse(TREE *wtree, byte *doc, int doclen) { byte *end=doc+doclen; FT_WORD w; if (!is_tree_inited(wtree)) { init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, NULL); } while (ft_simple_get_word(&doc,end,&w)) { if (!tree_insert(wtree, &w, 0)) Loading myisam/ft_update.c +8 −11 Original line number Diff line number Diff line Loading @@ -90,15 +90,12 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) FT_SEG_ITERATOR ftsi; _mi_ft_segiterator_init(info, keynr, record, &ftsi); ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset); while (_mi_ft_segiterator(&ftsi)) if (ftsi.pos) if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len)) return 1; /* Handle the case where all columns are NULL */ if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0)) return 1; else return 0; } Loading Loading @@ -153,6 +150,7 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist, int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { FT_SEG_ITERATOR ftsi1, ftsi2; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1); _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2); Loading @@ -160,8 +158,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || _mi_compare_text(default_charset_info, (uchar*) ftsi1.pos,ftsi1.len, _mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, (uchar*) ftsi2.pos,ftsi2.len,0))) return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; } Loading @@ -174,6 +171,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, { int error= -1; FT_WORD *oldlist,*newlist, *old_word, *new_word; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; uint key_length; int cmp, cmp2; Loading @@ -185,8 +183,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, error=0; while(old_word->pos && new_word->pos) { cmp=_mi_compare_text(default_charset_info, (uchar*) old_word->pos,old_word->len, cmp=_mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, (uchar*) new_word->pos,new_word->len,0); cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); Loading Loading
Docs/manual.texi +11 −4 Original line number Diff line number Diff line Loading @@ -36191,6 +36191,9 @@ others, but will not be excluded altogether, as it would be with the @item * An asterisk is the truncation operator. Unlike the other operators, it should be @strong{appended} to the word, not prepended. @item " The phrase, that is enclosed in double quotes @code{"}, matches only rows that contain this phrase @strong{literally, as it was typed}. @end table And here are some examples: Loading @@ -36199,16 +36202,18 @@ And here are some examples: @item apple banana find rows that contain at least one of these words. @item +apple +juice ... both words ... both words. @item +apple macintosh ... word ``apple'', but rank it higher if it also contain ``macintosh'' ... word ``apple'', but rank it higher if it also contain ``macintosh''. @item +apple -macintosh ... word ``apple'' but not ``macintosh'' ... word ``apple'' but not ``macintosh''. @item +apple +(>pie <strudel) ... ``apple'' and ``pie'', or ``apple'' and ``strudel'' (in any order), but rank ``apple pie'' higher than ``apple strudel''. @item apple* ... ``apple'', ``apples'', ``applesauce'', and ``applet'' ... ``apple'', ``apples'', ``applesauce'', and ``applet''. @item "some words" ... ``some words of wisdom'', but not ``some noise words''. @end table @menu Loading Loading @@ -48928,6 +48933,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item Boolean fulltext search now supports "phrase searches". @item New configure option @code{--without-query-cache}. @item Memory allocation strategy for 'root memory' changed. Block size now grows
myisam/ft_boolean_search.c +70 −31 Original line number Diff line number Diff line Loading @@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; byte *quot, *qend; float weight; uint flags; my_off_t docid[2]; /* for index search and for scan */ Loading @@ -84,6 +85,7 @@ typedef struct st_ft_info { struct _ft_vft *please; MI_INFO *info; uint keynr; CHARSET_INFO *charset; enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE /*, SCAN*/ } state; uint with_scan; FTB_EXPR *root; Loading @@ -101,10 +103,10 @@ int FTB_WORD_cmp(void *v __attribute__((unused)), FTB_WORD *a, FTB_WORD *b) return i; } int FTB_WORD_cmp_list(void *v __attribute__((unused)), FTB_WORD **a, FTB_WORD **b) int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ int i=_mi_compare_text(default_charset_info, (*b)->word+1,(*b)->len-1, int i=_mi_compare_text(cs, (*b)->word+1,(*b)->len-1, (*a)->word+1,(*a)->len-1,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); Loading @@ -125,6 +127,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, return; param.prev=' '; param.quot=up->quot; while ((res=ft_get_word(start,end,&w,¶m))) { int r=param.plusminus; Loading @@ -148,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbw->word[0]=w.len; if (param.yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); ftb->with_scan|=param.trunc; ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC); break; case 2: /* left bracket */ ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); Loading @@ -159,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbe->up=up; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; if ((ftbe->quot=param.quot)) ftb->with_scan|=2; if (param.yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1); break; case 3: /* right bracket */ if (up->quot) up->qend=param.quot; return; } } Loading Loading @@ -203,7 +208,7 @@ void _ftb_init_index_search(FT_INFO *ftb) SEARCH_FIND | SEARCH_BIGGER, keyroot); if (!r) { r=_mi_compare_text(default_charset_info, r=_mi_compare_text(ftb->charset, info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), Loading Loading @@ -241,6 +246,9 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftb->state=UNINITIALIZED; ftb->info=info; ftb->keynr=keynr; ftb->charset= ((keynr==NO_SUCH_KEY) ? default_charset_info : info->s->keyinfo[keynr].seg->charset); ftb->with_scan=0; init_alloc_root(&ftb->mem_root, 1024, 1024); Loading @@ -256,26 +264,49 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftbe->weight=1; ftbe->flags=FTB_FLAG_YES; ftbe->nos=1; ftbe->up=0; ftbe->quot=ftbe->up=0; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; ftb->root=ftbe; _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0); ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root, sizeof(FTB_WORD *)*ftb->queue.elements); memcpy(ftb->list, ftb->queue.root, sizeof(FTB_WORD *)*ftb->queue.elements); memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), (qsort2_cmp)FTB_WORD_cmp_list, 0); if (ftb->queue.elements<2) ftb->with_scan=0; (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; ftb->state=READY; return ftb; } void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) /* returns 1 if str0 contain str1 */ int _ftb_strstr(const byte *s0, const byte *e0, const byte *s1, const byte *e1, CHARSET_INFO *cs) { const byte *p; while (s0 < e0) { while (s0 < e0 && cs->to_upper[*s0++] != cs->to_upper[*s1]) /* no-op */; if (s0 >= e0) return 0; p=s1+1; while (s0 < e0 && p < e1 && cs->to_upper[*s0++] == cs->to_upper[*p++]) /* no-op */; if (p >= e1) return 1; } return 0; } void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) { FT_SEG_ITERATOR ftsi; FTB_EXPR *ftbe; float weight=ftbw->weight; int yn=ftbw->flags, ythresh; int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0); my_off_t curdoc=ftbw->docid[mode]; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) Loading @@ -296,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) { yn=ftbe->flags; weight=ftbe->cur_weight*ftbe->weight; if (mode && ftbe->quot) { int not_found=1; memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi) && not_found) { if (!ftsi.pos) continue; not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len, ftbe->quot, ftbe->qend, ftb->charset); } if (not_found) break; } /* ftbe->quot */ } else break; Loading Loading @@ -352,14 +397,14 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) { while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) { _ftb_climb_the_tree(ftbw,0); _ftb_climb_the_tree(ftb, ftbw, 0); /* update queue */ r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, SEARCH_BIGGER , keyroot); if (!r) { r=_mi_compare_text(default_charset_info, r=_mi_compare_text(ftb->charset, info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), Loading Loading @@ -410,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) FT_WORD word; FTB_WORD *ftbw; FTB_EXPR *ftbe; FT_SEG_ITERATOR ftsi; FT_SEG_ITERATOR ftsi, ftsi2; const byte *end; my_off_t docid=ftb->info->lastpos; Loading @@ -419,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) if (!ftb->queue.elements) return 0; #if NOT_USED if (ftb->state == READY || ftb->state == INDEX_DONE) ftb->state=SCAN; else if (ftb->state != SCAN) return -3.0; #endif if (ftb->keynr==NO_SUCH_KEY) _mi_ft_segiterator_dummy_init(record, length, &ftsi); else _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); memcpy(&ftsi2, &ftsi, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi)) { Loading @@ -443,7 +482,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2) { ftbw=(FTB_WORD *)(ftb->list[c]); if (_mi_compare_text(default_charset_info, word.pos,word.len, if (_mi_compare_text(ftb->charset, word.pos,word.len, (uchar*) ftbw->word+1,ftbw->len-1, (ftbw->flags&FTB_FLAG_TRUNC) ) >0) b=c; Loading @@ -453,14 +492,14 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) for (; c>=0; c--) { ftbw=(FTB_WORD *)(ftb->list[c]); if (_mi_compare_text(default_charset_info, word.pos,word.len, if (_mi_compare_text(ftb->charset, word.pos,word.len, (uchar*) ftbw->word+1,ftbw->len-1, (ftbw->flags&FTB_FLAG_TRUNC) )) break; if (ftbw->docid[1] == docid) continue; ftbw->docid[1]=docid; _ftb_climb_the_tree(ftbw,1); _ftb_climb_the_tree(ftb, ftbw, &ftsi2); } } } Loading
myisam/ft_nlq_search.c +5 −2 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ struct st_ft_info { typedef struct st_all_in_one { MI_INFO *info; uint keynr; CHARSET_INFO *charset; uchar *keybuff; MI_KEYDEF *keyinfo; my_off_t key_root; Loading Loading @@ -93,7 +94,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) while(!r) { if (_mi_compare_text(default_charset_info, if (_mi_compare_text(aio->charset, aio->info->lastkey,keylen, aio->keybuff,keylen,0)) break; Loading Loading @@ -184,8 +185,9 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, aio.info=info; aio.keynr=keynr; aio.keybuff=info->lastkey+info->s->base.max_key_length; aio.keyinfo=info->s->keyinfo+keynr; aio.charset=aio.keyinfo->seg->charset; aio.keybuff=info->lastkey+info->s->base.max_key_length; aio.key_root=info->s->state.key_root[keynr]; bzero(&allocated_wtree,sizeof(allocated_wtree)); Loading @@ -193,6 +195,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, NULL, NULL); ft_parse_init(&allocated_wtree, aio.charset); if(ft_parse(&allocated_wtree,query,query_len)) goto err; Loading
myisam/ft_parser.c +20 −13 Original line number Diff line number Diff line Loading @@ -35,12 +35,10 @@ typedef struct st_ft_docstat { } FT_DOCSTAT; static int FT_WORD_cmp(void* cmp_arg, FT_WORD *w1, FT_WORD *w2) static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { return _mi_compare_text(default_charset_info, (uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, (my_bool) (cmp_arg != 0)); return _mi_compare_text(cs, (uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, 0); } static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) Loading Loading @@ -135,13 +133,20 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) for (;doc<end;doc++) { if (true_word_char(*doc)) break; if (*doc == FTB_LBR || *doc == FTB_RBR) if (*doc == FTB_RQUOT && param->quot) { param->quot=doc-1; *start=doc+1; return 3; /* FTB_RBR */ } if ((*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) && !param->quot) { /* param->prev=' '; */ *start=doc+1; if (*doc == FTB_LQUOT) param->quot=*start; return (*doc == FTB_RBR)+2; } if (param->prev == ' ') if (param->prev == ' ' && !param->quot) { if (*doc == FTB_YES ) { param->yesno=+1; continue; } else if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else Loading @@ -151,7 +156,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; } } param->prev=*doc; param->yesno=param->plusminus=param->pmsign=0; param->yesno=(param->quot != 0); param->plusminus=param->pmsign=0; } mwc=0; Loading Loading @@ -207,16 +213,17 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word) return 0; } void ft_parse_init(TREE *wtree, CHARSET_INFO *cs) { if (!is_tree_inited(wtree)) init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs); } int ft_parse(TREE *wtree, byte *doc, int doclen) { byte *end=doc+doclen; FT_WORD w; if (!is_tree_inited(wtree)) { init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, NULL); } while (ft_simple_get_word(&doc,end,&w)) { if (!tree_insert(wtree, &w, 0)) Loading
myisam/ft_update.c +8 −11 Original line number Diff line number Diff line Loading @@ -90,15 +90,12 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) FT_SEG_ITERATOR ftsi; _mi_ft_segiterator_init(info, keynr, record, &ftsi); ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset); while (_mi_ft_segiterator(&ftsi)) if (ftsi.pos) if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len)) return 1; /* Handle the case where all columns are NULL */ if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0)) return 1; else return 0; } Loading Loading @@ -153,6 +150,7 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist, int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { FT_SEG_ITERATOR ftsi1, ftsi2; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1); _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2); Loading @@ -160,8 +158,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || _mi_compare_text(default_charset_info, (uchar*) ftsi1.pos,ftsi1.len, _mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, (uchar*) ftsi2.pos,ftsi2.len,0))) return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; } Loading @@ -174,6 +171,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, { int error= -1; FT_WORD *oldlist,*newlist, *old_word, *new_word; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; uint key_length; int cmp, cmp2; Loading @@ -185,8 +183,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, error=0; while(old_word->pos && new_word->pos) { cmp=_mi_compare_text(default_charset_info, (uchar*) old_word->pos,old_word->len, cmp=_mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, (uchar*) new_word->pos,new_word->len,0); cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); Loading