Loading include/ft_global.h +2 −2 Original line number Diff line number Diff line Loading @@ -53,8 +53,8 @@ void ft_free_stopwords(void); FT_DOCLIST * ft_init_search(void *, uint, byte *, uint, my_bool); int ft_read_next(FT_DOCLIST *, char *); #define ft_close_search(handler) my_free(((gptr)(handler)),MYF(0)) #define ft_get_relevance(handler) ((handler)->doc[(handler)->curdoc].weight) #define ft_get_docid(handler) ((handler)->doc[(handler)->curdoc].dpos) #define ft_get_relevance(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].weight) #define ft_get_docid(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].dpos) #define ft_reinit_search(handler) (((FT_DOCLIST *)(handler))->curdoc=-1) #ifdef __cplusplus Loading myisam/ft_boolean_search.c +250 −160 Original line number Diff line number Diff line Loading @@ -17,52 +17,10 @@ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ #include "ftdefs.h" #include <queues.h> /* search with boolean queries */ typedef struct st_all_in_one { MI_INFO *info; uint keynr; uchar *keybuff; MI_KEYDEF *keyinfo; my_off_t key_root; TREE dtree; byte *start, *end; uint total_yes, total_no; } ALL_IN_ONE; typedef struct st_ft_superdoc { FT_DOC doc; //FT_WORD *word_ptr; //double tmp_weight; uint yes; uint no; uint wno; ALL_IN_ONE *aio; } FT_SUPERDOC; static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)), FT_SUPERDOC *p1, FT_SUPERDOC *p2) { if (p1->doc.dpos < p2->doc.dpos) return -1; if (p1->doc.dpos == p2->doc.dpos) return 0; return 1; } static int walk_and_copy(FT_SUPERDOC *from, uint32 count __attribute__((unused)), FT_DOC **to) { if (from->yes == from->aio->total_yes && !from->no) { (*to)->dpos=from->doc.dpos; (*to)->weight=from->doc.weight; (*to)++; } return 0; } static double _wghts[11]={ 0.131687242798354, 0.197530864197531, Loading Loading @@ -91,136 +49,268 @@ static double _nwghts[11]={ -3.796875000000000}; static double *nwghts=_nwghts+5; // nwghts[i] = -0.5*1.5**i int do_boolean(ALL_IN_ONE *aio, uint nested __attribute__((unused)), int yesno __attribute__((unused)), int plusminus, bool pmsign) { int r, res; uint keylen, wno; FT_SUPERDOC sdoc, *sptr; TREE_ELEMENT *selem; FT_WORD w; FTB_PARAM param; #ifdef EVAL_RUN return 1; #endif /* EVAL_RUN */ param.prev=' '; typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; float weight; int yesno; my_off_t docid; float cur_weight; int yesses; /* number of "yes" words matched */ int nos; /* number of "no" words matched */ int ythresh; /* number of "yes" words in expr */ }; typedef struct { FTB_EXPR *up; float weight; int yesno; int trunc; my_off_t docid; uint ndepth; int len; /* ... there can be docid cache added here. SerG */ byte word[1]; } FTB_WORD; typedef struct st_ftb_handler { MI_INFO *info; uint keynr; int ok; FTB_EXPR *root; QUEUE queue; MEM_ROOT mem_root; } FTB; for(wno=1; (res=ft_get_word(&aio->start,aio->end,&w,¶m)); wno++) int FTB_WORD_cmp(void *v, byte *a, byte *b) { r=plusminus+param.plusminus; if (param.pmsign^pmsign) w.weight=nwghts[(r>5)?5:((r<-5)?-5:r)]; else w.weight=wghts[(r>5)?5:((r<-5)?-5:r)]; if (param.yesno>0) aio->total_yes++; if (param.yesno<0) aio->total_no++; /* ORDER BY docid, ndepth DESC */ int i=((FTB_WORD *)a)->docid-((FTB_WORD *)b)->docid; if (!i) i=((FTB_WORD *)b)->ndepth-((FTB_WORD *)a)->ndepth; return sgn(i); } void _ftb_parse_query(FTB *ftb, byte **start, byte *end, FTB_EXPR *up, uint ndepth, uint depth) { byte res; FTB_PARAM param; FT_WORD w; FTB_WORD *ftbw; FTB_EXPR *ftbe; MI_INFO *info=ftb->info; int r; MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; my_off_t keyroot=info->s->state.key_root[ftb->keynr]; uint extra=HA_FT_WLEN+info->s->rec_reflength; /* just a shortcut */ if (! ftb->ok) return; while (res=ftb_get_word(&start,end,&w,¶m)) { byte r=param.plusminus; float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; switch (res) { case FTB_LBR: // ( //if (do_boolean(aio,nested+1,my_yesno,plusminus+my_plusminus)) // return 1; // ??? case FTB_LBR: ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); ftbe->yesno=param.yesno; ftbe->weight=weight; ftbe->up=up; ftbe->ythresh=0; ftbe->docid=HA_POS_ERROR; if (ftbw->yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1, (param.yesno<0 ? depth+1 : ndepth)); break; case 1: // word keylen=_ft_make_key(aio->info,aio->keynr,(char*) aio->keybuff,&w,0); keylen-=HA_FT_WLEN; r=_mi_search(aio->info, aio->keyinfo, aio->keybuff, keylen, SEARCH_FIND | SEARCH_PREFIX, aio->key_root); while (!r) case FTB_RBR: return; case 1: ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra)); ftbw->len=w.len + !param.trunc; ftbw->yesno=param.yesno; ftbw->trunc=param.trunc; /* 0 or 1 */ ftbw->weight=weight; ftbw->up=up; ftbw->docid=HA_POS_ERROR; ftbw->ndepth= param.yesno<0 ? depth : ndepth; memcpy(ftbw->word+1, w.pos, w.len); ftbw->word[0]=w.len; if (ftbw->yesno > 0) up->ythresh++; /*****************************************/ r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, SEARCH_FIND | SEARCH_PREFIX, keyroot); if (!r) { if (param.trunc) r=_mi_compare_text(default_charset_info, aio->info->lastkey+1,keylen-1, aio->keybuff+1,keylen-1,0); info->lastkey+ftbw->trunc,ftbw->len, ftbw->word+ftbw->trunc,ftbw->len,0); } if (r) /* not found */ { if (ftbw->yesno>0 && ftbw->up->up==0) { /* this word MUST BE present in every document returned, so we can abort the search right now */ ftb->ok=0; return; } } else r=_mi_compare_text(default_charset_info, aio->info->lastkey,keylen, aio->keybuff,keylen,0); if (r) break; { memcpy(ftbw->word, info->lastkey, info->lastkey_length); ftbw->docid=info->lastpos; queue_insert(& ftb->queue, (byte *)ftbw); } /*****************************************/ break; } } return; } sdoc.doc.dpos=aio->info->lastpos; FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query, uint query_len) { FTB *ftb; FTB_EXPR *ftbe; uint res; /* saving document matched into dtree */ if (!(selem=tree_insert(&aio->dtree, &sdoc, 0))) return 1; if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) return 0; ftb->ok=1; ftb->info=info; ftb->keynr=keynr; init_alloc_root(&ftb->mem_root, query_len,0); /* hack: instead of init_queue, we'll use reinit queue to be able * to alloc queue with alloc_root() */ res=ftb->queue.max_elements=query_len/(ft_min_word_len+1); ftb->queue.root=(byte **)alloc_root(&ftb->mem_root, (res+1)*sizeof(void*)); reinit_queue(& ftb->queue, res, 0, 0, FTB_WORD_cmp, ftb); ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); ftbe->weight=ftbe->yesno=ftbe->nos=1; ftbe->up=0; ftbe->ythresh=0; ftbe->docid=HA_POS_ERROR; ftb->root=ftbe; _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, 0); return ftb; } sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); int ft_boolean_search_next(FTB *ftb, char *record) { FTB_EXPR *ftbe, *up; FTB_WORD *ftbw; MI_INFO *info=ftb->info; MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; my_off_t keyroot=info->s->state.key_root[ftb->keynr]; my_off_t curdoc; int r; /* black magic ON */ if ((int) _mi_check_index(info, ftb->keynr) < 0) return my_errno; if (_mi_readinfo(info, F_RDLCK, 1)) return my_errno; /* black magic OFF */ while(ftb->ok && ftb->queue.elements) { curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid; if (selem->count==1) /* document's first match */ while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid) { sptr->yes=0; sptr->no=0; sptr->doc.weight=0; sptr->aio=aio; sptr->wno=0; float weight=ftbw->weight; uint yn=ftbw->yesno; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) { if (ftbe->docid != curdoc) { ftbe->cur_weight=ftbe->yesses=ftbe->nos=0; ftbe->docid=curdoc; } if (sptr->wno != wno) if (yn>0) { ftbe->cur_weight+=weight; if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos) { if (param.yesno>0) sptr->yes++; if (param.yesno<0) sptr->no++; sptr->wno=wno; yn=ftbe->yesno; weight=ftbe->cur_weight*ftbe->weight; } sptr->doc.weight+=w.weight; if (_mi_test_if_changed(aio->info) == 0) r=_mi_search_next(aio->info, aio->keyinfo, aio->info->lastkey, aio->info->lastkey_length, SEARCH_BIGGER, aio->key_root); else r=_mi_search(aio->info, aio->keyinfo, aio->info->lastkey, aio->info->lastkey_length, SEARCH_BIGGER, aio->key_root); break; } else if (yn<0) { /* NOTE: special sort function of queue assures that all yn<0 * events for every particular subexpression will happen * BEFORE all yn>=0 events. So no already matched expression * can become not-matched again. */ ++ftbe->nos; break; case FTB_RBR: // ) } else /* if (yn==0) */ { if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos) { yn=ftbe->yesno; weight*=ftbe->weight; } else { ftbe->cur_weight+=weight; break; } } return 0; } FT_DOCLIST *ft_boolean_search(MI_INFO *info, uint keynr, byte *query, uint query_len) /* update queue */ r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, SEARCH_FIND | SEARCH_PREFIX, keyroot); if (!r) { ALL_IN_ONE aio; FT_DOC *dptr; FT_DOCLIST *dlist=NULL; aio.info=info; aio.keynr=keynr; aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length; aio.keyinfo=aio.info->s->keyinfo+keynr; aio.key_root=aio.info->s->state.key_root[keynr]; aio.start=query; aio.end=query+query_len; aio.total_yes=aio.total_no=0; init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, NULL, NULL); if (do_boolean(&aio,0,0,0,0)) goto err; dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); if(!dlist) goto err; dlist->ndocs=aio.dtree.elements_in_tree; dlist->curdoc=-1; dlist->info=aio.info; dptr=dlist->doc; tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr, left_root_right); r=_mi_compare_text(default_charset_info, info->lastkey+ftbw->trunc,ftbw->len, ftbw->word+ftbw->trunc,ftbw->len,0); } if (r) /* not found */ { queue_remove(& ftb->queue, 0); if (ftbw->yesno>0 && ftbw->up->up==0) { /* this word MUST BE present in every document returned, so we can stop the search right now */ ftb->ok=0; } } else { memcpy(ftbw->word, info->lastkey, info->lastkey_length); ftbw->docid=info->lastpos; queue_replaced(& ftb->queue); } } dlist->ndocs=dptr - dlist->doc; ftbe=ftb->root; if (ftbe->cur_weight>0 && ftbe->yesses>=ftbe->ythresh && !ftbe->nos) { /* curdoc matched ! */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */ err: delete_tree(&aio.dtree); return dlist; /* info->lastpos=curdoc; */ /* do I need this ? */ if (!(*info->read_record)(info,curdoc,record)) { info->update|= HA_STATE_AKTIV; /* Record is read */ return 0; } return my_errno; } } return my_errno=HA_ERR_END_OF_FILE; } myisam/ft_search.c +4 −3 Original line number Diff line number Diff line Loading @@ -38,9 +38,9 @@ FT_DOCLIST *ft_init_search(void *info, uint keynr, byte *query, return NULL; /* black magic OFF */ if (is_boolean(query, query_len)) dlist=ft_boolean_search(info,keynr,query,query_len); else // if (is_boolean(query, query_len)) // dlist=ft_boolean_search(info,keynr,query,query_len); // else dlist=ft_nlq_search(info,keynr,query,query_len); if(dlist && presort) Loading Loading @@ -72,3 +72,4 @@ int ft_read_next(FT_DOCLIST *handler, char *record) } return my_errno; } sql/item_func.cc +5 −9 Original line number Diff line number Diff line Loading @@ -1903,7 +1903,7 @@ longlong Item_func_inet_aton::val_int() return 0; } double Item_func_match::val() double Item_func_match_nl::val() { if (ft_handler==NULL) init_search(1); Loading @@ -1922,7 +1922,7 @@ double Item_func_match::val() /* we'll have to find ft_relevance manually in ft_handler array */ int a,b,c; FT_DOC *docs=ft_handler->doc; FT_DOC *docs=((FT_DOCLIST *)ft_handler)->doc; my_off_t docid=table->file->row_position(); if ((null_value=(docid==HA_OFFSET_ERROR))) Loading @@ -1930,7 +1930,7 @@ double Item_func_match::val() // Assuming docs[] is sorted by dpos... for (a=0, b=ft_handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) for (a=0, b=((FT_DOCLIST *)ft_handler)->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) { if (docs[c].dpos > docid) b=c; Loading @@ -1941,7 +1941,6 @@ double Item_func_match::val() return docs[a].weight; else return 0.0; } void Item_func_match::init_search(bool no_order) Loading Loading @@ -1969,9 +1968,7 @@ void Item_func_match::init_search(bool no_order) tmp2.set("",0); } ft_handler=(FT_DOCLIST *) table->file->ft_init_ext(key, (byte*) ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order); ft_handler_init(ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order); if (join_key) { Loading Loading @@ -2024,7 +2021,6 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist) return 0; } bool Item_func_match::fix_index() { List_iterator_fast<Item> li(fields); Loading sql/item_func.h +21 −11 Original line number Diff line number Diff line Loading @@ -863,30 +863,40 @@ class Item_func_match :public Item_real_func uint key; bool join_key; Item_func_match *master; FT_DOCLIST *ft_handler; void * ft_handler; Item_func_match(List<Item> &a, Item *b): Item_real_func(b), fields(a), table(0), join_key(0), master(0), ft_handler(0) {} ~Item_func_match() { if (!master) if (!master && ft_handler) { if (ft_handler) { ft_close_search(ft_handler); ft_handler_close(); if(join_key) table->file->ft_handler=0; } } } const char *func_name() const { return "match"; } virtual int ft_handler_init(const byte *key, uint keylen, bool presort) { return 1; } virtual int ft_handler_close() { return 1; } enum Functype functype() const { return FT_FUNC; } void update_used_tables() {} bool fix_fields(THD *thd,struct st_table_list *tlist); bool eq(const Item *) const; double val(); longlong val_int() { return val()!=0.0; } bool fix_index(); void init_search(bool no_order); }; class Item_func_match_nl :public Item_func_match { public: Item_func_match_nl(List<Item> &a, Item *b): Item_func_match(a,b) {} const char *func_name() const { return "match_NL"; } double val(); int ft_handler_init(const byte *query, uint querylen, bool presort) { ft_handler=table->file->ft_init_ext(key, query, querylen, presort); } int ft_handler_close() { ft_close_search(ft_handler); ft_handler=0; } }; Loading
include/ft_global.h +2 −2 Original line number Diff line number Diff line Loading @@ -53,8 +53,8 @@ void ft_free_stopwords(void); FT_DOCLIST * ft_init_search(void *, uint, byte *, uint, my_bool); int ft_read_next(FT_DOCLIST *, char *); #define ft_close_search(handler) my_free(((gptr)(handler)),MYF(0)) #define ft_get_relevance(handler) ((handler)->doc[(handler)->curdoc].weight) #define ft_get_docid(handler) ((handler)->doc[(handler)->curdoc].dpos) #define ft_get_relevance(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].weight) #define ft_get_docid(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].dpos) #define ft_reinit_search(handler) (((FT_DOCLIST *)(handler))->curdoc=-1) #ifdef __cplusplus Loading
myisam/ft_boolean_search.c +250 −160 Original line number Diff line number Diff line Loading @@ -17,52 +17,10 @@ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ #include "ftdefs.h" #include <queues.h> /* search with boolean queries */ typedef struct st_all_in_one { MI_INFO *info; uint keynr; uchar *keybuff; MI_KEYDEF *keyinfo; my_off_t key_root; TREE dtree; byte *start, *end; uint total_yes, total_no; } ALL_IN_ONE; typedef struct st_ft_superdoc { FT_DOC doc; //FT_WORD *word_ptr; //double tmp_weight; uint yes; uint no; uint wno; ALL_IN_ONE *aio; } FT_SUPERDOC; static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)), FT_SUPERDOC *p1, FT_SUPERDOC *p2) { if (p1->doc.dpos < p2->doc.dpos) return -1; if (p1->doc.dpos == p2->doc.dpos) return 0; return 1; } static int walk_and_copy(FT_SUPERDOC *from, uint32 count __attribute__((unused)), FT_DOC **to) { if (from->yes == from->aio->total_yes && !from->no) { (*to)->dpos=from->doc.dpos; (*to)->weight=from->doc.weight; (*to)++; } return 0; } static double _wghts[11]={ 0.131687242798354, 0.197530864197531, Loading Loading @@ -91,136 +49,268 @@ static double _nwghts[11]={ -3.796875000000000}; static double *nwghts=_nwghts+5; // nwghts[i] = -0.5*1.5**i int do_boolean(ALL_IN_ONE *aio, uint nested __attribute__((unused)), int yesno __attribute__((unused)), int plusminus, bool pmsign) { int r, res; uint keylen, wno; FT_SUPERDOC sdoc, *sptr; TREE_ELEMENT *selem; FT_WORD w; FTB_PARAM param; #ifdef EVAL_RUN return 1; #endif /* EVAL_RUN */ param.prev=' '; typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; float weight; int yesno; my_off_t docid; float cur_weight; int yesses; /* number of "yes" words matched */ int nos; /* number of "no" words matched */ int ythresh; /* number of "yes" words in expr */ }; typedef struct { FTB_EXPR *up; float weight; int yesno; int trunc; my_off_t docid; uint ndepth; int len; /* ... there can be docid cache added here. SerG */ byte word[1]; } FTB_WORD; typedef struct st_ftb_handler { MI_INFO *info; uint keynr; int ok; FTB_EXPR *root; QUEUE queue; MEM_ROOT mem_root; } FTB; for(wno=1; (res=ft_get_word(&aio->start,aio->end,&w,¶m)); wno++) int FTB_WORD_cmp(void *v, byte *a, byte *b) { r=plusminus+param.plusminus; if (param.pmsign^pmsign) w.weight=nwghts[(r>5)?5:((r<-5)?-5:r)]; else w.weight=wghts[(r>5)?5:((r<-5)?-5:r)]; if (param.yesno>0) aio->total_yes++; if (param.yesno<0) aio->total_no++; /* ORDER BY docid, ndepth DESC */ int i=((FTB_WORD *)a)->docid-((FTB_WORD *)b)->docid; if (!i) i=((FTB_WORD *)b)->ndepth-((FTB_WORD *)a)->ndepth; return sgn(i); } void _ftb_parse_query(FTB *ftb, byte **start, byte *end, FTB_EXPR *up, uint ndepth, uint depth) { byte res; FTB_PARAM param; FT_WORD w; FTB_WORD *ftbw; FTB_EXPR *ftbe; MI_INFO *info=ftb->info; int r; MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; my_off_t keyroot=info->s->state.key_root[ftb->keynr]; uint extra=HA_FT_WLEN+info->s->rec_reflength; /* just a shortcut */ if (! ftb->ok) return; while (res=ftb_get_word(&start,end,&w,¶m)) { byte r=param.plusminus; float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; switch (res) { case FTB_LBR: // ( //if (do_boolean(aio,nested+1,my_yesno,plusminus+my_plusminus)) // return 1; // ??? case FTB_LBR: ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); ftbe->yesno=param.yesno; ftbe->weight=weight; ftbe->up=up; ftbe->ythresh=0; ftbe->docid=HA_POS_ERROR; if (ftbw->yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1, (param.yesno<0 ? depth+1 : ndepth)); break; case 1: // word keylen=_ft_make_key(aio->info,aio->keynr,(char*) aio->keybuff,&w,0); keylen-=HA_FT_WLEN; r=_mi_search(aio->info, aio->keyinfo, aio->keybuff, keylen, SEARCH_FIND | SEARCH_PREFIX, aio->key_root); while (!r) case FTB_RBR: return; case 1: ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra)); ftbw->len=w.len + !param.trunc; ftbw->yesno=param.yesno; ftbw->trunc=param.trunc; /* 0 or 1 */ ftbw->weight=weight; ftbw->up=up; ftbw->docid=HA_POS_ERROR; ftbw->ndepth= param.yesno<0 ? depth : ndepth; memcpy(ftbw->word+1, w.pos, w.len); ftbw->word[0]=w.len; if (ftbw->yesno > 0) up->ythresh++; /*****************************************/ r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, SEARCH_FIND | SEARCH_PREFIX, keyroot); if (!r) { if (param.trunc) r=_mi_compare_text(default_charset_info, aio->info->lastkey+1,keylen-1, aio->keybuff+1,keylen-1,0); info->lastkey+ftbw->trunc,ftbw->len, ftbw->word+ftbw->trunc,ftbw->len,0); } if (r) /* not found */ { if (ftbw->yesno>0 && ftbw->up->up==0) { /* this word MUST BE present in every document returned, so we can abort the search right now */ ftb->ok=0; return; } } else r=_mi_compare_text(default_charset_info, aio->info->lastkey,keylen, aio->keybuff,keylen,0); if (r) break; { memcpy(ftbw->word, info->lastkey, info->lastkey_length); ftbw->docid=info->lastpos; queue_insert(& ftb->queue, (byte *)ftbw); } /*****************************************/ break; } } return; } sdoc.doc.dpos=aio->info->lastpos; FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query, uint query_len) { FTB *ftb; FTB_EXPR *ftbe; uint res; /* saving document matched into dtree */ if (!(selem=tree_insert(&aio->dtree, &sdoc, 0))) return 1; if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) return 0; ftb->ok=1; ftb->info=info; ftb->keynr=keynr; init_alloc_root(&ftb->mem_root, query_len,0); /* hack: instead of init_queue, we'll use reinit queue to be able * to alloc queue with alloc_root() */ res=ftb->queue.max_elements=query_len/(ft_min_word_len+1); ftb->queue.root=(byte **)alloc_root(&ftb->mem_root, (res+1)*sizeof(void*)); reinit_queue(& ftb->queue, res, 0, 0, FTB_WORD_cmp, ftb); ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); ftbe->weight=ftbe->yesno=ftbe->nos=1; ftbe->up=0; ftbe->ythresh=0; ftbe->docid=HA_POS_ERROR; ftb->root=ftbe; _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, 0); return ftb; } sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); int ft_boolean_search_next(FTB *ftb, char *record) { FTB_EXPR *ftbe, *up; FTB_WORD *ftbw; MI_INFO *info=ftb->info; MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr; my_off_t keyroot=info->s->state.key_root[ftb->keynr]; my_off_t curdoc; int r; /* black magic ON */ if ((int) _mi_check_index(info, ftb->keynr) < 0) return my_errno; if (_mi_readinfo(info, F_RDLCK, 1)) return my_errno; /* black magic OFF */ while(ftb->ok && ftb->queue.elements) { curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid; if (selem->count==1) /* document's first match */ while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid) { sptr->yes=0; sptr->no=0; sptr->doc.weight=0; sptr->aio=aio; sptr->wno=0; float weight=ftbw->weight; uint yn=ftbw->yesno; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) { if (ftbe->docid != curdoc) { ftbe->cur_weight=ftbe->yesses=ftbe->nos=0; ftbe->docid=curdoc; } if (sptr->wno != wno) if (yn>0) { ftbe->cur_weight+=weight; if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos) { if (param.yesno>0) sptr->yes++; if (param.yesno<0) sptr->no++; sptr->wno=wno; yn=ftbe->yesno; weight=ftbe->cur_weight*ftbe->weight; } sptr->doc.weight+=w.weight; if (_mi_test_if_changed(aio->info) == 0) r=_mi_search_next(aio->info, aio->keyinfo, aio->info->lastkey, aio->info->lastkey_length, SEARCH_BIGGER, aio->key_root); else r=_mi_search(aio->info, aio->keyinfo, aio->info->lastkey, aio->info->lastkey_length, SEARCH_BIGGER, aio->key_root); break; } else if (yn<0) { /* NOTE: special sort function of queue assures that all yn<0 * events for every particular subexpression will happen * BEFORE all yn>=0 events. So no already matched expression * can become not-matched again. */ ++ftbe->nos; break; case FTB_RBR: // ) } else /* if (yn==0) */ { if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos) { yn=ftbe->yesno; weight*=ftbe->weight; } else { ftbe->cur_weight+=weight; break; } } return 0; } FT_DOCLIST *ft_boolean_search(MI_INFO *info, uint keynr, byte *query, uint query_len) /* update queue */ r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, SEARCH_FIND | SEARCH_PREFIX, keyroot); if (!r) { ALL_IN_ONE aio; FT_DOC *dptr; FT_DOCLIST *dlist=NULL; aio.info=info; aio.keynr=keynr; aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length; aio.keyinfo=aio.info->s->keyinfo+keynr; aio.key_root=aio.info->s->state.key_root[keynr]; aio.start=query; aio.end=query+query_len; aio.total_yes=aio.total_no=0; init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, NULL, NULL); if (do_boolean(&aio,0,0,0,0)) goto err; dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); if(!dlist) goto err; dlist->ndocs=aio.dtree.elements_in_tree; dlist->curdoc=-1; dlist->info=aio.info; dptr=dlist->doc; tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr, left_root_right); r=_mi_compare_text(default_charset_info, info->lastkey+ftbw->trunc,ftbw->len, ftbw->word+ftbw->trunc,ftbw->len,0); } if (r) /* not found */ { queue_remove(& ftb->queue, 0); if (ftbw->yesno>0 && ftbw->up->up==0) { /* this word MUST BE present in every document returned, so we can stop the search right now */ ftb->ok=0; } } else { memcpy(ftbw->word, info->lastkey, info->lastkey_length); ftbw->docid=info->lastpos; queue_replaced(& ftb->queue); } } dlist->ndocs=dptr - dlist->doc; ftbe=ftb->root; if (ftbe->cur_weight>0 && ftbe->yesses>=ftbe->ythresh && !ftbe->nos) { /* curdoc matched ! */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */ err: delete_tree(&aio.dtree); return dlist; /* info->lastpos=curdoc; */ /* do I need this ? */ if (!(*info->read_record)(info,curdoc,record)) { info->update|= HA_STATE_AKTIV; /* Record is read */ return 0; } return my_errno; } } return my_errno=HA_ERR_END_OF_FILE; }
myisam/ft_search.c +4 −3 Original line number Diff line number Diff line Loading @@ -38,9 +38,9 @@ FT_DOCLIST *ft_init_search(void *info, uint keynr, byte *query, return NULL; /* black magic OFF */ if (is_boolean(query, query_len)) dlist=ft_boolean_search(info,keynr,query,query_len); else // if (is_boolean(query, query_len)) // dlist=ft_boolean_search(info,keynr,query,query_len); // else dlist=ft_nlq_search(info,keynr,query,query_len); if(dlist && presort) Loading Loading @@ -72,3 +72,4 @@ int ft_read_next(FT_DOCLIST *handler, char *record) } return my_errno; }
sql/item_func.cc +5 −9 Original line number Diff line number Diff line Loading @@ -1903,7 +1903,7 @@ longlong Item_func_inet_aton::val_int() return 0; } double Item_func_match::val() double Item_func_match_nl::val() { if (ft_handler==NULL) init_search(1); Loading @@ -1922,7 +1922,7 @@ double Item_func_match::val() /* we'll have to find ft_relevance manually in ft_handler array */ int a,b,c; FT_DOC *docs=ft_handler->doc; FT_DOC *docs=((FT_DOCLIST *)ft_handler)->doc; my_off_t docid=table->file->row_position(); if ((null_value=(docid==HA_OFFSET_ERROR))) Loading @@ -1930,7 +1930,7 @@ double Item_func_match::val() // Assuming docs[] is sorted by dpos... for (a=0, b=ft_handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) for (a=0, b=((FT_DOCLIST *)ft_handler)->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) { if (docs[c].dpos > docid) b=c; Loading @@ -1941,7 +1941,6 @@ double Item_func_match::val() return docs[a].weight; else return 0.0; } void Item_func_match::init_search(bool no_order) Loading Loading @@ -1969,9 +1968,7 @@ void Item_func_match::init_search(bool no_order) tmp2.set("",0); } ft_handler=(FT_DOCLIST *) table->file->ft_init_ext(key, (byte*) ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order); ft_handler_init(ft_tmp->ptr(), ft_tmp->length(), join_key && !no_order); if (join_key) { Loading Loading @@ -2024,7 +2021,6 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist) return 0; } bool Item_func_match::fix_index() { List_iterator_fast<Item> li(fields); Loading
sql/item_func.h +21 −11 Original line number Diff line number Diff line Loading @@ -863,30 +863,40 @@ class Item_func_match :public Item_real_func uint key; bool join_key; Item_func_match *master; FT_DOCLIST *ft_handler; void * ft_handler; Item_func_match(List<Item> &a, Item *b): Item_real_func(b), fields(a), table(0), join_key(0), master(0), ft_handler(0) {} ~Item_func_match() { if (!master) if (!master && ft_handler) { if (ft_handler) { ft_close_search(ft_handler); ft_handler_close(); if(join_key) table->file->ft_handler=0; } } } const char *func_name() const { return "match"; } virtual int ft_handler_init(const byte *key, uint keylen, bool presort) { return 1; } virtual int ft_handler_close() { return 1; } enum Functype functype() const { return FT_FUNC; } void update_used_tables() {} bool fix_fields(THD *thd,struct st_table_list *tlist); bool eq(const Item *) const; double val(); longlong val_int() { return val()!=0.0; } bool fix_index(); void init_search(bool no_order); }; class Item_func_match_nl :public Item_func_match { public: Item_func_match_nl(List<Item> &a, Item *b): Item_func_match(a,b) {} const char *func_name() const { return "match_NL"; } double val(); int ft_handler_init(const byte *query, uint querylen, bool presort) { ft_handler=table->file->ft_init_ext(key, query, querylen, presort); } int ft_handler_close() { ft_close_search(ft_handler); ft_handler=0; } };