Commit 1f5c9976 authored by serg@serg.mysql.com's avatar serg@serg.mysql.com
Browse files

ft boolean search by table scan; queue_fix()

parent 2050505f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ void delete_queue(QUEUE *queue);
void queue_insert(QUEUE *queue,byte *element);
byte *queue_remove(QUEUE *queue,uint idx);
void _downheap(QUEUE *queue,uint idx);
void queue_fix(QUEUE *queue);
#define is_queue_inited(queue) ((queue)->root != 0)

#ifdef	__cplusplus
+166 −91
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@

/* Written by Sergei A. Golubchik, who has a shared copyright to this code */

/*  TODO: add caching - pre-read several index entries at once */

#define FT_CORE
#include "ftdefs.h"
#include <queues.h>
@@ -78,7 +80,7 @@ typedef struct st_ft_info {
  struct _ft_vft *please;
  MI_INFO  *info;
  uint       keynr;
  int        ok;
  enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE, SCAN } state;
  FTB_EXPR  *root;
  QUEUE      queue;
  MEM_ROOT   mem_root;
@@ -101,13 +103,9 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
  FT_WORD     w;
  FTB_WORD   *ftbw;
  FTB_EXPR   *ftbe;
  MI_INFO    *info=ftb->info;
  int        r;
  MI_KEYDEF  *keyinfo=info->s->keyinfo+ftb->keynr;
  my_off_t    keyroot=info->s->state.key_root[ftb->keynr];
  uint  extra=HA_FT_WLEN+info->s->rec_reflength; /* just a shortcut */
  uint  extra=HA_FT_WLEN+ftb->info->s->rec_reflength; /* just a shortcut */

  if (! ftb->ok)
  if (ftb->state != UNINITIALIZED)
    return;

  param.prev=' ';
@@ -132,7 +130,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
      case 1:
        ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root,
            sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra));
        ftbw->len=w.len + !param.trunc;
        ftbw->len=w.len+1;
        ftbw->yesno=param.yesno;
        ftbw->trunc=param.trunc; /* 0 or 1 */
        ftbw->weight=weight;
@@ -142,21 +140,43 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
        memcpy(ftbw->word+1, w.pos, w.len);
        ftbw->word[0]=w.len;
        if (ftbw->yesno > 0) up->ythresh++;
        /*****************************************/
        queue_insert(& ftb->queue, (byte *)ftbw);
        break;
    }
  }
  return;
}

void  _ftb_init_index_search(FT_INFO *ftb)
{
  int i, r;
  FTB_WORD *ftbw;
  MI_INFO    *info=ftb->info;
  MI_KEYDEF  *keyinfo=info->s->keyinfo+ftb->keynr;
  my_off_t    keyroot=info->s->state.key_root[ftb->keynr];

  if (ftb->state != READY)
    return;
  ftb->state=INDEX_SEARCH;

  for (i=ftb->queue.elements; i; i--)
  {
    ftbw=(FTB_WORD *)(ftb->queue.root[i]);

    r=_mi_search(info, keyinfo, ftbw->word, ftbw->len,
                 SEARCH_FIND | SEARCH_PREFIX, keyroot);
    if (!r)
    {
      r=_mi_compare_text(default_charset_info,
                             info->lastkey+ftbw->trunc,ftbw->len,
                             ftbw->word+ftbw->trunc,ftbw->len,0);
                         info->lastkey+ftbw->trunc,ftbw->len-ftbw->trunc,
                         ftbw->word+ftbw->trunc,ftbw->len-ftbw->trunc,0);
    }
    if (r) /* not found */
    {
      if (ftbw->yesno>0 && ftbw->up->up==0)
      { /* this word MUST BE present in every document returned,
           so we can abort the search right now */
            ftb->ok=0;
        ftb->state=INDEX_DONE;
        return;
      }
    }
@@ -164,13 +184,9 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
    {
      memcpy(ftbw->word, info->lastkey, info->lastkey_length);
      ftbw->docid=info->lastpos;
          queue_insert(& ftb->queue, (byte *)ftbw);
        }
        /*****************************************/
        break;
    }
  }
  return;
  queue_fix(& ftb->queue);
}

FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
@@ -183,7 +199,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
  if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
    return 0;
  ftb->please=& _ft_vft_boolean;
  ftb->ok=1;
  ftb->state=UNINITIALIZED;
  ftb->info=info;
  ftb->keynr=keynr;

@@ -202,36 +218,16 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
  ftbe->docid=HA_POS_ERROR;
  ftb->root=ftbe;
  _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0, 0);
  ftb->state=READY;
  return ftb;
}

int ft_boolean_read_next(FT_INFO *ftb, char *record)
{
  FTB_EXPR  *ftbe, *up;
  FTB_WORD  *ftbw;
  MI_INFO   *info=ftb->info;
  MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr;
  my_off_t   keyroot=info->s->state.key_root[ftb->keynr];
  my_off_t   curdoc;
  int        r;

  /* black magic ON */
  if ((int) _mi_check_index(info, ftb->keynr) < 0)
    return my_errno;
  if (_mi_readinfo(info, F_RDLCK, 1))
    return my_errno;
  /* black magic OFF */

  if (!ftb->queue.elements)
    return my_errno=HA_ERR_END_OF_FILE;

  while(ftb->ok &&
    (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid) != HA_POS_ERROR)
  {
    while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid)
void _ftb_climb_the_tree(FTB_WORD *ftbw, my_off_t curdoc)
{
  FTB_EXPR *ftbe;
  float weight=ftbw->weight;
  int  yn=ftbw->yesno;

  for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
  {
    if (ftbe->docid != curdoc)
@@ -277,14 +273,46 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
      }
    }
  }
}

int ft_boolean_read_next(FT_INFO *ftb, char *record)
{
  FTB_EXPR  *ftbe, *up;
  FTB_WORD  *ftbw;
  MI_INFO   *info=ftb->info;
  MI_KEYDEF *keyinfo=info->s->keyinfo+ftb->keynr;
  my_off_t   keyroot=info->s->state.key_root[ftb->keynr];
  my_off_t   curdoc;
  int        r;

  if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
    return -1;
  
  /* black magic ON */
  if ((int) _mi_check_index(info, ftb->keynr) < 0)
    return my_errno;
  if (_mi_readinfo(info, F_RDLCK, 1))
    return my_errno;
  /* black magic OFF */

  if (!ftb->queue.elements)
    return my_errno=HA_ERR_END_OF_FILE;

  while(ftb->state == INDEX_SEARCH &&
    (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid) != HA_POS_ERROR)
  {
    while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid)
    {
      _ftb_climb_the_tree(ftbw, curdoc);

      /* update queue */
      r=_mi_search(info, keyinfo, ftbw->word, USE_WHOLE_KEY, /*ftbw->len,*/
      r=_mi_search(info, keyinfo, ftbw->word, USE_WHOLE_KEY,
                   SEARCH_BIGGER , keyroot);
      if (!r)
      {
        r=_mi_compare_text(default_charset_info,
                           info->lastkey+ftbw->trunc,ftbw->len,
                           ftbw->word+ftbw->trunc,ftbw->len,0);
                           info->lastkey+ftbw->trunc,ftbw->len-ftbw->trunc,
                           ftbw->word+ftbw->trunc,ftbw->len-ftbw->trunc,0);
      }
      if (r) /* not found */
      {
@@ -292,7 +320,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
        if (ftbw->yesno>0 && ftbw->up->up==0)
        { /* this word MUST BE present in every document returned,
             so we can stop the search right now */
          ftb->ok=0;
          ftb->state=INDEX_DONE;
        }
      }
      else
@@ -304,7 +332,8 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
    }

    ftbe=ftb->root;
    if (ftbe->cur_weight>0 && ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
    if (ftbe->docid==curdoc && ftbe->cur_weight>0 &&
        ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
    {
      /* curdoc matched ! */
      info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */
@@ -321,10 +350,56 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
  return my_errno=HA_ERR_END_OF_FILE;
}

/*
  Compute the relevance of a single row against the parsed boolean query
  by parsing the row itself (table scan) instead of walking the index.

  ftb     search handle; accepted only in state READY (first call) or SCAN
  docid   row position; passed to _ftb_climb_the_tree() and compared with
          the root expression's docid to decide whether the row matched
  record  row image handed to _mi_ft_parse()

  Returns the root expression's cur_weight when the row matches, 0.0 when
  it does not, and -1.0 when the handle is in any other state or
  _mi_ft_parse() reports failure.
*/
float ft_boolean_find_relevance(FT_INFO *ftb,
    my_off_t docid __attribute__((unused)), byte *record)
float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid, byte *record)
{
  TREE      ptree;
  FT_WORD   word;
  FTB_WORD *ftbw;
  FTB_EXPR *ftbe;
  uint      i;

  /* first scan call: run queue_fix() on the word queue once, then stay in
     SCAN; a handle in any state other than READY or SCAN is rejected */
  if (ftb->state == READY)
  {
    queue_fix(& ftb->queue);
    ftb->state=SCAN;
  }
  else if (ftb->state != SCAN)
    return -1.0;

  bzero(&ptree, sizeof(ptree));
  /* NOTE(review): ptree is not passed to delete_tree() on this early return;
     confirm _mi_ft_parse() leaves nothing allocated when it fails */
  if (_mi_ft_parse(& ptree, ftb->info, ftb->keynr, record))
    return -1.0;

  /* queue.root[] is walked from index 1 up to and including elements */
  for (i=1; i<=ftb->queue.elements; i++)
  {
  /* NOTE(review): the next line looks like the pre-change function body shown
     alongside the new code in this diff view; taken literally it would leave
     the loop on its first iteration - confirm it is not live */
  return -1.0; /* to be done via str scan */
    ftbw=(FTB_WORD *)(ftb->queue.root[i]);
    /* hand the word's trunc flag to the tree through custom_arg */
    ptree.custom_arg=(void *)(ftbw->trunc);
    /* the parser stores the word length in word[0] and sets len to the
       word length plus one, so the text is at word+1 and is len-1 bytes */
    word.pos=ftbw->word+1;
    word.len=ftbw->len-1;
    if (tree_search(& ptree, & word))
    { /* found! */
      _ftb_climb_the_tree(ftbw, docid);
    }
    else
    { /* not found! */
      if (ftbw->yesno>0 && ftbw->up->up==0)
      { /* but this word MUST BE present in every document matched,
           so we can stop the search right now */
        break;
      }
    }
  }
  delete_tree(& ptree);
  ftbe=ftb->root;
  /* the root counts as matched only if it was updated for this very docid,
     has positive weight, enough yesses and no nos */
  if (ftbe->docid==docid && ftbe->cur_weight>0 &&
      ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
  { /* row matched ! */
    return ftbe->cur_weight;
  }
  else
  { /* match failed ! */
    return 0.0;
  }
}

void ft_boolean_close_search(FT_INFO *ftb)
@@ -345,6 +420,6 @@ my_off_t ft_boolean_get_docid(FT_INFO *ftb)

/*
  Re-initialisation entry point of the boolean search: passes the handle
  to _ftb_init_index_search().
*/
void ft_boolean_reinit_search(FT_INFO *ftb)
{
  /* NOTE(review): this stderr trace looks like the pre-change body shown
     next to the new call in this diff view - confirm which line is live */
  fprintf(stderr, "ft_boolean_reinit_search called!\n");
  _ftb_init_index_search(ftb);
}
+11 −23
Original line number Diff line number Diff line
@@ -33,17 +33,16 @@ typedef struct st_ft_docstat {
  double max, nsum, nsum2;
#endif /* EVAL_RUN */

  MI_INFO *info;
  uint keynr;
  byte *keybuf;
//  MI_INFO *info;
//  uint keynr;
//  byte *keybuf;
} FT_DOCSTAT;

/*
  Comparison function for two FT_WORD elements: compares the words' text
  with _mi_compare_text() under default_charset_info and returns its result.

  cmp_arg  cast to my_bool and forwarded as the last argument of
           _mi_compare_text() in the variant that does not mark it unused
           (presumably the tree's custom_arg, i.e. the trunc flag of the
           word being looked up - verify against the caller)
  w1, w2   words to compare (pos/len pairs)

  NOTE(review): both the old and the new signature, and both forms of the
  last argument line, appear below as rendered by the diff view.
*/
static int FT_WORD_cmp(void* cmp_arg __attribute__((unused)),
		       FT_WORD *w1, FT_WORD *w2)
static int FT_WORD_cmp(void* cmp_arg, FT_WORD *w1, FT_WORD *w2)
{
  return _mi_compare_text(default_charset_info,
			  (uchar*) w1->pos,w1->len,
			  (uchar*) w2->pos, w2->len,0);
			  (uchar*) w2->pos, w2->len,(my_bool)cmp_arg);
}

static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
@@ -64,7 +63,9 @@ static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)

/* transforms tree of words into the array, applying normalization */

FT_WORD * ft_linearize(MI_INFO *info, uint keynr, byte *keybuf, TREE *wtree)
FT_WORD * ft_linearize(//MI_INFO *info, uint keynr,
    //byte *keybuf,
    TREE *wtree)
{
  FT_WORD *wlist,*p;
  FT_DOCSTAT docstat;
@@ -73,9 +74,9 @@ FT_WORD * ft_linearize(MI_INFO *info, uint keynr, byte *keybuf, TREE *wtree)
  if ((wlist=(FT_WORD *) my_malloc(sizeof(FT_WORD)*
				   (1+wtree->elements_in_tree),MYF(0))))
  {
    docstat.info=info;
    docstat.keynr=keynr;
    docstat.keybuf=keybuf;
//    docstat.info=info;
//    docstat.keynr=keynr;
//    docstat.keybuf=keybuf;
    docstat.list=wlist;
    docstat.uniq=wtree->elements_in_tree;
#ifdef EVAL_RUN
@@ -207,19 +208,6 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word)
  return 0;
}

/*
  Tell whether a query string contains boolean-search operators.

  q    start of the query text (len bytes; need not be NUL-terminated)
  len  number of bytes in q

  Returns 1 when the query starts with FTB_YES or FTB_NO, or when one of
  these characters appears later in the query preceded by a space and
  followed by a word character (true_word_char); returns 0 otherwise,
  including for an empty query.
*/
int is_boolean(byte *q, uint len)
{
  byte *last;

  if (!len)
    return 0;
  if (*q == FTB_YES || *q == FTB_NO)
    return 1;

  /*
    An operator inside the query needs a following character to qualify.
    The last byte has no successor within the buffer, so the scan stops
    one byte short of it rather than testing q[len], which lies outside
    the len bytes the caller passed in.
  */
  last= q + len - 1;
  for (++q; q < last; ++q)
  {
    if ((*q == FTB_YES || *q == FTB_NO) && q[-1] == ' ' && true_word_char(q[1]))
      return 1;
  }
  return 0;
}

TREE * ft_parse(TREE *wtree, byte *doc, int doclen)
{
  byte   *end=doc+doclen;
+20 −12
Original line number Diff line number Diff line
@@ -29,17 +29,12 @@


/* parses a document i.e. calls ft_parse for every keyseg */
FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, byte *keybuf,
				    const byte *record)
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record)
{
  TREE *parsed, ptree;
  MI_KEYSEG *keyseg;
  byte *pos;
  uint i;
  MI_KEYSEG *keyseg=info->s->keyinfo[keynr].seg;

  bzero(parsed=&ptree, sizeof(ptree));

  keyseg=info->s->keyinfo[keynr].seg;
  for (i=info->s->keyinfo[keynr].keysegs-FT_SEGS ; i-- ; )
  {
    uint len;
@@ -62,13 +57,26 @@ FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, byte *keybuf,
    }
    else
      len=keyseg->length;
    if (!(parsed=ft_parse(parsed, pos, len)))
      return NULL;
    if (!(ft_parse(parsed, pos, len)))
      return 1;
  }
  /* Handle the case where all columns are NULL */
  if (!is_tree_inited(parsed) && !(parsed=ft_parse(parsed, (byte*) "", 0)))
  if (!is_tree_inited(parsed) && !(ft_parse(parsed, (byte*) "", 0)))
    return 1;
  else
    return 0;
}

/*
  Parse one record into a word tree and return it as an array.

  Zeroes a local TREE, fills it with _mi_ft_parse() for the given key of
  the given record, and returns what ft_linearize() produces from that
  tree. Returns NULL when _mi_ft_parse() reports failure.
*/
FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, byte *keybuf,
				    const byte *record)
{
  TREE ptree;

  bzero(&ptree, sizeof(ptree));
  if (_mi_ft_parse(& ptree, info, keynr, record))
    return NULL;
  /* NOTE(review): the next return refers to `parsed`, which is not declared
     in this function as shown; it looks like the pre-change line left in by
     the diff view - confirm it is not live */
  return ft_linearize(info, keynr, keybuf, parsed);

  return ft_linearize(/*info, keynr, keybuf, */ & ptree);
}

static int _mi_ft_store(MI_INFO *info, uint keynr, byte *keybuf,
+2 −2
Original line number Diff line number Diff line
@@ -120,7 +120,7 @@ byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *);
byte ft_simple_get_word(byte **, byte *, FT_WORD *);

TREE * ft_parse(TREE *, byte *, int);
FT_WORD * ft_linearize(MI_INFO *, uint, byte *, TREE *);
FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *);

const struct _ft_vft _ft_vft_nlq;
Loading