Commit 9c6effa6 authored by serg@serg.mysql.com's avatar serg@serg.mysql.com
Browse files

ft_boolean_search.c: bugfix for boolean queries of the form "+(aaa bbb) +ccc"

ft_dump.c: new -l option - report word length distribution (for ft_max_word_len_for_sort)
parent d1e97195
Loading
Loading
Loading
Loading
+6 −9
Original line number Diff line number Diff line
@@ -239,10 +239,12 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, my_off_t curdoc)
      ftbe->cur_weight=ftbe->yesses=ftbe->nos=0;
      ftbe->docid=curdoc;
    }
    if (ftbe->nos)
      break;
    if (yn>0)
    {
      ftbe->cur_weight+=weight;
      if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
      if (++ftbe->yesses == ftbe->ythresh)
      {
        yn=ftbe->yesno;
        weight=ftbe->cur_weight*ftbe->weight;
@@ -265,15 +267,10 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, my_off_t curdoc)
 /* if (yn==0) */
    {
      ftbe->cur_weight+=weight;
      if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
      {
        yn=ftbe->yesno;
        weight*=ftbe->weight;
      }
      else
      {
      if (ftbe->yesses < ftbe->ythresh)
        break;
      }
      yn= (ftbe->yesses++ == ftbe->ythresh) * ftbe->yesno;
      weight*=ftbe->weight;
    }
  }
}
+35 −9
Original line number Diff line number Diff line
@@ -23,16 +23,17 @@ static void get_options(int argc,char *argv[]);
static void usage(char *argv[]);
static void complain(int val);

static int count=0, stats=0, dump=0, verbose=0;
static int count=0, stats=0, dump=0, verbose=0, lstats=0;
static char *query=NULL;
static uint lengths[256];

#define MAX (HA_FT_MAXLEN+10)
#define HOW_OFTEN_TO_WRITE 1000
#define HOW_OFTEN_TO_WRITE 10000

int main(int argc,char *argv[])
{
  int error=0;
  uint keylen, inx, doc_cnt=0;
  uint keylen, keylen2, inx, doc_cnt=0;
  float weight;
  double gws, min_gws=0, avg_gws=0;
  MI_INFO *info;
@@ -44,7 +45,7 @@ int main(int argc,char *argv[])
  get_options(argc,argv);
  if (count || dump)
    verbose=0;
  else
  if (!count && !dump && !lstats && !query)
    stats=1;

  if (verbose)
@@ -107,6 +108,7 @@ int main(int argc,char *argv[])
      snprintf(buf,MAX,"%.*s",(int) keylen,info->lastkey+1);
      casedn_str(buf);
      total++;
      lengths[keylen]++;

      if (count || stats)
      {
@@ -119,9 +121,9 @@ int main(int argc,char *argv[])
            avg_gws+=gws=GWS_IN_USE;
            if (count)
              printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
            if (maxlen<keylen)
            if (maxlen<keylen2)
            {
              maxlen=keylen;
              maxlen=keylen2;
              strcpy(buf_maxlen, buf2);
            }
            if (max_doc_cnt < doc_cnt)
@@ -132,6 +134,7 @@ int main(int argc,char *argv[])
            }
          }
          strcpy(buf2, buf);
          keylen2=keylen;
          doc_cnt=0;
        }
      }
@@ -143,12 +146,33 @@ int main(int argc,char *argv[])
    }

    if (stats)
    {
      count=0;
      for (inx=0;inx<256;inx++)
      {
        count+=lengths[inx];
        if (count >= total/2)
          break;
      }
      printf("Total rows: %qu\nTotal words: %lu\n"
             "Unique words: %lu\nLongest word: %lu chars (%s)\n"
             "Median length: %u\n"
             "Average global weight: %f\n"
             "Most common word: %lu times, weight: %f (%s)\n",
             (ulonglong)info->state->records, total, uniq, maxlen, buf_maxlen,
             avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
             inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
    }
    if (lstats)
    {
      count=0;
      for (inx=0; inx<256; inx++)
      {
        count+=lengths[inx];
        if (count && lengths[inx])
          printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
              lengths[inx],100.0*lengths[inx]/total,count, 100.0*count/total);
      }
    }
  }

err:
@@ -159,7 +183,7 @@ int main(int argc,char *argv[])
  return 0;
}

const char *options="dscvh";
const char *options="dslcvh";

static void get_options(int argc, char *argv[])
{
@@ -172,6 +196,7 @@ static void get_options(int argc, char *argv[])
    case 's': stats=1; complain(query!=0); break;
    case 'v': verbose=1; break;
    case 'c': count=1; complain(dump || query); break;
    case 'l': lstats=1; complain(query!=0); break;
    case 'e': query=my_strdup(optarg,MYF(MY_FAE)); complain(dump || count || stats); break;
    case '?':
    case 'h':
@@ -189,6 +214,7 @@ Use: %s [-%s] <table_name> <index_no>

-d      dump index (incl. data offsets and word weights)
-s      report global stats
-l      report length distribution
-c      calculate per-word stats (counts and global weights)
-v      be verbose
-h      this text\n