Loading Docs/manual.texi +385 −281 Original line number Diff line number Diff line Loading @@ -13435,6 +13435,13 @@ mysql> CREATE TABLE test ( For @code{BLOB} and @code{TEXT} columns, you must index a prefix of the column, you cannot index the entire thing. In @strong{MySQL} 3.23.23 or later, you can also create special @strong{fulltext} indexes. They are used for full-text search. Only @code{MyISAM} table type supports fulltext indexes. They can be created only from @code{VARCHAR}, @code{BLOB}, and @code{TEXT} columns. Indexing always happens over the entire column, partial indexing is not supported. See @ref{MySQL full-text search} for details of operation. @node Multiple-column indexes, Other-vendor column types, Indexes, Column types @subsection Multiple-column indexes Loading Loading @@ -14150,6 +14157,17 @@ mysql> select STRCMP('text2', 'text'); mysql> select STRCMP('text', 'text'); -> 0 @end example @findex MATCH ... AGAINST() @item MATCH (col1,col2,...) AGAINST (expr) @code{MATCH ... AGAINST()} is used for full-text search and returns the relevance, a similarity measure between the text in columns @code{(col1,col2,...)} and the query @code{expr}. Relevance is a non-negative floating point number. Zero relevance means no similarity. For @code{MATCH ... AGAINST()} to work, a @strong{fulltext index} must be created first. @xref{CREATE TABLE, , @code{CREATE TABLE}}. @code{MATCH ... AGAINST()} is available in @code{MySQL} 3.23.23 or later. For details and usage examples, see @ref{MySQL full-text search}. @end table @findex Casts Loading Loading @@ -16159,6 +16177,7 @@ create_definition: or KEY [index_name] (index_col_name,...) or INDEX [index_name] (index_col_name,...) or UNIQUE [INDEX] [index_name] (index_col_name,...) or FULLTEXT [INDEX] [index_name] (index_col_name,...) or [CONSTRAINT symbol] FOREIGN KEY index_name (index_col_name,...)
[reference_definition] or CHECK (expr) Loading Loading @@ -16401,6 +16420,14 @@ When you use @code{ORDER BY} or @code{GROUP BY} with a @code{TEXT} or @code{BLOB} column, only the first @code{max_sort_length} bytes are used. @xref{BLOB, , @code{BLOB}}. @item In @strong{MySQL} 3.23.23 or later, you can also create special @strong{fulltext} indexes. They are used for full-text search. Only @code{MyISAM} table type supports fulltext indexes. They can be created only from @code{VARCHAR}, @code{BLOB}, and @code{TEXT} columns. Indexing always happens over the entire column, partial indexing is not supported. See @ref{MySQL full-text search} for details of operation. @item The @code{FOREIGN KEY}, @code{CHECK} and @code{REFERENCES} clauses don't actually do anything. The syntax for them is provided only for compatibility, Loading Loading @@ -16570,6 +16597,7 @@ alter_specification: or ADD INDEX [index_name] (index_col_name,...) or ADD PRIMARY KEY (index_col_name,...) or ADD UNIQUE [index_name] (index_col_name,...) or ADD FULLTEXT [index_name] (index_col_name,...) or ALTER [COLUMN] col_name @{SET DEFAULT literal | DROP DEFAULT@} or CHANGE [COLUMN] old_col_name create_definition or MODIFY [COLUMN] create_definition Loading Loading @@ -19741,7 +19769,7 @@ dropped only with explicit @code{REVOKE} commands or by manipulating the @section @code{CREATE INDEX} syntax @example CREATE [UNIQUE] INDEX index_name ON tbl_name (col_name[(length)],... ) CREATE [UNIQUE|FULLTEXT] INDEX index_name ON tbl_name (col_name[(length)],... ) @end example The @code{CREATE INDEX} statement doesn't do anything in @strong{MySQL} prior Loading Loading @@ -19775,13 +19803,17 @@ which could save a lot of disk space and might also speed up @code{INSERT} operations! 
Note that you can only add an index on a column that can have @code{NULL} values or on a @code{BLOB}/@code{TEXT} column if you are useing values or on a @code{BLOB}/@code{TEXT} column if you are using @strong{MySQL} version 3.23.2 or newer and are using the @code{MyISAM} table type. For more information about how @strong{MySQL} uses indexes, see @ref{MySQL indexes, , @strong{MySQL} indexes}. Fulltext indexes can index only @code{VARCHAR}, @code{BLOB}, and @code{TEXT} columns, and only in @code{MyISAM} tables. Fulltext indexes are available from @strong{MySQL} 3.23.23. @xref{MySQL full-text search}. @findex DROP INDEX @node DROP INDEX, Comments, CREATE INDEX, Reference @section @code{DROP INDEX} syntax Loading Loading @@ -34010,9 +34042,10 @@ working on the @strong{MySQL} code. @menu * MySQL threads:: MySQL threads * MySQL full-text search:: MySQL full-text search @end menu @node MySQL threads, , MySQL internals, MySQL internals @node MySQL threads, MySQL full-text search, MySQL internals, MySQL internals @section MySQL threads The @strong{MySQL} server creates the following threads: Loading Loading @@ -34051,6 +34084,77 @@ started to read and apply updates from the master. @code{mysqladmin processlist} only shows the connection and @code{INSERT DELAYED} threads. @node MySQL full-text search, , MySQL threads, MySQL internals @section MySQL full-text search Since version 3.23.23, @strong{MySQL} has support for full-text indexing and searching. A full-text index in @strong{MySQL} is an index of type @code{FULLTEXT}. Fulltext indexes can be created from @code{VARCHAR}, @code{TEXT}, and @code{BLOB} columns at @code{CREATE TABLE} time or added later with @code{ALTER TABLE} or @code{CREATE INDEX}. Full-text search is performed with the @code{MATCH} function.
@example mysql> CREATE TABLE t (a VARCHAR(200), b TEXT, FULLTEXT (a,b)); Query OK, 0 rows affected (0.00 sec) mysql> INSERT INTO t VALUES -> ('MySQL has now support', 'for full-text search'), -> ('Full-text indexes', 'are called collections'), -> ('Only MyISAM tables','support collections'), -> ('Function MATCH ... AGAINST()','is used to do a search'), -> ('Full-text search in MySQL', 'implements vector space model'); Query OK, 5 rows affected (0.00 sec) Records: 5 Duplicates: 0 Warnings: 0 mysql> SELECT * FROM t WHERE MATCH (a,b) AGAINST ('MySQL'); +---------------------------+-------------------------------+ | a | b | +---------------------------+-------------------------------+ | MySQL has now support | for full-text search | | Full-text search in MySQL | implements vector space model | +---------------------------+-------------------------------+ 2 rows in set (0.00 sec) mysql> SELECT *,MATCH (a,b) AGAINST ('collections support') as x FROM t; +------------------------------+-------------------------------+--------+ | a | b | x | +------------------------------+-------------------------------+--------+ | MySQL has now support | for full-text search | 0.3834 | | Full-text indexes | are called collections | 0.3834 | | Only MyISAM tables | support collections | 0.7668 | | Function MATCH ... AGAINST() | is used to do a search | 0 | | Full-text search in MySQL | implements vector space model | 0 | +------------------------------+-------------------------------+--------+ 5 rows in set (0.00 sec) @end example Function @code{MATCH} matches a natural language query @code{AGAINST} a text collection (which is simply the columns that are covered by a fulltext index). For every row in a table it returns the relevance, a similarity measure between the text in that row (in the columns that are part of the collection) and the query. When it is used in a @code{WHERE} clause (see example above), the rows returned are automatically sorted by decreasing relevance.
Relevance is a non-negative floating point number. Zero relevance means no similarity. Relevance is computed based on the number of words in the row, the number of unique words in that row, the total number of words in the collection, the number of documents (rows) that contain a particular word, etc. MySQL uses a very simple parser to split text into words. A "word" is any sequence of letters, numbers, @code{'}, and @code{_}. Any "word" that is present in the stopword list or is just too short (3 characters or less) is ignored. Every correct word in the collection and in the query is weighted according to its significance in the query or collection. This way, a word that is present in many documents will have lower weight (and may even have a zero weight), because it has lower semantic value in this particular collection. Otherwise, if the word is rare, it will receive a higher weight. Weights of the words are then combined to compute the relevance. Such a technique works best with big collections (in fact, it was carefully tuned up this way). For very small tables, word distribution does not adequately reflect the words' semantic value, and this model may sometimes produce bizarre results. @page @node Environment variables, Users, MySQL internals, Top include/Makefile.am +1 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,7 @@ my_config.h: ../config.h # This should be changed in the source and removed.
my_global.h: global.h -$(RM) my_global.h $(CP) global.h my_global.h # These files should not be included in distributions since they are Loading include/ft_global.h +2 −0 Original line number Diff line number Diff line Loading @@ -40,6 +40,8 @@ typedef struct st_ft_doclist { FT_DOC doc[1]; } FT_DOCLIST; extern const char *ft_precompiled_stopwords[]; int ft_init_stopwords(const char **); FT_DOCLIST * ft_init_search(void *, uint, byte *, uint, my_bool); Loading myisam/ft_search.c +6 −1 Original line number Diff line number Diff line Loading @@ -158,6 +158,7 @@ FT_DOCLIST * ft_init_search(void *info, uint keynr, byte *key, ALL_IN_ONE aio; FT_DOCLIST *dlist; FT_DOC *dptr; my_off_t saved_lastpos; /* black magic ON */ if ((int) (keynr = _mi_check_index((MI_INFO *)info,keynr)) < 0) Loading @@ -173,6 +174,8 @@ FT_DOCLIST * ft_init_search(void *info, uint keynr, byte *key, aio.keyinfo=aio.info->s->keyinfo+keynr; aio.key_root=aio.info->s->state.key_root[keynr]; saved_lastpos=aio.info->lastpos; if(!(wtree=ft_parse(NULL,key,key_len))) return NULL; init_tree(&aio.dtree,0,sizeof(FT_SUPERDOC),(qsort_cmp)&FT_SUPERDOC_cmp,0, Loading @@ -199,6 +202,7 @@ FT_DOCLIST * ft_init_search(void *info, uint keynr, byte *key, } err: aio.info->lastpos=saved_lastpos; delete_tree(&aio.dtree); delete_tree(wtree); free(wtree); Loading @@ -217,7 +221,8 @@ int ft_read_next(FT_DOCLIST *handler, char *record) info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); if (!(*info->read_record)(info,handler->doc[handler->curdoc].dpos,record)) info->lastpos=handler->doc[handler->curdoc].dpos; if (!(*info->read_record)(info,info->lastpos,record)) { info->update|= HA_STATE_AKTIV; /* Record is read */ return 0; Loading myisam/fulltext.h +3 −5 Original line number Diff line number Diff line Loading @@ -35,8 +35,6 @@ extern const MI_KEYSEG ft_keysegs[FT_SEGS]; extern const char *ft_precompiled_stopwords[]; int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *); int _mi_ft_add(MI_INFO *, uint, byte *, const 
byte *, my_off_t); int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t); Loading
Docs/manual.texi +385 −281 Original line number Diff line number Diff line Loading @@ -13435,6 +13435,13 @@ mysql> CREATE TABLE test ( For @code{BLOB} and @code{TEXT} columns, you must index a prefix of the column, you cannot index the entire thing. In @strong{MySQL} 3.23.23 or later, you can also create special @strong{fulltext} indexes. They are used for full-text search. Only @code{MyISAM} table type supports fulltext indexes. They can be created only from @code{VARCHAR}, @code{BLOB}, and @code{TEXT} columns. Indexing always happens over the entire column, partial indexing is not supported. See @ref{MySQL full-text search} for details of operation. @node Multiple-column indexes, Other-vendor column types, Indexes, Column types @subsection Multiple-column indexes Loading Loading @@ -14150,6 +14157,17 @@ mysql> select STRCMP('text2', 'text'); mysql> select STRCMP('text', 'text'); -> 0 @end example @findex MATCH ... AGAINST() @item MATCH (col1,col2,...) AGAINST (expr) @code{MATCH ... AGAINST()} is used for full-text search and returns the relevance, a similarity measure between the text in columns @code{(col1,col2,...)} and the query @code{expr}. Relevance is a non-negative floating point number. Zero relevance means no similarity. For @code{MATCH ... AGAINST()} to work, a @strong{fulltext index} must be created first. @xref{CREATE TABLE, , @code{CREATE TABLE}}. @code{MATCH ... AGAINST()} is available in @code{MySQL} 3.23.23 or later. For details and usage examples, see @ref{MySQL full-text search}. @end table @findex Casts Loading Loading @@ -16159,6 +16177,7 @@ create_definition: or KEY [index_name] (index_col_name,...) or INDEX [index_name] (index_col_name,...) or UNIQUE [INDEX] [index_name] (index_col_name,...) or FULLTEXT [INDEX] [index_name] (index_col_name,...) or [CONSTRAINT symbol] FOREIGN KEY index_name (index_col_name,...)
[reference_definition] or CHECK (expr) Loading Loading @@ -16401,6 +16420,14 @@ When you use @code{ORDER BY} or @code{GROUP BY} with a @code{TEXT} or @code{BLOB} column, only the first @code{max_sort_length} bytes are used. @xref{BLOB, , @code{BLOB}}. @item In @strong{MySQL} 3.23.23 or later, you can also create special @strong{fulltext} indexes. They are used for full-text search. Only @code{MyISAM} table type supports fulltext indexes. They can be created only from @code{VARCHAR}, @code{BLOB}, and @code{TEXT} columns. Indexing always happens over the entire column, partial indexing is not supported. See @ref{MySQL full-text search} for details of operation. @item The @code{FOREIGN KEY}, @code{CHECK} and @code{REFERENCES} clauses don't actually do anything. The syntax for them is provided only for compatibility, Loading Loading @@ -16570,6 +16597,7 @@ alter_specification: or ADD INDEX [index_name] (index_col_name,...) or ADD PRIMARY KEY (index_col_name,...) or ADD UNIQUE [index_name] (index_col_name,...) or ADD FULLTEXT [index_name] (index_col_name,...) or ALTER [COLUMN] col_name @{SET DEFAULT literal | DROP DEFAULT@} or CHANGE [COLUMN] old_col_name create_definition or MODIFY [COLUMN] create_definition Loading Loading @@ -19741,7 +19769,7 @@ dropped only with explicit @code{REVOKE} commands or by manipulating the @section @code{CREATE INDEX} syntax @example CREATE [UNIQUE] INDEX index_name ON tbl_name (col_name[(length)],... ) CREATE [UNIQUE|FULLTEXT] INDEX index_name ON tbl_name (col_name[(length)],... ) @end example The @code{CREATE INDEX} statement doesn't do anything in @strong{MySQL} prior Loading Loading @@ -19775,13 +19803,17 @@ which could save a lot of disk space and might also speed up @code{INSERT} operations! 
Note that you can only add an index on a column that can have @code{NULL} values or on a @code{BLOB}/@code{TEXT} column if you are useing values or on a @code{BLOB}/@code{TEXT} column if you are using @strong{MySQL} version 3.23.2 or newer and are using the @code{MyISAM} table type. For more information about how @strong{MySQL} uses indexes, see @ref{MySQL indexes, , @strong{MySQL} indexes}. Fulltext indexes can index only @code{VARCHAR}, @code{BLOB}, and @code{TEXT} columns, and only in @code{MyISAM} tables. Fulltext indexes are available from @strong{MySQL} 3.23.23. @xref{MySQL full-text search}. @findex DROP INDEX @node DROP INDEX, Comments, CREATE INDEX, Reference @section @code{DROP INDEX} syntax Loading Loading @@ -34010,9 +34042,10 @@ working on the @strong{MySQL} code. @menu * MySQL threads:: MySQL threads * MySQL full-text search:: MySQL full-text search @end menu @node MySQL threads, , MySQL internals, MySQL internals @node MySQL threads, MySQL full-text search, MySQL internals, MySQL internals @section MySQL threads The @strong{MySQL} server creates the following threads: Loading Loading @@ -34051,6 +34084,77 @@ started to read and apply updates from the master. @code{mysqladmin processlist} only shows the connection and @code{INSERT DELAYED} threads. @node MySQL full-text search, , MySQL threads, MySQL internals @section MySQL full-text search Since version 3.23.23, @strong{MySQL} has support for full-text indexing and searching. A full-text index in @strong{MySQL} is an index of type @code{FULLTEXT}. Fulltext indexes can be created from @code{VARCHAR}, @code{TEXT}, and @code{BLOB} columns at @code{CREATE TABLE} time or added later with @code{ALTER TABLE} or @code{CREATE INDEX}. Full-text search is performed with the @code{MATCH} function.
@example mysql> CREATE TABLE t (a VARCHAR(200), b TEXT, FULLTEXT (a,b)); Query OK, 0 rows affected (0.00 sec) mysql> INSERT INTO t VALUES -> ('MySQL has now support', 'for full-text search'), -> ('Full-text indexes', 'are called collections'), -> ('Only MyISAM tables','support collections'), -> ('Function MATCH ... AGAINST()','is used to do a search'), -> ('Full-text search in MySQL', 'implements vector space model'); Query OK, 5 rows affected (0.00 sec) Records: 5 Duplicates: 0 Warnings: 0 mysql> SELECT * FROM t WHERE MATCH (a,b) AGAINST ('MySQL'); +---------------------------+-------------------------------+ | a | b | +---------------------------+-------------------------------+ | MySQL has now support | for full-text search | | Full-text search in MySQL | implements vector space model | +---------------------------+-------------------------------+ 2 rows in set (0.00 sec) mysql> SELECT *,MATCH (a,b) AGAINST ('collections support') as x FROM t; +------------------------------+-------------------------------+--------+ | a | b | x | +------------------------------+-------------------------------+--------+ | MySQL has now support | for full-text search | 0.3834 | | Full-text indexes | are called collections | 0.3834 | | Only MyISAM tables | support collections | 0.7668 | | Function MATCH ... AGAINST() | is used to do a search | 0 | | Full-text search in MySQL | implements vector space model | 0 | +------------------------------+-------------------------------+--------+ 5 rows in set (0.00 sec) @end example Function @code{MATCH} matches a natural language query @code{AGAINST} a text collection (which is simply the columns that are covered by a fulltext index). For every row in a table it returns the relevance, a similarity measure between the text in that row (in the columns that are part of the collection) and the query. When it is used in a @code{WHERE} clause (see example above), the rows returned are automatically sorted by decreasing relevance.
Relevance is a non-negative floating point number. Zero relevance means no similarity. Relevance is computed based on the number of words in the row, the number of unique words in that row, the total number of words in the collection, the number of documents (rows) that contain a particular word, etc. MySQL uses a very simple parser to split text into words. A "word" is any sequence of letters, numbers, @code{'}, and @code{_}. Any "word" that is present in the stopword list or is just too short (3 characters or less) is ignored. Every correct word in the collection and in the query is weighted according to its significance in the query or collection. This way, a word that is present in many documents will have lower weight (and may even have a zero weight), because it has lower semantic value in this particular collection. Otherwise, if the word is rare, it will receive a higher weight. Weights of the words are then combined to compute the relevance. Such a technique works best with big collections (in fact, it was carefully tuned up this way). For very small tables, word distribution does not adequately reflect the words' semantic value, and this model may sometimes produce bizarre results. @page @node Environment variables, Users, MySQL internals, Top
include/Makefile.am +1 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,7 @@ my_config.h: ../config.h # This should be changed in the source and removed. my_global.h: global.h -$(RM) my_global.h $(CP) global.h my_global.h # These files should not be included in distributions since they are Loading
include/ft_global.h +2 −0 Original line number Diff line number Diff line Loading @@ -40,6 +40,8 @@ typedef struct st_ft_doclist { FT_DOC doc[1]; } FT_DOCLIST; extern const char *ft_precompiled_stopwords[]; int ft_init_stopwords(const char **); FT_DOCLIST * ft_init_search(void *, uint, byte *, uint, my_bool); Loading
myisam/ft_search.c +6 −1 Original line number Diff line number Diff line Loading @@ -158,6 +158,7 @@ FT_DOCLIST * ft_init_search(void *info, uint keynr, byte *key, ALL_IN_ONE aio; FT_DOCLIST *dlist; FT_DOC *dptr; my_off_t saved_lastpos; /* black magic ON */ if ((int) (keynr = _mi_check_index((MI_INFO *)info,keynr)) < 0) Loading @@ -173,6 +174,8 @@ FT_DOCLIST * ft_init_search(void *info, uint keynr, byte *key, aio.keyinfo=aio.info->s->keyinfo+keynr; aio.key_root=aio.info->s->state.key_root[keynr]; saved_lastpos=aio.info->lastpos; if(!(wtree=ft_parse(NULL,key,key_len))) return NULL; init_tree(&aio.dtree,0,sizeof(FT_SUPERDOC),(qsort_cmp)&FT_SUPERDOC_cmp,0, Loading @@ -199,6 +202,7 @@ FT_DOCLIST * ft_init_search(void *info, uint keynr, byte *key, } err: aio.info->lastpos=saved_lastpos; delete_tree(&aio.dtree); delete_tree(wtree); free(wtree); Loading @@ -217,7 +221,8 @@ int ft_read_next(FT_DOCLIST *handler, char *record) info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); if (!(*info->read_record)(info,handler->doc[handler->curdoc].dpos,record)) info->lastpos=handler->doc[handler->curdoc].dpos; if (!(*info->read_record)(info,info->lastpos,record)) { info->update|= HA_STATE_AKTIV; /* Record is read */ return 0; Loading
myisam/fulltext.h +3 −5 Original line number Diff line number Diff line Loading @@ -35,8 +35,6 @@ extern const MI_KEYSEG ft_keysegs[FT_SEGS]; extern const char *ft_precompiled_stopwords[]; int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *); int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t); int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t);