Loading Docs/manual.texi +8 −0 Original line number Diff line number Diff line Loading @@ -26728,6 +26728,12 @@ In the first statement, the @code{LIKE} value begins with a wildcard character. In the second statement, the @code{LIKE} value is not a constant. MySQL 4.0 does another optimization on @code{LIKE}. If you are using @code{... LIKE "%string%"} and @code{string} is longer than 3 characters then MySQL will use the turbo-boyer-more algorithm to once initialize the pattern for the string and then use this pattern to quickly search after the given string. @findex IS NULL, and indexes @cindex indexes, and @code{IS NULL} Searching using @code{column_name IS NULL} will use indexes if column_name Loading Loading @@ -49310,6 +49316,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item Use turbo-boyer-more to speed up @code{LIKE "%keyword%"} searches. @item Fixed bug in @code{DROP DATABASE} with symlink. @item Fixed crash in @code{REPAIR ... USE_FRM}. mysql-test/r/func_like.result +11 −0 Original line number Diff line number Diff line Loading @@ -15,4 +15,15 @@ test select * from t1 where a like "te_t"; a test select * from t1 where a like "%a%"; a a abc abcd select * from t1 where a like "%abcd%"; a abcd select * from t1 where a like "%abc\d%"; a abcd drop table t1; mysql-test/t/func_like.test +8 −0 Original line number Diff line number Diff line Loading @@ -9,4 +9,12 @@ select * from t1 where a like "abc%"; select * from t1 where a like "ABC%"; select * from t1 where a like "test%"; select * from t1 where a like "te_t"; # # The following will test the boyer-more code # select * from t1 where a like "%a%"; select * from t1 where a like "%abcd%"; select * from t1 where a like "%abc\d%"; drop table t1; sql/item_cmpfunc.cc +261 −5 Original line number Diff line number Diff line Loading @@ -1228,23 +1228,23 @@ void Item_func_like::fix_length_and_dec() // cmp_type=STRING_RESULT; // For quick select } longlong Item_func_like::val_int() { String *res,*res2; res=args[0]->val_str(&tmp_value1); String* res = args[0]->val_str(&tmp_value1); if (args[0]->null_value) { null_value=1; return 0; } res2=args[1]->val_str(&tmp_value2); String* res2 = args[1]->val_str(&tmp_value2); if (args[1]->null_value) { null_value=1; return 0; } null_value=0; if (canDoTurboBM) return turboBM_matches(res->ptr(), res->length()) ? 1 : 0; if (binary) return wild_compare(*res,*res2,escape) ? 0 : 1; else Loading @@ -1268,6 +1268,51 @@ Item_func::optimize_type Item_func_like::select_optimize() const return OPTIMIZE_NONE; } bool Item_func_like::fix_fields(THD *thd,struct st_table_list *tlist) { if (Item_bool_func2::fix_fields(thd, tlist)) return 1; /* TODO--we could do it for non-const, but we'd have to recompute the tables for each row--probably not worth it. */ if (args[1]->const_item() && !(specialflag & SPECIAL_NO_NEW_FUNC)) { String* res2 = args[1]->val_str(&tmp_value2); const size_t len = res2->length(); const char* first = res2->ptr(); const char* last = first + len - 1; /* len must be > 2 ('%pattern%') heuristic: only do TurboBM for pattern_len > 2 */ if (len > MIN_TURBOBM_PATTERN_LEN + 2 && *first == wild_many && *last == wild_many) { const char* tmp = first + 1; for ( ; *tmp != wild_many && *tmp != wild_one && *tmp != escape; tmp++) ; canDoTurboBM = tmp == last; } if (canDoTurboBM) { pattern = first + 1; pattern_len = len - 2; DBUG_PRINT("TurboBM", ("Initializing pattern: '%s'...", first)); int* suff = (int*)thd->alloc(sizeof(int[pattern_len + 1])); bmGs = (int*)thd->alloc(sizeof(int[pattern_len + 1])); bmBc = (int*)thd->alloc(sizeof(int[alphabet_size])); turboBM_compute_good_suffix_shifts(suff); turboBM_compute_bad_character_shifts(); DBUG_PRINT("turboBM",("done")); } } return 0; } #ifdef USE_REGEX bool Loading Loading @@ -1307,7 +1352,6 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables) return 0; } longlong Item_func_regex::val_int() { char buff[MAX_FIELD_WIDTH]; Loading Loading @@ -1364,3 +1408,215 @@ Item_func_regex::~Item_func_regex() } #endif /* USE_REGEX */ #ifdef LIKE_CMP_TOUPPER #define likeconv(A) (uchar) toupper(A) #else #define likeconv(A) (uchar) my_sort_order[(uchar) (A)] #endif /********************************************************************** turboBM_compute_suffixes() Precomputation dependent only on pattern_len. **********************************************************************/ void Item_func_like::turboBM_compute_suffixes(int* suff) { const int plm1 = pattern_len - 1; int f = 0; int g = plm1; int* const splm1 = suff + plm1; *splm1 = pattern_len; if (binary) { int i; for (i = pattern_len - 2; i >= 0; i--) { int tmp = *(splm1 + i - f); if (g < i && tmp < i - g) suff[i] = tmp; else { if (i < g) g = i; // g = min(i, g) f = i; while (g >= 0 && pattern[g] == pattern[g + plm1 - f]) g--; suff[i] = f - g; } } } else { int i; for (i = pattern_len - 2; 0 <= i; --i) { int tmp = *(splm1 + i - f); if (g < i && tmp < i - g) suff[i] = tmp; else { if (i < g) g = i; // g = min(i, g) f = i; while (g >= 0 && likeconv(pattern[g]) == likeconv(pattern[g + plm1 - f])) g--; suff[i] = f - g; } } } } /********************************************************************** turboBM_compute_good_suffix_shifts() Precomputation dependent only on pattern_len. **********************************************************************/ void Item_func_like::turboBM_compute_good_suffix_shifts(int* suff) { turboBM_compute_suffixes(suff); int* end = bmGs + pattern_len; int* k; for (k = bmGs; k < end; k++) *k = pattern_len; int tmp; int i; int j = 0; const int plm1 = pattern_len - 1; for (i = plm1; i > -1; i--) { if (suff[i] == i + 1) { for (tmp = plm1 - i; j < tmp; j++) { int* tmp2 = bmGs + j; if (*tmp2 == pattern_len) *tmp2 = tmp; } } } int* tmp2; for (tmp = plm1 - i; j < tmp; j++) { tmp2 = bmGs + j; if (*tmp2 == pattern_len) *tmp2 = tmp; } tmp2 = bmGs + plm1; for (i = 0; i <= pattern_len - 2; i++) *(tmp2 - suff[i]) = plm1 - i; } /********************************************************************** turboBM_compute_bad_character_shifts() Precomputation dependent on pattern_len. **********************************************************************/ void Item_func_like::turboBM_compute_bad_character_shifts() { int* i; int* end = bmBc + alphabet_size; for (i = bmBc; i < end; i++) *i = pattern_len; int j; const int plm1 = pattern_len - 1; if (binary) for (j = 0; j < plm1; j++) bmBc[pattern[j]] = plm1 - j; else for (j = 0; j < plm1; j++) bmBc[likeconv(pattern[j])] = plm1 - j; } /********************************************************************** turboBM_matches() Search for pattern in text, returns true/false for match/no match **********************************************************************/ bool Item_func_like::turboBM_matches(const char* text, int text_len) const { register int bcShift; register int turboShift; int shift = pattern_len; int j = 0; int u = 0; const int plm1 = pattern_len - 1; const int tlmpl = text_len - pattern_len; /* Searching */ if (binary) { while (j <= tlmpl) { register int i = plm1; while (i >= 0 && pattern[i] == text[i + j]) { i--; if (i == plm1 - shift) i -= u; } if (i < 0) return true; register const int v = plm1 - i; turboShift = u - v; bcShift = bmBc[text[i + j]] - plm1 + i; shift = max(turboShift, bcShift); shift = max(shift, bmGs[i]); if (shift == bmGs[i]) u = min(pattern_len - shift, v); else { if (turboShift < bcShift) shift = max(shift, u + 1); u = 0; } j += shift; } return false; } else { while (j <= tlmpl) { register int i = plm1; while (i >= 0 && likeconv(pattern[i]) == likeconv(text[i + j])) { i--; if (i == plm1 - shift) i -= u; } if (i < 0) return true; register const int v = plm1 - i; turboShift = u - v; bcShift = bmBc[likeconv(text[i + j])] - plm1 + i; shift = max(turboShift, bcShift); shift = max(shift, bmGs[i]); if (shift == bmGs[i]) u = min(pattern_len - shift, v); else { if (turboShift < bcShift) shift = max(shift, u + 1); u = 0; } j += shift; } return false; } } sql/item_cmpfunc.h +27 −2 Original line number Diff line number Diff line Loading @@ -478,15 +478,40 @@ class Item_func_isnotnull :public Item_bool_func class Item_func_like :public Item_bool_func2 { char escape; public: Item_func_like(Item *a,Item *b, char* escape_arg) :Item_bool_func2(a,b),escape(*escape_arg) // Turbo Boyer-Moore data bool canDoTurboBM; // pattern is '%abcd%' case const char* pattern; int pattern_len; // TurboBM buffers, *this is owner int* bmGs; // good suffix shift table, size is pattern_len + 1 int* bmBc; // bad character shift table, size is alphabet_size void turboBM_compute_suffixes(int* suff); void turboBM_compute_good_suffix_shifts(int* suff); void turboBM_compute_bad_character_shifts(); bool turboBM_matches(const char* text, int text_len) const; enum { alphabet_size = 256 }; public: Item_func_like::Item_func_like(Item *a,Item *b, char* escape_arg) : Item_bool_func2(a,b), escape(*escape_arg), canDoTurboBM(false), pattern(0), pattern_len(0), bmGs(0), bmBc(0) {} longlong val_int(); enum Functype functype() const { return LIKE_FUNC; } optimize_type select_optimize() const; cond_result eq_cmp_result() const { return COND_TRUE; } const char *func_name() const { return "like"; } void fix_length_and_dec(); bool fix_fields(THD *thd,struct st_table_list *tlist); }; #ifdef USE_REGEX Loading Loading
Docs/manual.texi +8 −0 Original line number Diff line number Diff line Loading @@ -26728,6 +26728,12 @@ In the first statement, the @code{LIKE} value begins with a wildcard character. In the second statement, the @code{LIKE} value is not a constant. MySQL 4.0 does another optimization on @code{LIKE}. If you are using @code{... LIKE "%string%"} and @code{string} is longer than 3 characters then MySQL will use the turbo-boyer-more algorithm to once initialize the pattern for the string and then use this pattern to quickly search after the given string. @findex IS NULL, and indexes @cindex indexes, and @code{IS NULL} Searching using @code{column_name IS NULL} will use indexes if column_name Loading Loading @@ -49310,6 +49316,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item Use turbo-boyer-more to speed up @code{LIKE "%keyword%"} searches. @item Fixed bug in @code{DROP DATABASE} with symlink. @item Fixed crash in @code{REPAIR ... USE_FRM}.
mysql-test/r/func_like.result +11 −0 Original line number Diff line number Diff line Loading @@ -15,4 +15,15 @@ test select * from t1 where a like "te_t"; a test select * from t1 where a like "%a%"; a a abc abcd select * from t1 where a like "%abcd%"; a abcd select * from t1 where a like "%abc\d%"; a abcd drop table t1;
mysql-test/t/func_like.test +8 −0 Original line number Diff line number Diff line Loading @@ -9,4 +9,12 @@ select * from t1 where a like "abc%"; select * from t1 where a like "ABC%"; select * from t1 where a like "test%"; select * from t1 where a like "te_t"; # # The following will test the boyer-more code # select * from t1 where a like "%a%"; select * from t1 where a like "%abcd%"; select * from t1 where a like "%abc\d%"; drop table t1;
sql/item_cmpfunc.cc +261 −5 Original line number Diff line number Diff line Loading @@ -1228,23 +1228,23 @@ void Item_func_like::fix_length_and_dec() // cmp_type=STRING_RESULT; // For quick select } longlong Item_func_like::val_int() { String *res,*res2; res=args[0]->val_str(&tmp_value1); String* res = args[0]->val_str(&tmp_value1); if (args[0]->null_value) { null_value=1; return 0; } res2=args[1]->val_str(&tmp_value2); String* res2 = args[1]->val_str(&tmp_value2); if (args[1]->null_value) { null_value=1; return 0; } null_value=0; if (canDoTurboBM) return turboBM_matches(res->ptr(), res->length()) ? 1 : 0; if (binary) return wild_compare(*res,*res2,escape) ? 0 : 1; else Loading @@ -1268,6 +1268,51 @@ Item_func::optimize_type Item_func_like::select_optimize() const return OPTIMIZE_NONE; } bool Item_func_like::fix_fields(THD *thd,struct st_table_list *tlist) { if (Item_bool_func2::fix_fields(thd, tlist)) return 1; /* TODO--we could do it for non-const, but we'd have to recompute the tables for each row--probably not worth it. */ if (args[1]->const_item() && !(specialflag & SPECIAL_NO_NEW_FUNC)) { String* res2 = args[1]->val_str(&tmp_value2); const size_t len = res2->length(); const char* first = res2->ptr(); const char* last = first + len - 1; /* len must be > 2 ('%pattern%') heuristic: only do TurboBM for pattern_len > 2 */ if (len > MIN_TURBOBM_PATTERN_LEN + 2 && *first == wild_many && *last == wild_many) { const char* tmp = first + 1; for ( ; *tmp != wild_many && *tmp != wild_one && *tmp != escape; tmp++) ; canDoTurboBM = tmp == last; } if (canDoTurboBM) { pattern = first + 1; pattern_len = len - 2; DBUG_PRINT("TurboBM", ("Initializing pattern: '%s'...", first)); int* suff = (int*)thd->alloc(sizeof(int[pattern_len + 1])); bmGs = (int*)thd->alloc(sizeof(int[pattern_len + 1])); bmBc = (int*)thd->alloc(sizeof(int[alphabet_size])); turboBM_compute_good_suffix_shifts(suff); turboBM_compute_bad_character_shifts(); DBUG_PRINT("turboBM",("done")); } } return 0; } #ifdef USE_REGEX bool Loading Loading @@ -1307,7 +1352,6 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables) return 0; } longlong Item_func_regex::val_int() { char buff[MAX_FIELD_WIDTH]; Loading Loading @@ -1364,3 +1408,215 @@ Item_func_regex::~Item_func_regex() } #endif /* USE_REGEX */ #ifdef LIKE_CMP_TOUPPER #define likeconv(A) (uchar) toupper(A) #else #define likeconv(A) (uchar) my_sort_order[(uchar) (A)] #endif /********************************************************************** turboBM_compute_suffixes() Precomputation dependent only on pattern_len. **********************************************************************/ void Item_func_like::turboBM_compute_suffixes(int* suff) { const int plm1 = pattern_len - 1; int f = 0; int g = plm1; int* const splm1 = suff + plm1; *splm1 = pattern_len; if (binary) { int i; for (i = pattern_len - 2; i >= 0; i--) { int tmp = *(splm1 + i - f); if (g < i && tmp < i - g) suff[i] = tmp; else { if (i < g) g = i; // g = min(i, g) f = i; while (g >= 0 && pattern[g] == pattern[g + plm1 - f]) g--; suff[i] = f - g; } } } else { int i; for (i = pattern_len - 2; 0 <= i; --i) { int tmp = *(splm1 + i - f); if (g < i && tmp < i - g) suff[i] = tmp; else { if (i < g) g = i; // g = min(i, g) f = i; while (g >= 0 && likeconv(pattern[g]) == likeconv(pattern[g + plm1 - f])) g--; suff[i] = f - g; } } } } /********************************************************************** turboBM_compute_good_suffix_shifts() Precomputation dependent only on pattern_len. **********************************************************************/ void Item_func_like::turboBM_compute_good_suffix_shifts(int* suff) { turboBM_compute_suffixes(suff); int* end = bmGs + pattern_len; int* k; for (k = bmGs; k < end; k++) *k = pattern_len; int tmp; int i; int j = 0; const int plm1 = pattern_len - 1; for (i = plm1; i > -1; i--) { if (suff[i] == i + 1) { for (tmp = plm1 - i; j < tmp; j++) { int* tmp2 = bmGs + j; if (*tmp2 == pattern_len) *tmp2 = tmp; } } } int* tmp2; for (tmp = plm1 - i; j < tmp; j++) { tmp2 = bmGs + j; if (*tmp2 == pattern_len) *tmp2 = tmp; } tmp2 = bmGs + plm1; for (i = 0; i <= pattern_len - 2; i++) *(tmp2 - suff[i]) = plm1 - i; } /********************************************************************** turboBM_compute_bad_character_shifts() Precomputation dependent on pattern_len. **********************************************************************/ void Item_func_like::turboBM_compute_bad_character_shifts() { int* i; int* end = bmBc + alphabet_size; for (i = bmBc; i < end; i++) *i = pattern_len; int j; const int plm1 = pattern_len - 1; if (binary) for (j = 0; j < plm1; j++) bmBc[pattern[j]] = plm1 - j; else for (j = 0; j < plm1; j++) bmBc[likeconv(pattern[j])] = plm1 - j; } /********************************************************************** turboBM_matches() Search for pattern in text, returns true/false for match/no match **********************************************************************/ bool Item_func_like::turboBM_matches(const char* text, int text_len) const { register int bcShift; register int turboShift; int shift = pattern_len; int j = 0; int u = 0; const int plm1 = pattern_len - 1; const int tlmpl = text_len - pattern_len; /* Searching */ if (binary) { while (j <= tlmpl) { register int i = plm1; while (i >= 0 && pattern[i] == text[i + j]) { i--; if (i == plm1 - shift) i -= u; } if (i < 0) return true; register const int v = plm1 - i; turboShift = u - v; bcShift = bmBc[text[i + j]] - plm1 + i; shift = max(turboShift, bcShift); shift = max(shift, bmGs[i]); if (shift == bmGs[i]) u = min(pattern_len - shift, v); else { if (turboShift < bcShift) shift = max(shift, u + 1); u = 0; } j += shift; } return false; } else { while (j <= tlmpl) { register int i = plm1; while (i >= 0 && likeconv(pattern[i]) == likeconv(text[i + j])) { i--; if (i == plm1 - shift) i -= u; } if (i < 0) return true; register const int v = plm1 - i; turboShift = u - v; bcShift = bmBc[likeconv(text[i + j])] - plm1 + i; shift = max(turboShift, bcShift); shift = max(shift, bmGs[i]); if (shift == bmGs[i]) u = min(pattern_len - shift, v); else { if (turboShift < bcShift) shift = max(shift, u + 1); u = 0; } j += shift; } return false; } }
sql/item_cmpfunc.h +27 −2 Original line number Diff line number Diff line Loading @@ -478,15 +478,40 @@ class Item_func_isnotnull :public Item_bool_func class Item_func_like :public Item_bool_func2 { char escape; public: Item_func_like(Item *a,Item *b, char* escape_arg) :Item_bool_func2(a,b),escape(*escape_arg) // Turbo Boyer-Moore data bool canDoTurboBM; // pattern is '%abcd%' case const char* pattern; int pattern_len; // TurboBM buffers, *this is owner int* bmGs; // good suffix shift table, size is pattern_len + 1 int* bmBc; // bad character shift table, size is alphabet_size void turboBM_compute_suffixes(int* suff); void turboBM_compute_good_suffix_shifts(int* suff); void turboBM_compute_bad_character_shifts(); bool turboBM_matches(const char* text, int text_len) const; enum { alphabet_size = 256 }; public: Item_func_like::Item_func_like(Item *a,Item *b, char* escape_arg) : Item_bool_func2(a,b), escape(*escape_arg), canDoTurboBM(false), pattern(0), pattern_len(0), bmGs(0), bmBc(0) {} longlong val_int(); enum Functype functype() const { return LIKE_FUNC; } optimize_type select_optimize() const; cond_result eq_cmp_result() const { return COND_TRUE; } const char *func_name() const { return "like"; } void fix_length_and_dec(); bool fix_fields(THD *thd,struct st_table_list *tlist); }; #ifdef USE_REGEX Loading