Loading include/my_tree.h +1 −1 Original line number Diff line number Diff line Loading @@ -57,7 +57,7 @@ typedef struct st_tree { void (*free)(void *); } TREE; /* Functions on hole tree */ /* Functions on whole tree */ void init_tree(TREE *tree,uint default_alloc_size, int element_size, qsort_cmp2 compare, my_bool with_delete, void (*free_element)(void*)); Loading mysys/tree.c +1 −1 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size, ((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1)))) { tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */ /* Fix allocation size so that we don't loose any memory */ /* Fix allocation size so that we don't lose any memory */ default_alloc_size/=(sizeof(TREE_ELEMENT)+size); if (!default_alloc_size) default_alloc_size=1; Loading sql/item_sum.cc +102 −1 Original line number Diff line number Diff line Loading @@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str) #include "sql_select.h" static int simple_raw_key_cmp(void* arg, byte* key1, byte* key2) { return memcmp(key1, key2, (int)arg); } static int simple_str_key_cmp(void* arg, byte* key1, byte* key2) { return my_sortcmp(key1, key2, (int)arg); } // did not make this one static - at least gcc gets confused when // I try to declare a static function as a friend. If you can figure // out the syntax to make a static function a friend, make this one // static int composite_key_cmp(void* arg, byte* key1, byte* key2) { Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg; Field** field = item->table->field, **field_end; field_end = field + item->table->fields; for(; field < field_end; ++field) { int res; int len = (*field)->field_length; switch((*field)->type()) { case FIELD_TYPE_STRING: case FIELD_TYPE_VAR_STRING: res = my_sortcmp(key1, key2, len); break; default: res = memcmp(key1, key2, len); break; } if(res) return res; key1 += len; key2 += len; } return 0; } Item_sum_count_distinct::~Item_sum_count_distinct() { if (table) free_tmp_table(current_thd, table); delete tmp_table_param; if(use_tree) delete_tree(&tree); } Loading Loading @@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd) 0, 0, current_lex->options | thd->options))) return 1; table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows if(table->db_type == DB_TYPE_HEAP) // no blobs, otherwise it would be // MyISAM { qsort_cmp2 compare_key; void* cmp_arg; int key_len; if(table->fields == 1) // if we have only one field, which is // the most common use of count(distinct), it is much faster // to use a simpler key compare method that can take advantage // of not having to worry about other fields { switch(table->field[0]->type()) { // if we have a string, we must take care of charsets // and case sensitivity case FIELD_TYPE_STRING: case FIELD_TYPE_VAR_STRING: compare_key = (qsort_cmp2)simple_str_key_cmp; break; default: // since at this point we cannot have blobs // anything else can be compared with memcmp compare_key = (qsort_cmp2)simple_raw_key_cmp; break; } cmp_arg = (void*)(key_len = table->field[0]->field_length); rec_offset = 1; } else // too bad, cannot cheat - there is more than one field { cmp_arg = (void*)this; compare_key = (qsort_cmp2)composite_key_cmp; Field** field, **field_end; field_end = (field = table->field) + table->fields; for(key_len = 0; field < field_end; ++field) { key_len += (*field)->field_length; } rec_offset = table->reclength - key_len; } init_tree(&tree, 0, key_len, compare_key, 0, 0); tree.cmp_arg = cmp_arg; use_tree = 1; } return 0; } Loading @@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset() table->file->extra(HA_EXTRA_NO_CACHE); table->file->delete_all_rows(); table->file->extra(HA_EXTRA_WRITE_CACHE); if(use_tree) delete_tree(&tree); (void) add(); } Loading @@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add() if ((*field)->is_real_null(0)) return 0; // Don't count NULL if ((error=table->file->write_row(table->record[0]))) if(use_tree) { if(!tree_insert(&tree, table->record[0] + rec_offset, 0)) return 1; } else if ((error=table->file->write_row(table->record[0]))) { if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) Loading @@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int() { if (!table) // Empty query return LL(0); if(use_tree) return tree.elements_in_tree; table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); return table->file->records; } Loading sql/item_sum.h +13 −2 Original line number Diff line number Diff line Loading @@ -21,6 +21,8 @@ #pragma interface /* gcc class implementation */ #endif #include <my_tree.h> class Item_sum :public Item_result_field { public: Loading Loading @@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int table_map used_table_cache; bool fix_fields(THD *thd,TABLE_LIST *tables); TMP_TABLE_PARAM *tmp_table_param; TREE tree; bool use_tree; // If there are no blobs, we can use a tree, which // is faster than heap table. In that case, we still use the table // to help get things set up, but we insert nothing in it int rec_offset; // the first few bytes of record ( at least one) // are just markers for deleted and NULLs. We want to skip them since // they will just bloat the tree without providing any valuable info friend int composite_key_cmp(void* arg, byte* key1, byte* key2); public: Item_sum_count_distinct(List<Item> &list) :Item_sum_int(list),table(0),used_table_cache(~(table_map) 0), tmp_table_param(0) tmp_table_param(0),use_tree(0) { quick_group=0; } ~Item_sum_count_distinct(); table_map used_tables() const { return used_table_cache; } Loading Loading
include/my_tree.h +1 −1 Original line number Diff line number Diff line Loading @@ -57,7 +57,7 @@ typedef struct st_tree { void (*free)(void *); } TREE; /* Functions on hole tree */ /* Functions on whole tree */ void init_tree(TREE *tree,uint default_alloc_size, int element_size, qsort_cmp2 compare, my_bool with_delete, void (*free_element)(void*)); Loading
mysys/tree.c +1 −1 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size, ((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1)))) { tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */ /* Fix allocation size so that we don't loose any memory */ /* Fix allocation size so that we don't lose any memory */ default_alloc_size/=(sizeof(TREE_ELEMENT)+size); if (!default_alloc_size) default_alloc_size=1; Loading
sql/item_sum.cc +102 −1 Original line number Diff line number Diff line Loading @@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str) #include "sql_select.h" static int simple_raw_key_cmp(void* arg, byte* key1, byte* key2) { return memcmp(key1, key2, (int)arg); } static int simple_str_key_cmp(void* arg, byte* key1, byte* key2) { return my_sortcmp(key1, key2, (int)arg); } // did not make this one static - at least gcc gets confused when // I try to declare a static function as a friend. If you can figure // out the syntax to make a static function a friend, make this one // static int composite_key_cmp(void* arg, byte* key1, byte* key2) { Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg; Field** field = item->table->field, **field_end; field_end = field + item->table->fields; for(; field < field_end; ++field) { int res; int len = (*field)->field_length; switch((*field)->type()) { case FIELD_TYPE_STRING: case FIELD_TYPE_VAR_STRING: res = my_sortcmp(key1, key2, len); break; default: res = memcmp(key1, key2, len); break; } if(res) return res; key1 += len; key2 += len; } return 0; } Item_sum_count_distinct::~Item_sum_count_distinct() { if (table) free_tmp_table(current_thd, table); delete tmp_table_param; if(use_tree) delete_tree(&tree); } Loading Loading @@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd) 0, 0, current_lex->options | thd->options))) return 1; table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows if(table->db_type == DB_TYPE_HEAP) // no blobs, otherwise it would be // MyISAM { qsort_cmp2 compare_key; void* cmp_arg; int key_len; if(table->fields == 1) // if we have only one field, which is // the most common use of count(distinct), it is much faster // to use a simpler key compare method that can take advantage // of not having to worry about other fields { switch(table->field[0]->type()) { // if we have a string, we must take care of charsets // and case sensitivity case FIELD_TYPE_STRING: case FIELD_TYPE_VAR_STRING: compare_key = (qsort_cmp2)simple_str_key_cmp; break; default: // since at this point we cannot have blobs // anything else can be compared with memcmp compare_key = (qsort_cmp2)simple_raw_key_cmp; break; } cmp_arg = (void*)(key_len = table->field[0]->field_length); rec_offset = 1; } else // too bad, cannot cheat - there is more than one field { cmp_arg = (void*)this; compare_key = (qsort_cmp2)composite_key_cmp; Field** field, **field_end; field_end = (field = table->field) + table->fields; for(key_len = 0; field < field_end; ++field) { key_len += (*field)->field_length; } rec_offset = table->reclength - key_len; } init_tree(&tree, 0, key_len, compare_key, 0, 0); tree.cmp_arg = cmp_arg; use_tree = 1; } return 0; } Loading @@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset() table->file->extra(HA_EXTRA_NO_CACHE); table->file->delete_all_rows(); table->file->extra(HA_EXTRA_WRITE_CACHE); if(use_tree) delete_tree(&tree); (void) add(); } Loading @@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add() if ((*field)->is_real_null(0)) return 0; // Don't count NULL if ((error=table->file->write_row(table->record[0]))) if(use_tree) { if(!tree_insert(&tree, table->record[0] + rec_offset, 0)) return 1; } else if ((error=table->file->write_row(table->record[0]))) { if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) Loading @@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int() { if (!table) // Empty query return LL(0); if(use_tree) return tree.elements_in_tree; table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); return table->file->records; } Loading
sql/item_sum.h +13 −2 Original line number Diff line number Diff line Loading @@ -21,6 +21,8 @@ #pragma interface /* gcc class implementation */ #endif #include <my_tree.h> class Item_sum :public Item_result_field { public: Loading Loading @@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int table_map used_table_cache; bool fix_fields(THD *thd,TABLE_LIST *tables); TMP_TABLE_PARAM *tmp_table_param; TREE tree; bool use_tree; // If there are no blobs, we can use a tree, which // is faster than heap table. In that case, we still use the table // to help get things set up, but we insert nothing in it int rec_offset; // the first few bytes of record ( at least one) // are just markers for deleted and NULLs. We want to skip them since // they will just bloat the tree without providing any valuable info friend int composite_key_cmp(void* arg, byte* key1, byte* key2); public: Item_sum_count_distinct(List<Item> &list) :Item_sum_int(list),table(0),used_table_cache(~(table_map) 0), tmp_table_param(0) tmp_table_param(0),use_tree(0) { quick_group=0; } ~Item_sum_count_distinct(); table_map used_tables() const { return used_table_cache; } Loading