Loading mysql-test/r/count_distinct2.result +8 −0 Original line number Diff line number Diff line Loading @@ -74,3 +74,11 @@ count(distinct n2) n1 1 NULL 1 1 3 2 count(distinct n) 5000 Variable_name Value Created_tmp_disk_tables 1 count(distinct s) 5000 Variable_name Value Created_tmp_disk_tables 1 mysql-test/t/count_distinct2-master.opt 0 → 100644 +1 −0 Original line number Diff line number Diff line -O max_heap_table_size=16384 mysql-test/t/count_distinct2.test +29 −0 Original line number Diff line number Diff line Loading @@ -43,3 +43,32 @@ select count(distinct n1), count(distinct n2) from t1; select count(distinct n2), n1 from t1 group by n1; drop table t1; # test the converstion from tree to MyISAM create table t1 (n int); let $1=5000; while ($1) { eval insert into t1 values($1); dec $1; } flush status; select count(distinct n) from t1; show status like 'Created_tmp_disk_tables'; drop table t1; #test conversion from heap to MyISAM create table t1 (s text); let $1=5000; while ($1) { eval insert into t1 values('$1'); dec $1; } flush status; select count(distinct s) from t1; show status like 'Created_tmp_disk_tables'; drop table t1; sql/item_sum.cc +42 −1 Original line number Diff line number Diff line Loading @@ -830,7 +830,26 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2) return 0; } // helper function for walking the tree when we dump it to MyISAM - // tree_walk will call it for each // leaf int dump_leaf(byte* key, uint32 count __attribute__((unused)), Item_sum_count_distinct* item) { char* buf = item->table->record[0]; int error; memset(buf, 0xff, item->rec_offset); // make up for cheating in the tree memcpy(buf + item->rec_offset, key, item->tree.size_of_element); if ((error = item->table->file->write_row(buf))) { if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) return 1; } return 0; } Item_sum_count_distinct::~Item_sum_count_distinct() { Loading Loading @@ -916,11 +935,29 @@ bool Item_sum_count_distinct::setup(THD *thd) key_len, compare_key, 0, 0); tree.cmp_arg = cmp_arg; use_tree = 1; // the only time key_len could be 0 is if someone does // count(distinct) on a char(0) field - stupid thing to do, // but this has to be handled - otherwise someone can crash // the server with a DoS attack max_elements_in_tree = (key_len) ? max_heap_table_size/key_len : max_heap_table_size; } return 0; } int Item_sum_count_distinct::tree_to_myisam() { if(create_myisam_from_heap(table, tmp_table_param, HA_ERR_RECORD_FILE_FULL, 1) || tree_walk(&tree, (tree_walk_action)&dump_leaf, (void*)this, left_root_right)) return 1; delete_tree(&tree); use_tree = 0; return 0; } void Item_sum_count_distinct::reset() { Loading @@ -947,7 +984,11 @@ bool Item_sum_count_distinct::add() if(use_tree) { if(!tree_insert(&tree, table->record[0] + rec_offset, 0)) // if the tree got too big, convert to MyISAM, otherwise // insert into the tree if((tree.elements_in_tree > max_elements_in_tree && tree_to_myisam()) || !tree_insert(&tree, table->record[0] + rec_offset, 0)) return 1; } else if ((error=table->file->write_row(table->record[0]))) Loading sql/item_sum.h +14 −3 Original line number Diff line number Diff line Loading @@ -148,14 +148,25 @@ class Item_sum_count_distinct :public Item_sum_int bool fix_fields(THD *thd,TABLE_LIST *tables); TMP_TABLE_PARAM *tmp_table_param; TREE tree; bool use_tree; // If there are no blobs, we can use a tree, which uint max_elements_in_tree; // calculated based on max_heap_table_size. If reached, // walk the tree and dump it into MyISAM table bool use_tree; // If there are no blobs, we can use a tree, which // is faster than heap table. In that case, we still use the table // to help get things set up, but we insert nothing in it int rec_offset; // the first few bytes of record ( at least one) int rec_offset; // the first few bytes of record ( at least one) // are just markers for deleted and NULLs. We want to skip them since // they will just bloat the tree without providing any valuable info int tree_to_myisam(); friend int composite_key_cmp(void* arg, byte* key1, byte* key2); friend int dump_leaf(byte* key, uint32 count __attribute__((unused)), Item_sum_count_distinct* item); public: Item_sum_count_distinct(List<Item> &list) Loading Loading
mysql-test/r/count_distinct2.result +8 −0 Original line number Diff line number Diff line Loading @@ -74,3 +74,11 @@ count(distinct n2) n1 1 NULL 1 1 3 2 count(distinct n) 5000 Variable_name Value Created_tmp_disk_tables 1 count(distinct s) 5000 Variable_name Value Created_tmp_disk_tables 1
mysql-test/t/count_distinct2-master.opt 0 → 100644 +1 −0 Original line number Diff line number Diff line -O max_heap_table_size=16384
mysql-test/t/count_distinct2.test +29 −0 Original line number Diff line number Diff line Loading @@ -43,3 +43,32 @@ select count(distinct n1), count(distinct n2) from t1; select count(distinct n2), n1 from t1 group by n1; drop table t1; # test the converstion from tree to MyISAM create table t1 (n int); let $1=5000; while ($1) { eval insert into t1 values($1); dec $1; } flush status; select count(distinct n) from t1; show status like 'Created_tmp_disk_tables'; drop table t1; #test conversion from heap to MyISAM create table t1 (s text); let $1=5000; while ($1) { eval insert into t1 values('$1'); dec $1; } flush status; select count(distinct s) from t1; show status like 'Created_tmp_disk_tables'; drop table t1;
sql/item_sum.cc +42 −1 Original line number Diff line number Diff line Loading @@ -830,7 +830,26 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2) return 0; } // helper function for walking the tree when we dump it to MyISAM - // tree_walk will call it for each // leaf int dump_leaf(byte* key, uint32 count __attribute__((unused)), Item_sum_count_distinct* item) { char* buf = item->table->record[0]; int error; memset(buf, 0xff, item->rec_offset); // make up for cheating in the tree memcpy(buf + item->rec_offset, key, item->tree.size_of_element); if ((error = item->table->file->write_row(buf))) { if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) return 1; } return 0; } Item_sum_count_distinct::~Item_sum_count_distinct() { Loading Loading @@ -916,11 +935,29 @@ bool Item_sum_count_distinct::setup(THD *thd) key_len, compare_key, 0, 0); tree.cmp_arg = cmp_arg; use_tree = 1; // the only time key_len could be 0 is if someone does // count(distinct) on a char(0) field - stupid thing to do, // but this has to be handled - otherwise someone can crash // the server with a DoS attack max_elements_in_tree = (key_len) ? max_heap_table_size/key_len : max_heap_table_size; } return 0; } int Item_sum_count_distinct::tree_to_myisam() { if(create_myisam_from_heap(table, tmp_table_param, HA_ERR_RECORD_FILE_FULL, 1) || tree_walk(&tree, (tree_walk_action)&dump_leaf, (void*)this, left_root_right)) return 1; delete_tree(&tree); use_tree = 0; return 0; } void Item_sum_count_distinct::reset() { Loading @@ -947,7 +984,11 @@ bool Item_sum_count_distinct::add() if(use_tree) { if(!tree_insert(&tree, table->record[0] + rec_offset, 0)) // if the tree got too big, convert to MyISAM, otherwise // insert into the tree if((tree.elements_in_tree > max_elements_in_tree && tree_to_myisam()) || !tree_insert(&tree, table->record[0] + rec_offset, 0)) return 1; } else if ((error=table->file->write_row(table->record[0]))) Loading
sql/item_sum.h +14 −3 Original line number Diff line number Diff line Loading @@ -148,14 +148,25 @@ class Item_sum_count_distinct :public Item_sum_int bool fix_fields(THD *thd,TABLE_LIST *tables); TMP_TABLE_PARAM *tmp_table_param; TREE tree; bool use_tree; // If there are no blobs, we can use a tree, which uint max_elements_in_tree; // calculated based on max_heap_table_size. If reached, // walk the tree and dump it into MyISAM table bool use_tree; // If there are no blobs, we can use a tree, which // is faster than heap table. In that case, we still use the table // to help get things set up, but we insert nothing in it int rec_offset; // the first few bytes of record ( at least one) int rec_offset; // the first few bytes of record ( at least one) // are just markers for deleted and NULLs. We want to skip them since // they will just bloat the tree without providing any valuable info int tree_to_myisam(); friend int composite_key_cmp(void* arg, byte* key1, byte* key2); friend int dump_leaf(byte* key, uint32 count __attribute__((unused)), Item_sum_count_distinct* item); public: Item_sum_count_distinct(List<Item> &list) Loading