Commit b2977103 authored by sasha@mysql.sashanet.com's avatar sasha@mysql.sashanet.com
Browse files

fixes/optimizations for count(distinct)

more extensive testing of count(distinct)
parent c706bf40
Loading
Loading
Loading
Loading
+72 −0
Original line number Diff line number Diff line
n1
1
2
NULL
count(distinct n1)
2
n2
11
12
13
NULL
count(distinct n2)
3
s
one
two
NULL
count(distinct s)
2
vs
eleven
twevle
thirteen
NULL
count(distinct vs)
3
t
eleven
twelve
foo
bar
NULL
count(distinct t)
4
n1	n2
1	11
2	11
2	12
2	13
NULL	13
2	NULL
count(distinct n1,n2)
4
n1	s
1	one
2	two
NULL	two
2	NULL
count(distinct n1,s)
2
s	n1	vs
one	1	eleven
two	2	eleven
two	2	twevle
two	2	thirteen
two	NULL	thirteen
NULL	2	thirteen
two	2	NULL
count(distinct s,n1,vs)
4
s	t
one	eleven
two	eleven
two	twelve
two	foo
two	bar
NULL	bar
two	NULL
count(distinct s,t)
5
count(distinct n1)	count(distinct n2)
2	3
+42 −0
Original line number Diff line number Diff line
# Test of COUNT(DISTINCT ...) over integer, CHAR, VARCHAR and TEXT columns,
# alone and in multi-column combinations.
#
# Fixture: 12 rows. Some rows are exact duplicates of each other, and every
# column holds NULL in exactly one row, so each query below sees both
# repeated values and a NULL.
# NOTE: the vs value 'twevle' is spelled differently from the t value
# 'twelve'; the recorded results contain the same spelling, so do not
# "correct" it here without re-recording them.
create table t1(n1 int, n2 int, s char(20), vs varchar(20), t text);
insert into t1 values (1,11, 'one','eleven', 'eleven'),
 (1,11, 'one','eleven', 'eleven'),
 (2,11, 'two','eleven', 'eleven'),
 (2,12, 'two','twevle', 'twelve'),
 (2,13, 'two','thirteen', 'foo'),
 (2,13, 'two','thirteen', 'foo'),
 (2,13, 'two','thirteen', 'bar'),
 (NULL,13, 'two','thirteen', 'bar'),
 (2,NULL, 'two','thirteen', 'bar'),
 (2,13, NULL,'thirteen', 'bar'),
 (2,13, 'two',NULL, 'bar'),
 (2,13, 'two','thirteen', NULL);

# Each pair below first lists the distinct values (or value tuples) and then
# counts them, so the two outputs can be compared by eye.

# Single integer column.
select distinct n1 from t1;
select count(distinct n1) from t1;

# Second integer column.
select distinct n2 from t1;
select count(distinct n2) from t1;

# Single CHAR column.
select distinct s from t1;
select count(distinct s) from t1;

# Single VARCHAR column.
select distinct vs from t1;
select count(distinct vs) from t1;

# Single TEXT column.
select distinct t from t1;
select count(distinct t) from t1;

# Two integer columns.
select distinct n1,n2 from t1;
select count(distinct n1,n2) from t1;

# Integer column combined with a CHAR column.
select distinct n1,s from t1;
select count(distinct n1,s) from t1;

# CHAR, integer and VARCHAR columns together.
select distinct s,n1,vs from t1;
select count(distinct s,n1,vs) from t1;

# CHAR column combined with a TEXT column.
select distinct s,t from t1;
select count(distinct s,t) from t1;

# Two independent COUNT(DISTINCT) aggregates in the same select list.
select count(distinct n1), count(distinct n2) from t1;
+16 −9
Original line number Diff line number Diff line
@@ -810,12 +810,13 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
  for(; field < field_end; ++field)
    {
      int res;
      int len = (*field)->field_length;
      Field* f = *field;
      int len = f->field_length;
      switch((*field)->type())
	{
	case FIELD_TYPE_STRING:
	case FIELD_TYPE_VAR_STRING:
	  res = my_sortcmp(key1, key2, len);
	  res = f->key_cmp(key1, key2);
	  break;
	default:
	  res = memcmp(key1, key2, len);
@@ -879,20 +880,22 @@ bool Item_sum_count_distinct::setup(THD *thd)
	// to use a simpler key compare method that can take advantage
	// of not having to worry about other fields
	{
	  switch(table->field[0]->type())
	  Field* field = table->field[0];
	  switch(field->type())
	    {
	      // if we have a string, we must take care of charsets
	      // and case sensitivity
	    case FIELD_TYPE_STRING:
	    case FIELD_TYPE_VAR_STRING:
	      compare_key = (qsort_cmp2)simple_str_key_cmp;
	      compare_key = (qsort_cmp2)(field->binary() ? simple_raw_key_cmp:
					 simple_str_key_cmp);
	      break;
	    default: // since at this point we cannot have blobs
	      // anything else can be compared with memcmp
	      compare_key = (qsort_cmp2)simple_raw_key_cmp;
	      break;
	    }
	  cmp_arg = (void*)(key_len = table->field[0]->field_length);
	  cmp_arg = (void*)(key_len = field->field_length);
	  rec_offset = 1;
	}
      else // too bad, cannot cheat - there is more than one field
@@ -908,7 +911,8 @@ bool Item_sum_count_distinct::setup(THD *thd)
	  rec_offset = table->reclength - key_len;
	}

      init_tree(&tree, 0, key_len, compare_key, 0, 0);
      init_tree(&tree, min(max_heap_table_size, sortbuff_size/16),
		key_len, compare_key, 0, 0);
      tree.cmp_arg = cmp_arg;
      use_tree = 1;
    }
@@ -918,12 +922,15 @@ bool Item_sum_count_distinct::setup(THD *thd)


/*
  Discard the values collected so far and restart the count with the
  current row.

  When use_tree is set, only the in-memory tree is dropped. Otherwise all
  rows of `table` are deleted through its handler, with the
  HA_EXTRA_NO_CACHE hint issued before the delete and HA_EXTRA_WRITE_CACHE
  issued afterwards (presumably to turn the write cache off for the delete
  and back on for the inserts that follow -- confirm against the handler).

  The two storage paths are mutually exclusive, so the tree is never
  touched on the table path.
*/
void Item_sum_count_distinct::reset()
{
  if (use_tree)
    delete_tree(&tree);
  else
  {
    table->file->extra(HA_EXTRA_NO_CACHE);
    table->file->delete_all_rows();
    table->file->extra(HA_EXTRA_WRITE_CACHE);
  }
  // Feed the current row into the emptied store; the result of add() is
  // deliberately ignored here.
  (void) add();
}