Loading include/my_base.h +2 −0 Original line number Diff line number Diff line Loading @@ -319,6 +319,8 @@ enum ha_base_keytype { #define SEARCH_NULL_ARE_EQUAL 32768 /* NULL in keys are equal */ #define SEARCH_NULL_ARE_NOT_EQUAL 65536 /* NULL in keys are not equal */ #define SEARCH_RETURN_B_POS (65536*2) /* see ha_key_cmp for description */ /* bits in opt_flag */ #define QUICK_USED 1 #define READ_CACHE_USED 2 Loading include/my_handler.h +2 −0 Original line number Diff line number Diff line Loading @@ -63,4 +63,6 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, register uchar *b, uint key_length, uint nextflag, uint *diff_pos); extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a); #endif /* _my_handler_h */ include/myisam.h +12 −2 Original line number Diff line number Diff line Loading @@ -322,7 +322,9 @@ typedef enum /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ MI_STATS_METHOD_NULLS_NOT_EQUAL, /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ MI_STATS_METHOD_NULLS_EQUAL MI_STATS_METHOD_NULLS_EQUAL, /* Ignore NULLs - count only tuples without NULLs in the index components */ MI_STATS_METHOD_IGNORE_NULLS } enum_mi_stats_method; typedef struct st_mi_check_param Loading @@ -349,7 +351,14 @@ typedef struct st_mi_check_param int tmpfile_createflag; myf myf_rw; IO_CACHE read_cache; /* The next two are used to collect statistics, see update_key_parts for description. 
*/ ulonglong unique_count[MI_MAX_KEY_SEG+1]; ulonglong notnull_count[MI_MAX_KEY_SEG+1]; ha_checksum key_crc[MI_MAX_POSSIBLE_KEY]; ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY]; void *thd; Loading Loading @@ -409,7 +418,8 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, my_bool repair); int update_state_info(MI_CHECK *param, MI_INFO *info,uint update); void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, ulonglong *unique, ulonglong records); ulonglong *unique, ulonglong *notnull, ulonglong records); int filecopy(MI_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type); int movepoint(MI_INFO *info,byte *record,my_off_t oldpos, Loading myisam/mi_check.c +178 −22 Original line number Diff line number Diff line Loading @@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) found_keys++; param->record_checksum=init_checksum; bzero((char*) &param->unique_count,sizeof(param->unique_count)); bzero((char*) &param->notnull_count,sizeof(param->notnull_count)); if ((!(param->testflag & T_SILENT))) printf ("- check data record references index: %d\n",key+1); if (keyinfo->flag & HA_FULLTEXT) Loading Loading @@ -496,6 +499,8 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) if (param->testflag & T_STATISTICS) update_key_parts(keyinfo, rec_per_key_part, param->unique_count, param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? param->notnull_count: NULL, (ulonglong)info->state->records); } if (param->testflag & T_INFO) Loading Loading @@ -552,6 +557,96 @@ static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, return 1; } /* "Ignore NULLs" statistics collection method: process first index tuple. 
SYNOPSIS mi_collect_stats_nonulls_first() keyseg IN Array of key part descriptions notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} tuples that don't contain NULLs) key IN Key values tuple DESCRIPTION Process the first index tuple - find out which prefix tuples don't contain NULLs, and update the array of notnull counters accordingly. */ static void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, uchar *key) { uint first_null, kp; first_null= ha_find_null(keyseg, key) - keyseg; /* All prefix tuples that don't include keypart_{first_null} are not-null tuples (and all others aren't), increment counters for them. */ for (kp= 0; kp < first_null; kp++) notnull[kp]++; } /* "Ignore NULLs" statistics collection method: process next index tuple. SYNOPSIS mi_collect_stats_nonulls_next() keyseg IN Array of key part descriptions notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} tuples that don't contain NULLs) prev_key IN Previous key values tuple last_key IN Next key values tuple DESCRIPTION Process the next index tuple: 1. Find out which prefix tuples of last_key don't contain NULLs, and update the array of notnull counters accordingly. 2. Find the first keypart number where the prev_key and last_key tuples are different(A), or last_key has NULL value(B), and return it, so the caller can count number of unique tuples for each key prefix. We don't need (B) to be counted, and that is compensated back in update_key_parts(). RETURN 1 + number of first keypart where values differ or last_key tuple has NULL */ static int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, uchar *prev_key, uchar *last_key) { uint diffs[2]; uint first_null_seg, kp; /* Find the first keypart where values are different or either of them is NULL. 
We get results in diffs array: diffs[0]= 1 + number of first different keypart diffs[1]=offset: (last_key + diffs[1]) points to first value in last_key that is NULL or different from corresponding value in prev_key. */ ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS, diffs); HA_KEYSEG *seg= keyseg + diffs[0] - 1; /* Find first NULL in last_key */ first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg; for (kp= 0; kp < first_null_seg; kp++) notnull[kp]++; /* Return 1+ number of first key part where values differ. Don't care if these were NULLs and not .... We compensate for that in update_key_parts. */ return diffs[0]; } /* Check if index is ok */ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, Loading Loading @@ -641,8 +736,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg, param->notnull_count, info->lastkey, key); } param->unique_count[diff_pos-1]++; } else { if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count, key); } } (*key_checksum)+= mi_byte_checksum((byte*) key, key_length- info->s->rec_reflength); Loading Loading @@ -2088,7 +2195,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if (param->testflag & T_STATISTICS) update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, (ulonglong) info->state->records); param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? 
 sort_param.notnull: NULL,(ulonglong) info->state->records); share->state.key_map|=(ulonglong) 1 << sort_param.key; if (sort_param.fix_datafile) Loading Loading @@ -3255,11 +3363,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos= mi_collect_stats_nonulls_next(sort_param->seg, sort_param->notnull, sort_info->key_block->lastkey, (uchar*)a); } sort_param->unique[diff_pos-1]++; } else { cmp= -1; if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, (uchar*)a); } if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) { Loading Loading @@ -3981,21 +4099,31 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, SYNOPSIS update_key_parts() keyinfo Index information (only key->keysegs used) keyinfo IN Index information (only key->keysegs used) rec_per_key_part OUT Store statistics here unique IN Array of #distinct values collected over index run. unique IN Array of (#distinct tuples) notnull_tuples IN Array of (#tuples), or NULL records Number of records in the table NOTES DESCRIPTION This function is called to produce index statistics values from unique and notnull_tuples arrays after these arrays were produced with sequential index scan (the scan is done in two places: chk_index() and sort_key_write()). This function handles all 3 index statistics collection methods. Unique is an array: unique[0]= (#different values of {keypart1}) - 1 unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1 unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1 ... 
For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too: notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL) notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all keypart{i} are not NULL) ... The 'unique' array is collected in one sequential scan through the entire index. This is done in two places: in chk_index() and in sort_key_write(). Statistics collection may consider NULLs as either equal or unequal (see SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*). For all other statistics collection methods notnull_tuples==NULL. Output is an array: rec_per_key_part[k] = Loading @@ -4007,25 +4135,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, index tuples} = #tuples-in-the-index / #distinct-tuples-in-the-index. The #tuples-in-the-index and #distinct-tuples-in-the-index have different meaning depending on which statistics collection method is used: MI_STATS_METHOD_* how are nulls compared? which tuples are counted? NULLS_EQUAL NULL == NULL all tuples in table NULLS_NOT_EQUAL NULL != NULL all tuples in table IGNORE_NULLS n/a tuples that don't have NULLs */ void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, ulonglong *unique, ulonglong records) ulonglong *unique, ulonglong *notnull, ulonglong records) { ulonglong count=0,tmp; ulonglong count=0,tmp, unique_tuples; ulonglong tuples= records; uint parts; for (parts=0 ; parts < keyinfo->keysegs ; parts++) { count+=unique[parts]; if (count == 0) tmp=records; unique_tuples= count + 1; if (notnull) { tuples= notnull[parts]; /* #(unique_tuples not counting tuples with NULLs) = #(unique_tuples counting tuples with NULLs as different) - #(tuples with NULLs) */ unique_tuples -= (records - notnull[parts]); } if (unique_tuples == 0) tmp= 1; else if (count == 0) tmp= tuples; /* 1 unique tuple */ else tmp= (records + (count+1)/2) / (count+1); /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here. 
let's ensure it is not */ tmp= (tuples + unique_tuples/2) / unique_tuples; /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here. let's ensure it is not */ set_if_bigger(tmp,1); if (tmp >= (ulonglong) ~(ulong) 0) tmp=(ulonglong) ~(ulong) 0; *rec_per_key_part=(ulong) tmp; rec_per_key_part++; } Loading myisam/myisamchk.c +20 −4 Original line number Diff line number Diff line Loading @@ -339,7 +339,8 @@ static struct my_option my_long_options[] = REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"stats_method", OPT_STATS_METHOD, "Specifies how index statistics collection code should threat NULLs. " "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).", "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", (gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} Loading Loading @@ -451,6 +452,10 @@ static void usage(void) -a, --analyze Analyze distribution of keys. Will make some joins in\n\ MySQL faster. You can check the calculated distribution\n\ by using '--description --verbose table_name'.\n\ --stats_method=name Specifies how index statistics collection code should\n\ threat NULLs. 
Possible values of name are \"nulls_unequal\"\n\ (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\ \"nulls_ignored\".\n\ -d, --description Prints some information about table.\n\ -A, --set-auto-increment[=value]\n\ Force auto_increment to start at this or higher value\n\ Loading @@ -472,7 +477,7 @@ static void usage(void) #include <help_end.h> const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal", NullS}; "nulls_ignored", NullS}; TYPELIB myisam_stats_method_typelib= { array_elements(myisam_stats_method_names) - 1, "", myisam_stats_method_names, NULL}; Loading Loading @@ -699,14 +704,25 @@ get_one_option(int optid, case OPT_STATS_METHOD: { int method; enum_mi_stats_method method_conv; myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) { fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); exit(1); } check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL : MI_STATS_METHOD_NULLS_NOT_EQUAL; switch (method-1) { case 0: method_conv= MI_STATS_METHOD_NULLS_EQUAL; break; case 1: method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL; break; case 2: method_conv= MI_STATS_METHOD_IGNORE_NULLS; break; } check_param.stats_method= method_conv; break; } #ifdef DEBUG /* Only useful if debugging */ Loading Loading
include/my_base.h +2 −0 Original line number Diff line number Diff line Loading @@ -319,6 +319,8 @@ enum ha_base_keytype { #define SEARCH_NULL_ARE_EQUAL 32768 /* NULL in keys are equal */ #define SEARCH_NULL_ARE_NOT_EQUAL 65536 /* NULL in keys are not equal */ #define SEARCH_RETURN_B_POS (65536*2) /* see ha_key_cmp for description */ /* bits in opt_flag */ #define QUICK_USED 1 #define READ_CACHE_USED 2 Loading
include/my_handler.h +2 −0 Original line number Diff line number Diff line Loading @@ -63,4 +63,6 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, register uchar *b, uint key_length, uint nextflag, uint *diff_pos); extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a); #endif /* _my_handler_h */
include/myisam.h +12 −2 Original line number Diff line number Diff line Loading @@ -322,7 +322,9 @@ typedef enum /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ MI_STATS_METHOD_NULLS_NOT_EQUAL, /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ MI_STATS_METHOD_NULLS_EQUAL MI_STATS_METHOD_NULLS_EQUAL, /* Ignore NULLs - count only tuples without NULLs in the index components */ MI_STATS_METHOD_IGNORE_NULLS } enum_mi_stats_method; typedef struct st_mi_check_param Loading @@ -349,7 +351,14 @@ typedef struct st_mi_check_param int tmpfile_createflag; myf myf_rw; IO_CACHE read_cache; /* The next two are used to collect statistics, see update_key_parts for description. */ ulonglong unique_count[MI_MAX_KEY_SEG+1]; ulonglong notnull_count[MI_MAX_KEY_SEG+1]; ha_checksum key_crc[MI_MAX_POSSIBLE_KEY]; ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY]; void *thd; Loading Loading @@ -409,7 +418,8 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, my_bool repair); int update_state_info(MI_CHECK *param, MI_INFO *info,uint update); void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, ulonglong *unique, ulonglong records); ulonglong *unique, ulonglong *notnull, ulonglong records); int filecopy(MI_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type); int movepoint(MI_INFO *info,byte *record,my_off_t oldpos, Loading
myisam/mi_check.c +178 −22 Original line number Diff line number Diff line Loading @@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) found_keys++; param->record_checksum=init_checksum; bzero((char*) &param->unique_count,sizeof(param->unique_count)); bzero((char*) &param->notnull_count,sizeof(param->notnull_count)); if ((!(param->testflag & T_SILENT))) printf ("- check data record references index: %d\n",key+1); if (keyinfo->flag & HA_FULLTEXT) Loading Loading @@ -496,6 +499,8 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) if (param->testflag & T_STATISTICS) update_key_parts(keyinfo, rec_per_key_part, param->unique_count, param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? param->notnull_count: NULL, (ulonglong)info->state->records); } if (param->testflag & T_INFO) Loading Loading @@ -552,6 +557,96 @@ static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, return 1; } /* "Ignore NULLs" statistics collection method: process first index tuple. SYNOPSIS mi_collect_stats_nonulls_first() keyseg IN Array of key part descriptions notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} tuples that don't contain NULLs) key IN Key values tuple DESCRIPTION Process the first index tuple - find out which prefix tuples don't contain NULLs, and update the array of notnull counters accordingly. */ static void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, uchar *key) { uint first_null, kp; first_null= ha_find_null(keyseg, key) - keyseg; /* All prefix tuples that don't include keypart_{first_null} are not-null tuples (and all others aren't), increment counters for them. */ for (kp= 0; kp < first_null; kp++) notnull[kp]++; } /* "Ignore NULLs" statistics collection method: process next index tuple. 
SYNOPSIS mi_collect_stats_nonulls_next() keyseg IN Array of key part descriptions notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} tuples that don't contain NULLs) prev_key IN Previous key values tuple last_key IN Next key values tuple DESCRIPTION Process the next index tuple: 1. Find out which prefix tuples of last_key don't contain NULLs, and update the array of notnull counters accordingly. 2. Find the first keypart number where the prev_key and last_key tuples are different(A), or last_key has NULL value(B), and return it, so the caller can count number of unique tuples for each key prefix. We don't need (B) to be counted, and that is compensated back in update_key_parts(). RETURN 1 + number of first keypart where values differ or last_key tuple has NULL */ static int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, uchar *prev_key, uchar *last_key) { uint diffs[2]; uint first_null_seg, kp; /* Find the first keypart where values are different or either of them is NULL. We get results in diffs array: diffs[0]= 1 + number of first different keypart diffs[1]=offset: (last_key + diffs[1]) points to first value in last_key that is NULL or different from corresponding value in prev_key. */ ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS, diffs); HA_KEYSEG *seg= keyseg + diffs[0] - 1; /* Find first NULL in last_key */ first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg; for (kp= 0; kp < first_null_seg; kp++) notnull[kp]++; /* Return 1+ number of first key part where values differ. Don't care if these were NULLs and not .... We compensate for that in update_key_parts. 
*/ return diffs[0]; } /* Check if index is ok */ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, Loading Loading @@ -641,8 +736,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg, param->notnull_count, info->lastkey, key); } param->unique_count[diff_pos-1]++; } else { if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count, key); } } (*key_checksum)+= mi_byte_checksum((byte*) key, key_length- info->s->rec_reflength); Loading Loading @@ -2088,7 +2195,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, if (param->testflag & T_STATISTICS) update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, (ulonglong) info->state->records); param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? 
 sort_param.notnull: NULL,(ulonglong) info->state->records); share->state.key_map|=(ulonglong) 1 << sort_param.key; if (sort_param.fix_datafile) Loading Loading @@ -3255,11 +3363,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos= mi_collect_stats_nonulls_next(sort_param->seg, sort_param->notnull, sort_info->key_block->lastkey, (uchar*)a); } sort_param->unique[diff_pos-1]++; } else { cmp= -1; if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, (uchar*)a); } if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) { Loading Loading @@ -3981,21 +4099,31 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, SYNOPSIS update_key_parts() keyinfo Index information (only key->keysegs used) keyinfo IN Index information (only key->keysegs used) rec_per_key_part OUT Store statistics here unique IN Array of #distinct values collected over index run. unique IN Array of (#distinct tuples) notnull_tuples IN Array of (#tuples), or NULL records Number of records in the table NOTES DESCRIPTION This function is called to produce index statistics values from unique and notnull_tuples arrays after these arrays were produced with sequential index scan (the scan is done in two places: chk_index() and sort_key_write()). This function handles all 3 index statistics collection methods. Unique is an array: unique[0]= (#different values of {keypart1}) - 1 unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1 unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1 ... 
For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too: notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL) notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all keypart{i} are not NULL) ... The 'unique' array is collected in one sequential scan through the entire index. This is done in two places: in chk_index() and in sort_key_write(). Statistics collection may consider NULLs as either equal or unequal (see SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*). For all other statistics collection methods notnull_tuples==NULL. Output is an array: rec_per_key_part[k] = Loading @@ -4007,25 +4135,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, index tuples} = #tuples-in-the-index / #distinct-tuples-in-the-index. The #tuples-in-the-index and #distinct-tuples-in-the-index have different meaning depending on which statistics collection method is used: MI_STATS_METHOD_* how are nulls compared? which tuples are counted? NULLS_EQUAL NULL == NULL all tuples in table NULLS_NOT_EQUAL NULL != NULL all tuples in table IGNORE_NULLS n/a tuples that don't have NULLs */ void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, ulonglong *unique, ulonglong records) ulonglong *unique, ulonglong *notnull, ulonglong records) { ulonglong count=0,tmp; ulonglong count=0,tmp, unique_tuples; ulonglong tuples= records; uint parts; for (parts=0 ; parts < keyinfo->keysegs ; parts++) { count+=unique[parts]; if (count == 0) tmp=records; unique_tuples= count + 1; if (notnull) { tuples= notnull[parts]; /* #(unique_tuples not counting tuples with NULLs) = #(unique_tuples counting tuples with NULLs as different) - #(tuples with NULLs) */ unique_tuples -= (records - notnull[parts]); } if (unique_tuples == 0) tmp= 1; else if (count == 0) tmp= tuples; /* 1 unique tuple */ else tmp= (records + (count+1)/2) / (count+1); /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here. 
let's ensure it is not */ tmp= (tuples + unique_tuples/2) / unique_tuples; /* for some weird keys (e.g. FULLTEXT) tmp can be <1 here. let's ensure it is not */ set_if_bigger(tmp,1); if (tmp >= (ulonglong) ~(ulong) 0) tmp=(ulonglong) ~(ulong) 0; *rec_per_key_part=(ulong) tmp; rec_per_key_part++; } Loading
myisam/myisamchk.c +20 −4 Original line number Diff line number Diff line Loading @@ -339,7 +339,8 @@ static struct my_option my_long_options[] = REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"stats_method", OPT_STATS_METHOD, "Specifies how index statistics collection code should threat NULLs. " "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).", "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", (gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} Loading Loading @@ -451,6 +452,10 @@ static void usage(void) -a, --analyze Analyze distribution of keys. Will make some joins in\n\ MySQL faster. You can check the calculated distribution\n\ by using '--description --verbose table_name'.\n\ --stats_method=name Specifies how index statistics collection code should\n\ threat NULLs. 
Possible values of name are \"nulls_unequal\"\n\ (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\ \"nulls_ignored\".\n\ -d, --description Prints some information about table.\n\ -A, --set-auto-increment[=value]\n\ Force auto_increment to start at this or higher value\n\ Loading @@ -472,7 +477,7 @@ static void usage(void) #include <help_end.h> const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal", NullS}; "nulls_ignored", NullS}; TYPELIB myisam_stats_method_typelib= { array_elements(myisam_stats_method_names) - 1, "", myisam_stats_method_names, NULL}; Loading Loading @@ -699,14 +704,25 @@ get_one_option(int optid, case OPT_STATS_METHOD: { int method; enum_mi_stats_method method_conv; myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) { fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); exit(1); } check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL : MI_STATS_METHOD_NULLS_NOT_EQUAL; switch (method-1) { case 0: method_conv= MI_STATS_METHOD_NULLS_EQUAL; break; case 1: method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL; break; case 2: method_conv= MI_STATS_METHOD_IGNORE_NULLS; break; } check_param.stats_method= method_conv; break; } #ifdef DEBUG /* Only useful if debugging */ Loading