Loading mysys/charset-def.c +6 −0 Original line number Diff line number Diff line Loading @@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci; extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci; extern CHARSET_INFO my_charset_utf8_roman_uca_ci; extern CHARSET_INFO my_charset_utf8_persian_uca_ci; #ifdef HAVE_CYBOZU_COLLATION extern CHARSET_INFO my_charset_utf8_general_cs; #endif #endif #endif /* HAVE_UCA_COLLATIONS */ Loading Loading @@ -146,6 +149,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) #ifdef HAVE_CHARSET_utf8 add_compiled_collation(&my_charset_utf8_general_ci); add_compiled_collation(&my_charset_utf8_bin); #ifdef HAVE_CYBOZU_COLLATION add_compiled_collation(&my_charset_utf8_general_cs); #endif #ifdef HAVE_UCA_COLLATIONS add_compiled_collation(&my_charset_utf8_general_uca_ci); add_compiled_collation(&my_charset_utf8_icelandic_uca_ci); Loading mysys/default.c +121 −14 Original line number Diff line number Diff line Loading @@ -73,7 +73,7 @@ static int search_default_file(DYNAMIC_ARRAY *args,MEM_ROOT *alloc, static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, const char *dir, const char *ext, const char *config_file, TYPELIB *group); TYPELIB *group, int recursion_level); static char *remove_end_comment(char *ptr); Loading Loading @@ -194,7 +194,7 @@ int load_defaults(const char *conf_file, const char **groups, { if ((error= search_default_file_with_ext(&args, &alloc, "", "", forced_default_file, &group)) < 0) &group, 0)) < 0) goto err; if (error > 0) { Loading Loading @@ -311,7 +311,7 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, { int error; if ((error= search_default_file_with_ext(args, alloc, dir, *ext, config_file, group)) < 0) config_file, group, 0)) < 0) return error; } return 0; Loading @@ -326,9 +326,11 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, args Store pointer to found options here alloc Allocate strings in this object dir 
directory to read config_file Name of configuration file ext Extension for configuration file config_file Name of configuration file group groups to read recursion_level the level of recursion, got while processing "!include" or "!includedir" RETURN 0 Success Loading @@ -340,12 +342,18 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, const char *dir, const char *ext, const char *config_file, TYPELIB *group) TYPELIB *group, int recursion_level) { char name[FN_REFLEN+10],buff[4096],*ptr,*end,*value,*tmp; char name[FN_REFLEN + 10], buff[4096], *ptr, *end, *value, *tmp, **tmp_ext; static const char includedir_keyword[]= "includedir"; static const char include_keyword[]= "include"; const int max_recursion_level= 10; FILE *fp; uint line= 0; my_bool read_values= 0, found_group= 0; uint i; MY_DIR *search_dir; FILEINFO *search_file; if ((dir ? strlen(dir) : 0 )+strlen(config_file) >= FN_REFLEN-3) return 0; /* Ignore wrong paths */ Loading Loading @@ -374,7 +382,7 @@ static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, if ((stat_info.st_mode & S_IWOTH) && (stat_info.st_mode & S_IFMT) == S_IFREG) { fprintf(stderr, "warning: World-writeable config file %s is ignored\n", fprintf(stderr, "warning: World-writable config file %s is ignored\n", name); return 0; } Loading @@ -387,9 +395,108 @@ static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, { line++; /* Ignore comment and empty lines */ for (ptr=buff ; my_isspace(&my_charset_latin1,*ptr) ; ptr++ ) ; for (ptr= buff; my_isspace(&my_charset_latin1, *ptr); ptr++) {} if (*ptr == '#' || *ptr == ';' || !*ptr) continue; /* Configuration File Directives */ if ((*ptr == '!') && (recursion_level < max_recursion_level)) { /* skip over `!' 
and following whitespace */ for (++ptr; my_isspace(&my_charset_latin1, ptr[0]); ptr++) {} if ((!strncmp(ptr, includedir_keyword, sizeof(includedir_keyword) - 1)) && my_isspace(&my_charset_latin1, ptr[sizeof(includedir_keyword) - 1])) { /* skip over "includedir" and following whitespace */ for (ptr+= sizeof(includedir_keyword) - 1; my_isspace(&my_charset_latin1, ptr[0]); ptr++) {} /* trim trailing whitespace from directory name */ end= ptr + strlen(ptr) - 1; /* This would work fine even if no whitespaces are met since fgets() stores the newline character in the buffer */ for (; my_isspace(&my_charset_latin1, *(end - 1)); end--) {} end[0]= 0; /* print error msg if there is nothing after !inludedir directive */ if (end == ptr) { fprintf(stderr, "error: Wrong !includedir directive in config " "file: %s at line %d\n", name,line); goto err; } if (!(search_dir= my_dir(ptr, MYF(MY_WME)))) goto err; for (i= 0; i < (uint) search_dir->number_off_files; i++) { search_file= search_dir->dir_entry + i; ext= fn_ext(search_file->name); /* check extenstion */ for (tmp_ext= (char**) f_extensions; *tmp_ext; *tmp_ext++) { if (!strcmp(ext, *tmp_ext)) break; } if (*tmp_ext) { if (!(tmp= alloc_root(alloc, 2 + strlen(search_file->name) + strlen(ptr)))) goto err; fn_format(tmp, search_file->name, ptr, "", MY_UNPACK_FILENAME | MY_SAFE_PATH); search_default_file_with_ext(args, alloc, "", "", tmp, group, recursion_level + 1); } } my_dirend(search_dir); } else if ((!strncmp(ptr, include_keyword, sizeof(include_keyword) - 1)) && my_isspace(&my_charset_latin1, ptr[sizeof(include_keyword) - 1])) { /* skip over `include' and following whitespace */ for (ptr+= sizeof(include_keyword) - 1; my_isspace(&my_charset_latin1, ptr[0]); ptr++) {} /* trim trailing whitespace from filename */ end= ptr + strlen(ptr) - 1; for (; my_isspace(&my_charset_latin1, *(end - 1)) ; end--) {} end[0]= 0; if (end == ptr) { fprintf(stderr, "error: Wrong !include directive in config " "file: %s at line %d\n", name,line); goto 
err; } search_default_file_with_ext(args, alloc, "", "", ptr, group, recursion_level + 1); } continue; } if (*ptr == '[') /* Group name */ { found_group=1; Loading strings/ctype-utf8.c +166 −0 Original line number Diff line number Diff line Loading @@ -2380,6 +2380,172 @@ CHARSET_INFO my_charset_utf8_bin= &my_collation_mb_bin_handler }; #ifdef HAVE_CYBOZU_COLLATION /* * These functions bacically do the same as their original, except * that they return 0 only when two comparing unicode strings are * strictly the same in case-sensitive way. See "save_diff" local * variable to what they actually do. */ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, const uchar *s, uint slen, const uchar *t, uint tlen, my_bool t_is_prefix) { int s_res,t_res; my_wc_t s_wc,t_wc; const uchar *se=s+slen; const uchar *te=t+tlen; int save_diff = 0; int diff; while ( s < se && t < te ) { int plane; s_res=my_utf8_uni(cs,&s_wc, s, se); t_res=my_utf8_uni(cs,&t_wc, t, te); if ( s_res <= 0 || t_res <= 0 ) { /* Incorrect string, compare by char value */ return ((int)s[0]-(int)t[0]); } if ( save_diff == 0 ) { save_diff = ((int)s_wc) - ((int)t_wc); } plane=(s_wc>>8) & 0xFF; s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; plane=(t_wc>>8) & 0xFF; t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; if ( s_wc != t_wc ) { return ((int) s_wc) - ((int) t_wc); } s+=s_res; t+=t_res; } diff = ( (se-s) - (te-t) ); return t_is_prefix ? t-te : ((diff == 0) ? 
save_diff : diff); } static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, const uchar *s, uint slen, const uchar *t, uint tlen) { int s_res,t_res; my_wc_t s_wc,t_wc; const uchar *se= s+slen; const uchar *te= t+tlen; int save_diff = 0; while ( s < se && t < te ) { int plane; s_res=my_utf8_uni(cs,&s_wc, s, se); t_res=my_utf8_uni(cs,&t_wc, t, te); if ( s_res <= 0 || t_res <= 0 ) { /* Incorrect string, compare by char value */ return ((int)s[0]-(int)t[0]); } if ( save_diff == 0 ) { save_diff = ((int)s_wc) - ((int)t_wc); } plane=(s_wc>>8) & 0xFF; s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; plane=(t_wc>>8) & 0xFF; t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; if ( s_wc != t_wc ) { return ((int) s_wc) - ((int) t_wc); } s+=s_res; t+=t_res; } slen= se-s; tlen= te-t; if (slen != tlen) { int swap= 0; if (slen < tlen) { slen= tlen; s= t; se= te; swap= -1; } /* This following loop uses the fact that in UTF-8 all multibyte characters are greater than space, and all multibyte head characters are greater than space. It means if we meet a character greater than space, it always means that the longer string is greater. So we can reuse the same loop from the 8bit version, without having to process full multibute sequences. 
*/ for ( ; s < se; s++) { if (*s != ' ') return ((int)*s - (int) ' ') ^ swap; } } return save_diff; } static MY_COLLATION_HANDLER my_collation_cs_handler = { NULL, /* init */ my_strnncoll_utf8_cs, my_strnncollsp_utf8_cs, my_strnxfrm_utf8, my_like_range_simple, my_wildcmp_mb, my_strcasecmp_utf8, my_instr_mb, my_hash_sort_utf8 }; CHARSET_INFO my_charset_utf8_general_cs= { 254,0,0, /* number */ MY_CS_COMPILED|MY_CS_UNICODE, /* state */ "utf8", /* cs name */ "utf8_general_cs", /* name */ "", /* comment */ NULL, /* tailoring */ ctype_utf8, /* ctype */ to_lower_utf8, /* to_lower */ to_upper_utf8, /* to_upper */ to_upper_utf8, /* sort_order */ NULL, /* contractions */ NULL, /* sort_order_big*/ NULL, /* tab_to_uni */ NULL, /* tab_from_uni */ NULL, /* state_map */ NULL, /* ident_map */ 1, /* strxfrm_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 0, /* min_sort_char */ 255, /* max_sort_char */ &my_charset_utf8_handler, &my_collation_cs_handler }; #endif /* Cybozu Hack */ #ifdef MY_TEST_UTF8 #include <stdio.h> Loading Loading
mysys/charset-def.c +6 −0 Original line number Diff line number Diff line Loading @@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci; extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci; extern CHARSET_INFO my_charset_utf8_roman_uca_ci; extern CHARSET_INFO my_charset_utf8_persian_uca_ci; #ifdef HAVE_CYBOZU_COLLATION extern CHARSET_INFO my_charset_utf8_general_cs; #endif #endif #endif /* HAVE_UCA_COLLATIONS */ Loading Loading @@ -146,6 +149,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) #ifdef HAVE_CHARSET_utf8 add_compiled_collation(&my_charset_utf8_general_ci); add_compiled_collation(&my_charset_utf8_bin); #ifdef HAVE_CYBOZU_COLLATION add_compiled_collation(&my_charset_utf8_general_cs); #endif #ifdef HAVE_UCA_COLLATIONS add_compiled_collation(&my_charset_utf8_general_uca_ci); add_compiled_collation(&my_charset_utf8_icelandic_uca_ci); Loading
mysys/default.c +121 −14 Original line number Diff line number Diff line Loading @@ -73,7 +73,7 @@ static int search_default_file(DYNAMIC_ARRAY *args,MEM_ROOT *alloc, static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, const char *dir, const char *ext, const char *config_file, TYPELIB *group); TYPELIB *group, int recursion_level); static char *remove_end_comment(char *ptr); Loading Loading @@ -194,7 +194,7 @@ int load_defaults(const char *conf_file, const char **groups, { if ((error= search_default_file_with_ext(&args, &alloc, "", "", forced_default_file, &group)) < 0) &group, 0)) < 0) goto err; if (error > 0) { Loading Loading @@ -311,7 +311,7 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, { int error; if ((error= search_default_file_with_ext(args, alloc, dir, *ext, config_file, group)) < 0) config_file, group, 0)) < 0) return error; } return 0; Loading @@ -326,9 +326,11 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, args Store pointer to found options here alloc Allocate strings in this object dir directory to read config_file Name of configuration file ext Extension for configuration file config_file Name of configuration file group groups to read recursion_level the level of recursion, got while processing "!include" or "!includedir" RETURN 0 Success Loading @@ -340,12 +342,18 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, const char *dir, const char *ext, const char *config_file, TYPELIB *group) TYPELIB *group, int recursion_level) { char name[FN_REFLEN+10],buff[4096],*ptr,*end,*value,*tmp; char name[FN_REFLEN + 10], buff[4096], *ptr, *end, *value, *tmp, **tmp_ext; static const char includedir_keyword[]= "includedir"; static const char include_keyword[]= "include"; const int max_recursion_level= 10; FILE *fp; uint line= 0; my_bool read_values= 0, found_group= 0; uint i; MY_DIR 
*search_dir; FILEINFO *search_file; if ((dir ? strlen(dir) : 0 )+strlen(config_file) >= FN_REFLEN-3) return 0; /* Ignore wrong paths */ Loading Loading @@ -374,7 +382,7 @@ static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, if ((stat_info.st_mode & S_IWOTH) && (stat_info.st_mode & S_IFMT) == S_IFREG) { fprintf(stderr, "warning: World-writeable config file %s is ignored\n", fprintf(stderr, "warning: World-writable config file %s is ignored\n", name); return 0; } Loading @@ -387,9 +395,108 @@ static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc, { line++; /* Ignore comment and empty lines */ for (ptr=buff ; my_isspace(&my_charset_latin1,*ptr) ; ptr++ ) ; for (ptr= buff; my_isspace(&my_charset_latin1, *ptr); ptr++) {} if (*ptr == '#' || *ptr == ';' || !*ptr) continue; /* Configuration File Directives */ if ((*ptr == '!') && (recursion_level < max_recursion_level)) { /* skip over `!' and following whitespace */ for (++ptr; my_isspace(&my_charset_latin1, ptr[0]); ptr++) {} if ((!strncmp(ptr, includedir_keyword, sizeof(includedir_keyword) - 1)) && my_isspace(&my_charset_latin1, ptr[sizeof(includedir_keyword) - 1])) { /* skip over "includedir" and following whitespace */ for (ptr+= sizeof(includedir_keyword) - 1; my_isspace(&my_charset_latin1, ptr[0]); ptr++) {} /* trim trailing whitespace from directory name */ end= ptr + strlen(ptr) - 1; /* This would work fine even if no whitespaces are met since fgets() stores the newline character in the buffer */ for (; my_isspace(&my_charset_latin1, *(end - 1)); end--) {} end[0]= 0; /* print error msg if there is nothing after !inludedir directive */ if (end == ptr) { fprintf(stderr, "error: Wrong !includedir directive in config " "file: %s at line %d\n", name,line); goto err; } if (!(search_dir= my_dir(ptr, MYF(MY_WME)))) goto err; for (i= 0; i < (uint) search_dir->number_off_files; i++) { search_file= search_dir->dir_entry + i; ext= fn_ext(search_file->name); /* check extenstion 
*/ for (tmp_ext= (char**) f_extensions; *tmp_ext; *tmp_ext++) { if (!strcmp(ext, *tmp_ext)) break; } if (*tmp_ext) { if (!(tmp= alloc_root(alloc, 2 + strlen(search_file->name) + strlen(ptr)))) goto err; fn_format(tmp, search_file->name, ptr, "", MY_UNPACK_FILENAME | MY_SAFE_PATH); search_default_file_with_ext(args, alloc, "", "", tmp, group, recursion_level + 1); } } my_dirend(search_dir); } else if ((!strncmp(ptr, include_keyword, sizeof(include_keyword) - 1)) && my_isspace(&my_charset_latin1, ptr[sizeof(include_keyword) - 1])) { /* skip over `include' and following whitespace */ for (ptr+= sizeof(include_keyword) - 1; my_isspace(&my_charset_latin1, ptr[0]); ptr++) {} /* trim trailing whitespace from filename */ end= ptr + strlen(ptr) - 1; for (; my_isspace(&my_charset_latin1, *(end - 1)) ; end--) {} end[0]= 0; if (end == ptr) { fprintf(stderr, "error: Wrong !include directive in config " "file: %s at line %d\n", name,line); goto err; } search_default_file_with_ext(args, alloc, "", "", ptr, group, recursion_level + 1); } continue; } if (*ptr == '[') /* Group name */ { found_group=1; Loading
/*
  strings/ctype-utf8.c, diff hunk @@ -2380,6 +2380,172 @@ (+166 -0).
  Leading diff context: the tail of the my_charset_utf8_bin definition
  ("CHARSET_INFO my_charset_utf8_bin= ... &my_collation_mb_bin_handler };").
*/

#ifdef HAVE_CYBOZU_COLLATION
/*
  Case-sensitive variants of the utf8 comparison functions.

  Both functions first order the strings by the case-folded sort weights
  found in uni_plane[]. Only when that comparison finds no difference do
  they fall back to "save_diff": the difference between the first pair of
  code points that were not identical before folding. Because of that
  they return 0 only when the two strings are strictly the same in a
  case-sensitive way (see the t_is_prefix note below for the one
  exception).
*/

/*
  Compare two utf8 strings.

  SYNOPSIS
    my_strnncoll_utf8_cs()
    cs           Character set, handed on to my_utf8_uni()
    s, slen      First string and its length in bytes
    t, tlen      Second string and its length in bytes
    t_is_prefix  When set, the result after the scan is (t - te), i.e. 0
                 is returned as soon as all of t has been consumed

  NOTES
    With t_is_prefix set, save_diff is not consulted, so a prefix match
    only looks at the folded sort weights.

    When my_utf8_uni() rejects a sequence in either string, the rest of
    both strings is compared byte by byte. If no byte differs, the usual
    length / prefix / save_diff rules decide, so two different strings
    are no longer reported as equal just because the first byte of the
    bad sequence is the same.

  RETURN
    < 0   s sorts before t
    0     no difference found (see above)
    > 0   s sorts after t
*/

static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
                                const uchar *s, uint slen,
                                const uchar *t, uint tlen,
                                my_bool t_is_prefix)
{
  int s_res, t_res;
  my_wc_t s_wc, t_wc;
  const uchar *se= s + slen;
  const uchar *te= t + tlen;
  int save_diff= 0;
  int diff;

  while (s < se && t < te)
  {
    int plane;
    s_res= my_utf8_uni(cs, &s_wc, s, se);
    t_res= my_utf8_uni(cs, &t_wc, t, te);

    if (s_res <= 0 || t_res <= 0)
    {
      /* Incorrect string, compare the remaining bytes by value */
      while (s < se && t < te)
      {
        if (*s != *t)
          return ((int) *s) - ((int) *t);
        s++;
        t++;
      }
      /* Common part is identical: let the code after the loop decide */
      break;
    }

    /* Remember the first difference seen before case folding */
    if (save_diff == 0)
      save_diff= ((int) s_wc) - ((int) t_wc);

    /* Replace both code points by their sort weight, if the plane has one */
    plane= (s_wc >> 8) & 0xFF;
    s_wc= uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
    plane= (t_wc >> 8) & 0xFF;
    t_wc= uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;

    if (s_wc != t_wc)
      return ((int) s_wc) - ((int) t_wc);

    s+= s_res;
    t+= t_res;
  }

  if (t_is_prefix)
    return (int) (t - te);

  /* The longer string wins; equal lengths are decided by save_diff */
  diff= (int) ((se - s) - (te - t));
  return diff ? diff : save_diff;
}


/*
  Compare two utf8 strings, ignoring trailing spaces.

  SYNOPSIS
    my_strnncollsp_utf8_cs()
    cs           Character set, handed on to my_utf8_uni()
    s, slen      First string and its length in bytes
    t, tlen      Second string and its length in bytes

  NOTES
    After the common part has been compared, the rest of the longer
    string is scanned; the first byte that is not a space decides the
    result. If only spaces are left, save_diff is returned.

    Invalid sequences are handled as in my_strnncoll_utf8_cs(): the
    remaining bytes are compared by value, and if none differs the
    trailing-space rule and save_diff decide.

  RETURN
    < 0   s sorts before t
    0     the strings are the same apart from trailing spaces
    > 0   s sorts after t
*/

static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
                                  const uchar *s, uint slen,
                                  const uchar *t, uint tlen)
{
  int s_res, t_res;
  my_wc_t s_wc, t_wc;
  const uchar *se= s + slen;
  const uchar *te= t + tlen;
  int save_diff= 0;

  while (s < se && t < te)
  {
    int plane;
    s_res= my_utf8_uni(cs, &s_wc, s, se);
    t_res= my_utf8_uni(cs, &t_wc, t, te);

    if (s_res <= 0 || t_res <= 0)
    {
      /* Incorrect string, compare the remaining bytes by value */
      while (s < se && t < te)
      {
        if (*s != *t)
          return ((int) *s) - ((int) *t);
        s++;
        t++;
      }
      /* Common part is identical: let the code after the loop decide */
      break;
    }

    /* Remember the first difference seen before case folding */
    if (save_diff == 0)
      save_diff= ((int) s_wc) - ((int) t_wc);

    /* Replace both code points by their sort weight, if the plane has one */
    plane= (s_wc >> 8) & 0xFF;
    s_wc= uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
    plane= (t_wc >> 8) & 0xFF;
    t_wc= uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;

    if (s_wc != t_wc)
      return ((int) s_wc) - ((int) t_wc);

    s+= s_res;
    t+= t_res;
  }

  slen= (uint) (se - s);
  tlen= (uint) (te - t);

  if (slen != tlen)
  {
    int swap= 0;
    if (slen < tlen)
    {
      /* t is the longer string: scan its tail and invert the result */
      s= t;
      se= te;
      swap= -1;
    }
    /*
      This following loop uses the fact that in UTF-8
      all multibyte characters are greater than space,
      and all multibyte head characters are greater than
      space. It means if we meet a character greater
      than space, it always means that the longer string
      is greater. So we can reuse the same loop from the
      8bit version, without having to process full multibyte
      sequences.
    */
    for ( ; s < se; s++)
    {
      if (*s != ' ')
        return ((int) *s - (int) ' ') ^ swap;
    }
  }
  return save_diff;
}


/*
  Collation handler for utf8_general_cs: only the two comparison
  functions are specific to this collation.

  NOTE(review): my_strnxfrm_utf8, my_strcasecmp_utf8 and my_hash_sort_utf8
  are defined outside this hunk; presumably they are the routines used by
  the case-insensitive utf8 collation. Confirm that the sort keys and
  hashes they produce are acceptable for a case-sensitive collation.
*/

static MY_COLLATION_HANDLER my_collation_cs_handler =
{
  NULL,                     /* init */
  my_strnncoll_utf8_cs,
  my_strnncollsp_utf8_cs,
  my_strnxfrm_utf8,
  my_like_range_simple,
  my_wildcmp_mb,
  my_strcasecmp_utf8,
  my_instr_mb,
  my_hash_sort_utf8
};

CHARSET_INFO my_charset_utf8_general_cs=
{
  254,0,0,                  /* number           */
  MY_CS_COMPILED|MY_CS_UNICODE, /* state        */
  "utf8",                   /* cs name          */
  "utf8_general_cs",        /* name             */
  "",                       /* comment          */
  NULL,                     /* tailoring        */
  ctype_utf8,               /* ctype            */
  to_lower_utf8,            /* to_lower         */
  to_upper_utf8,            /* to_upper         */
  to_upper_utf8,            /* sort_order       */
  NULL,                     /* contractions     */
  NULL,                     /* sort_order_big   */
  NULL,                     /* tab_to_uni       */
  NULL,                     /* tab_from_uni     */
  NULL,                     /* state_map        */
  NULL,                     /* ident_map        */
  1,                        /* strxfrm_multiply */
  1,                        /* mbminlen         */
  3,                        /* mbmaxlen         */
  0,                        /* min_sort_char    */
  255,                      /* max_sort_char    */
  &my_charset_utf8_handler,
  &my_collation_cs_handler
};
#endif /* Cybozu Hack */

/*
  Trailing diff context (start of the next section of the file):
  "#ifdef MY_TEST_UTF8" followed by "#include <stdio.h>".
*/