Loading mysql-test/r/ctype_uca.result +30 −0 Original line number Diff line number Diff line DROP TABLE IF EXISTS t1; set names utf8; set collation_connection=utf8_unicode_ci; select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; 'a' = 'a' 'a' = 'a ' 'a ' = 'a' 1 1 1 select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a'; 'a\t' = 'a' 'a\t' < 'a' 'a\t' > 'a' 0 1 0 select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a '; 'a\t' = 'a ' 'a\t' < 'a ' 'a\t' > 'a ' 0 1 0 select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t'; 'a' = 'a\t' 'a' < 'a\t' 'a' > 'a\t' 0 0 1 select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t'; 'a ' = 'a\t' 'a ' < 'a\t' 'a ' > 'a\t' 0 0 1 select 'a a' > 'a', 'a \t' < 'a'; 'a a' > 'a' 'a \t' < 'a' 1 1 CREATE TABLE t ( c char(20) NOT NULL ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; INSERT INTO t VALUES ('a'),('ab'),('aba'); ALTER TABLE t ADD INDEX (c); SELECT c FROM t WHERE c LIKE 'a%'; c a ab aba DROP TABLE t; create table t1 (c1 char(10) character set utf8 collate utf8_bin); insert into t1 values ('A'),('a'); insert into t1 values ('B'),('b'); Loading mysql-test/t/ctype_uca.test +28 −1 Original line number Diff line number Diff line Loading @@ -7,8 +7,35 @@ DROP TABLE IF EXISTS t1; # # Test Unicode collations. 
# set names utf8; # # Check trailing spaces # set collation_connection=utf8_unicode_ci; select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a'; select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a '; select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t'; select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t'; select 'a a' > 'a', 'a \t' < 'a'; # # Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters # CREATE TABLE t ( c char(20) NOT NULL ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; INSERT INTO t VALUES ('a'),('ab'),('aba'); ALTER TABLE t ADD INDEX (c); SELECT c FROM t WHERE c LIKE 'a%'; #should find 3 rows but only found 2 DROP TABLE t; create table t1 (c1 char(10) character set utf8 collate utf8_bin); # Loading strings/CHARSET_INFO.txt +11 −2 Original line number Diff line number Diff line Loading @@ -74,7 +74,16 @@ Conversion tables ctype - pointer to array[257] of "type of characters" bit mask for each character, e.g. if a character is a digit or a letter or a separator, etc. to_lower - pointer to arrat[256] used in LCASE() Monty 2004-10-21: If you look at the macros, we use ctype[(char)+1]. ctype[0] is traditionally in most ctype libraries reserved for EOF (-1). The idea is that you can use the result from fgetc() directly with ctype[]. As we have to be compatible with external ctype[] versions, it's better to do it the same way as they do... to_lower - pointer to array[256] used in LCASE() to_upper - pointer to array[256] used in UCASE() sort_order - pointer to array[256] used for strings comparison Loading Loading @@ -137,7 +146,7 @@ following set of functions: Multibyte routines ------------------ ismbchar() - detects if the given string is a multibyte sequence mbcharlen() - retuturns length of multibyte sequence starting with mbcharlen() - returns length of multibyte sequence starting with the given character numchars() - returns number of characters in the given string, e.g. in SQL function CHAR_LENGTH(). 
Loading strings/ctype-uca.c +53 −3 Original line number Diff line number Diff line Loading @@ -7053,6 +7053,28 @@ static int my_strnncoll_uca(CHARSET_INFO *cs, Works exactly the same with my_strnncoll_uca(), but ignores trailing spaces. In the while() comparison these situations are possible: 1. (s_res>0) and (t_res>0) and (s_res == t_res) Weights are the same so far, continue comparison 2. (s_res>0) and (t_res>0) and (s_res!=t_res) A difference has been found, return. 3. (s_res>0) and (t_res<0) We have reached the end of the second string, or found an illegal multibyte sequence in the second string. Compare the first string to an infinite array of space characters until difference is found, or until the end of the first string. 4. (s_res<0) and (t_res>0) We have reached the end of the first string, or found an illegal multibyte sequence in the first string. Compare the second string to an infinite array of space characters until difference is found or until the end of the second string. 5. (s_res<0) and (t_res<0) Both scanners returned -1. It means we have reached the end-of-string or illegal-sequence in both strings at the same time. Return 0, strings are equal. 
RETURN Difference between two strings, according to the collation: 0 - means strings are equal Loading @@ -7070,9 +7092,6 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, int s_res; int t_res; slen= cs->cset->lengthsp(cs, (char*) s, slen); tlen= cs->cset->lengthsp(cs, (char*) t, tlen); scanner_handler->init(&sscanner, cs, s, slen); scanner_handler->init(&tscanner, cs, t, tlen); Loading @@ -7080,6 +7099,37 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, { s_res= scanner_handler->next(&sscanner); t_res= scanner_handler->next(&tscanner); if (s_res > 0 && t_res < 0) { /* Calculate weight for SPACE character */ t_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]]; /* compare the first string to spaces */ do { if (s_res != t_res) return (s_res - t_res); s_res= scanner_handler->next(&sscanner); } while (s_res > 0); return 0; } if (s_res < 0 && t_res > 0) { /* Calculate weight for SPACE character */ s_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]]; /* compare the second string to spaces */ do { if (s_res != t_res) return (s_res - t_res); t_res= scanner_handler->next(&tscanner); } while (t_res > 0); return 0; } } while ( s_res == t_res && s_res >0); return ( s_res - t_res ); Loading Loading
mysql-test/r/ctype_uca.result +30 −0 Original line number Diff line number Diff line DROP TABLE IF EXISTS t1; set names utf8; set collation_connection=utf8_unicode_ci; select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; 'a' = 'a' 'a' = 'a ' 'a ' = 'a' 1 1 1 select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a'; 'a\t' = 'a' 'a\t' < 'a' 'a\t' > 'a' 0 1 0 select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a '; 'a\t' = 'a ' 'a\t' < 'a ' 'a\t' > 'a ' 0 1 0 select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t'; 'a' = 'a\t' 'a' < 'a\t' 'a' > 'a\t' 0 0 1 select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t'; 'a ' = 'a\t' 'a ' < 'a\t' 'a ' > 'a\t' 0 0 1 select 'a a' > 'a', 'a \t' < 'a'; 'a a' > 'a' 'a \t' < 'a' 1 1 CREATE TABLE t ( c char(20) NOT NULL ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; INSERT INTO t VALUES ('a'),('ab'),('aba'); ALTER TABLE t ADD INDEX (c); SELECT c FROM t WHERE c LIKE 'a%'; c a ab aba DROP TABLE t; create table t1 (c1 char(10) character set utf8 collate utf8_bin); insert into t1 values ('A'),('a'); insert into t1 values ('B'),('b'); Loading
mysql-test/t/ctype_uca.test +28 −1 Original line number Diff line number Diff line Loading @@ -7,8 +7,35 @@ DROP TABLE IF EXISTS t1; # # Test Unicode collations. # set names utf8; # # Check trailing spaces # set collation_connection=utf8_unicode_ci; select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a'; select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a '; select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t'; select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t'; select 'a a' > 'a', 'a \t' < 'a'; # # Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters # CREATE TABLE t ( c char(20) NOT NULL ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; INSERT INTO t VALUES ('a'),('ab'),('aba'); ALTER TABLE t ADD INDEX (c); SELECT c FROM t WHERE c LIKE 'a%'; #should find 3 rows but only found 2 DROP TABLE t; create table t1 (c1 char(10) character set utf8 collate utf8_bin); # Loading
strings/CHARSET_INFO.txt +11 −2 Original line number Diff line number Diff line Loading @@ -74,7 +74,16 @@ Conversion tables ctype - pointer to array[257] of "type of characters" bit mask for each character, e.g. if a character is a digit or a letter or a separator, etc. to_lower - pointer to arrat[256] used in LCASE() Monty 2004-10-21: If you look at the macros, we use ctype[(char)+1]. ctype[0] is traditionally in most ctype libraries reserved for EOF (-1). The idea is that you can use the result from fgetc() directly with ctype[]. As we have to be compatible with external ctype[] versions, it's better to do it the same way as they do... to_lower - pointer to array[256] used in LCASE() to_upper - pointer to array[256] used in UCASE() sort_order - pointer to array[256] used for strings comparison Loading Loading @@ -137,7 +146,7 @@ following set of functions: Multibyte routines ------------------ ismbchar() - detects if the given string is a multibyte sequence mbcharlen() - retuturns length of multibyte sequence starting with mbcharlen() - returns length of multibyte sequence starting with the given character numchars() - returns number of characters in the given string, e.g. in SQL function CHAR_LENGTH(). Loading
strings/ctype-uca.c +53 −3 Original line number Diff line number Diff line Loading @@ -7053,6 +7053,28 @@ static int my_strnncoll_uca(CHARSET_INFO *cs, Works exactly the same with my_strnncoll_uca(), but ignores trailing spaces. In the while() comparison these situations are possible: 1. (s_res>0) and (t_res>0) and (s_res == t_res) Weights are the same so far, continue comparison 2. (s_res>0) and (t_res>0) and (s_res!=t_res) A difference has been found, return. 3. (s_res>0) and (t_res<0) We have reached the end of the second string, or found an illegal multibyte sequence in the second string. Compare the first string to an infinite array of space characters until difference is found, or until the end of the first string. 4. (s_res<0) and (t_res>0) We have reached the end of the first string, or found an illegal multibyte sequence in the first string. Compare the second string to an infinite array of space characters until difference is found or until the end of the second string. 5. (s_res<0) and (t_res<0) Both scanners returned -1. It means we have reached the end-of-string or illegal-sequence in both strings at the same time. Return 0, strings are equal. 
RETURN Difference between two strings, according to the collation: 0 - means strings are equal Loading @@ -7070,9 +7092,6 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, int s_res; int t_res; slen= cs->cset->lengthsp(cs, (char*) s, slen); tlen= cs->cset->lengthsp(cs, (char*) t, tlen); scanner_handler->init(&sscanner, cs, s, slen); scanner_handler->init(&tscanner, cs, t, tlen); Loading @@ -7080,6 +7099,37 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, { s_res= scanner_handler->next(&sscanner); t_res= scanner_handler->next(&tscanner); if (s_res > 0 && t_res < 0) { /* Calculate weight for SPACE character */ t_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]]; /* compare the first string to spaces */ do { if (s_res != t_res) return (s_res - t_res); s_res= scanner_handler->next(&sscanner); } while (s_res > 0); return 0; } if (s_res < 0 && t_res > 0) { /* Calculate weight for SPACE character */ s_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]]; /* compare the second string to spaces */ do { if (s_res != t_res) return (s_res - t_res); t_res= scanner_handler->next(&tscanner); } while (t_res > 0); return 0; } } while ( s_res == t_res && s_res >0); return ( s_res - t_res ); Loading