Loading mysql-test/r/ctype_big5.result +13 −0 Original line number Diff line number Diff line Loading @@ -192,3 +192,16 @@ drop table t1; select hex(convert(_big5 0xC84041 using ucs2)); hex(convert(_big5 0xC84041 using ucs2)) 003F0041 End of 4.1 tests create table t1 (a blob); insert into t1 values (0xEE00); delete from t1; select hex(load_file('test/t1.txt')); hex(load_file('test/t1.txt')) 5CEE5C300A load data infile 't1.txt' into table t1; select hex(a) from t1; hex(a) EE00 drop table t1; End of 5.0 tests mysql-test/t/ctype_big5.test +17 −1 Original line number Diff line number Diff line Loading @@ -63,4 +63,20 @@ drop table t1; # select hex(convert(_big5 0xC84041 using ucs2)); # End of 4.1 tests --echo End of 4.1 tests # # Bug#26711 "binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load" # create table t1 (a blob); insert into t1 values (0xEE00); --exec $MYSQL_DUMP --default-character-set=big5 -T $MYSQLTEST_VARDIR/master-data/test test t1 delete from t1; select hex(load_file('test/t1.txt')); load data infile 't1.txt' into table t1; select hex(a) from t1; --exec rm $MYSQLTEST_VARDIR/master-data/test/t1.txt --exec rm $MYSQLTEST_VARDIR/master-data/test/t1.sql drop table t1; --echo End of 5.0 tests sql/sql_class.cc +53 −6 Original line number Diff line number Diff line Loading @@ -1221,6 +1221,11 @@ select_export::prepare(List<Item> &list, SELECT_LEX_UNIT *u) } #define NEED_ESCAPING(x) ((int) (uchar) (x) == escape_char || \ (int) (uchar) (x) == field_sep_char || \ (int) (uchar) (x) == line_sep_char || \ !(x)) bool select_export::send_data(List<Item> &items) { Loading Loading @@ -1281,13 +1286,19 @@ bool select_export::send_data(List<Item> &items) if (result_type == STRING_RESULT && escape_char != -1) { char *pos, *start, *end; CHARSET_INFO *res_charset= res->charset(); CHARSET_INFO *character_set_client= thd->variables. 
character_set_client; bool check_second_byte= (res_charset == &my_charset_bin) && character_set_client-> escape_with_backslash_is_dangerous; DBUG_ASSERT(character_set_client->mbmaxlen == 2 || !character_set_client->escape_with_backslash_is_dangerous); for (start=pos=(char*) res->ptr(),end=pos+used_length ; pos != end ; pos++) { #ifdef USE_MB CHARSET_INFO *res_charset=res->charset(); if (use_mb(res_charset)) { int l; Loading @@ -1298,8 +1309,44 @@ bool select_export::send_data(List<Item> &items) } } #endif if ((int) *pos == escape_char || (int) *pos == field_sep_char || (int) *pos == line_sep_char || !*pos) /* Special case when dumping BINARY/VARBINARY/BLOB values for the clients with character sets big5, cp932, gbk and sjis, which can have the escape character (0x5C "\" by default) as the second byte of a multi-byte sequence. If - pos[0] is a valid multi-byte head (e.g. 0xEE) and - pos[1] is 0x00, which will be escaped as "\0", then we'll get "0xEE + 0x5C + 0x30" in the output file. If this file is later loaded using this sequence of commands: mysql> create table t1 (a varchar(128)) character set big5; mysql> LOAD DATA INFILE 'dump.txt' INTO TABLE t1; then 0x5C will be misinterpreted as the second byte of a multi-byte character "0xEE + 0x5C", instead of as the escape character for 0x00. To avoid this confusion, we'll escape the multi-byte head character too, so the sequence "0xEE + 0x00" will be dumped as "0x5C + 0xEE + 0x5C + 0x30". Note, in the condition below we only check if mbcharlen is equal to 2, because there are no character sets with mbmaxlen longer than 2 and with escape_with_backslash_is_dangerous set. The DBUG_ASSERT before the loop ensures this. 
*/ if (NEED_ESCAPING(*pos) || (check_second_byte && my_mbcharlen(character_set_client, (uchar) *pos) == 2 && pos + 1 < end && NEED_ESCAPING(pos[1]))) { char tmp_buff[2]; tmp_buff[0]= escape_char; Loading strings/ctype-big5.c +2 −2 Original line number Diff line number Diff line Loading @@ -6400,7 +6400,7 @@ CHARSET_INFO my_charset_big5_chinese_ci= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_big5_handler, &my_collation_big5_chinese_ci_handler }; Loading Loading @@ -6433,7 +6433,7 @@ CHARSET_INFO my_charset_big5_bin= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_big5_handler, &my_collation_mb_bin_handler }; Loading strings/ctype-gbk.c +2 −2 Original line number Diff line number Diff line Loading @@ -10046,7 +10046,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, &my_collation_ci_handler }; Loading Loading @@ -10078,7 +10078,7 @@ CHARSET_INFO my_charset_gbk_bin= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, &my_collation_mb_bin_handler }; Loading
mysql-test/r/ctype_big5.result +13 −0 Original line number Diff line number Diff line Loading @@ -192,3 +192,16 @@ drop table t1; select hex(convert(_big5 0xC84041 using ucs2)); hex(convert(_big5 0xC84041 using ucs2)) 003F0041 End of 4.1 tests create table t1 (a blob); insert into t1 values (0xEE00); delete from t1; select hex(load_file('test/t1.txt')); hex(load_file('test/t1.txt')) 5CEE5C300A load data infile 't1.txt' into table t1; select hex(a) from t1; hex(a) EE00 drop table t1; End of 5.0 tests
mysql-test/t/ctype_big5.test +17 −1 Original line number Diff line number Diff line Loading @@ -63,4 +63,20 @@ drop table t1; # select hex(convert(_big5 0xC84041 using ucs2)); # End of 4.1 tests --echo End of 4.1 tests # # Bug#26711 "binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load" # create table t1 (a blob); insert into t1 values (0xEE00); --exec $MYSQL_DUMP --default-character-set=big5 -T $MYSQLTEST_VARDIR/master-data/test test t1 delete from t1; select hex(load_file('test/t1.txt')); load data infile 't1.txt' into table t1; select hex(a) from t1; --exec rm $MYSQLTEST_VARDIR/master-data/test/t1.txt --exec rm $MYSQLTEST_VARDIR/master-data/test/t1.sql drop table t1; --echo End of 5.0 tests
sql/sql_class.cc +53 −6 Original line number Diff line number Diff line Loading @@ -1221,6 +1221,11 @@ select_export::prepare(List<Item> &list, SELECT_LEX_UNIT *u) } #define NEED_ESCAPING(x) ((int) (uchar) (x) == escape_char || \ (int) (uchar) (x) == field_sep_char || \ (int) (uchar) (x) == line_sep_char || \ !(x)) bool select_export::send_data(List<Item> &items) { Loading Loading @@ -1281,13 +1286,19 @@ bool select_export::send_data(List<Item> &items) if (result_type == STRING_RESULT && escape_char != -1) { char *pos, *start, *end; CHARSET_INFO *res_charset= res->charset(); CHARSET_INFO *character_set_client= thd->variables. character_set_client; bool check_second_byte= (res_charset == &my_charset_bin) && character_set_client-> escape_with_backslash_is_dangerous; DBUG_ASSERT(character_set_client->mbmaxlen == 2 || !character_set_client->escape_with_backslash_is_dangerous); for (start=pos=(char*) res->ptr(),end=pos+used_length ; pos != end ; pos++) { #ifdef USE_MB CHARSET_INFO *res_charset=res->charset(); if (use_mb(res_charset)) { int l; Loading @@ -1298,8 +1309,44 @@ bool select_export::send_data(List<Item> &items) } } #endif if ((int) *pos == escape_char || (int) *pos == field_sep_char || (int) *pos == line_sep_char || !*pos) /* Special case when dumping BINARY/VARBINARY/BLOB values for the clients with character sets big5, cp932, gbk and sjis, which can have the escape character (0x5C "\" by default) as the second byte of a multi-byte sequence. If - pos[0] is a valid multi-byte head (e.g 0xEE) and - pos[1] is 0x00, which will be escaped as "\0", then we'll get "0xEE + 0x5C + 0x30" in the output file. If this file is later loaded using this sequence of commands: mysql> create table t1 (a varchar(128)) character set big5; mysql> LOAD DATA INFILE 'dump.txt' INTO TABLE t1; then 0x5C will be misinterpreted as the second byte of a multi-byte character "0xEE + 0x5C", instead of escape character for 0x00. 
To avoid this confusion, we'll escape the multi-byte head character too, so the sequence "0xEE + 0x00" will be dumped as "0x5C + 0xEE + 0x5C + 0x30". Note, in the condition below we only check if mbcharlen is equal to 2, because there are no character sets with mbmaxlen longer than 2 and with escape_with_backslash_is_dangerous set. The DBUG_ASSERT before the loop ensures this. */ if (NEED_ESCAPING(*pos) || (check_second_byte && my_mbcharlen(character_set_client, (uchar) *pos) == 2 && pos + 1 < end && NEED_ESCAPING(pos[1]))) { char tmp_buff[2]; tmp_buff[0]= escape_char; Loading
strings/ctype-big5.c +2 −2 Original line number Diff line number Diff line Loading @@ -6400,7 +6400,7 @@ CHARSET_INFO my_charset_big5_chinese_ci= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_big5_handler, &my_collation_big5_chinese_ci_handler }; Loading Loading @@ -6433,7 +6433,7 @@ CHARSET_INFO my_charset_big5_bin= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_big5_handler, &my_collation_mb_bin_handler }; Loading
strings/ctype-gbk.c +2 −2 Original line number Diff line number Diff line Loading @@ -10046,7 +10046,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, &my_collation_ci_handler }; Loading Loading @@ -10078,7 +10078,7 @@ CHARSET_INFO my_charset_gbk_bin= 0, /* min_sort_char */ 255, /* max_sort_char */ ' ', /* pad char */ 0, /* escape_with_backslash_is_dangerous */ 1, /* escape_with_backslash_is_dangerous */ &my_charset_handler, &my_collation_mb_bin_handler };