Commit bf45b6ba authored by unknown's avatar unknown
Browse files

ctype-gbk.c:

  Bug #11987
  mysql will truncate the text when the text contain GBK char:"0xA3A0" and "0xA1"
  Allow storing and retrieving even unassigned GBK codes,
  as was done for Big5 earlier.
have_gbk.inc, have_gbk.require, ctype_gbk.result, ctype_gbk.test:
  new file


strings/ctype-gbk.c:
  Bug #11987
  mysql will truncate the text when the text contain GBK char:"0xA3A0" and "0xA1"
  Allow storing and retrieving even unassigned GBK codes,
  as was done for Big5 earlier.
parent a68a2da0
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
# Guard for tests that need the gbk character set.
# The output of the SHOW COLLATION query below is checked against
# r/have_gbk.require; the query log is switched off around it so that the
# statement itself is not echoed into the including test's output.
-- require r/have_gbk.require
disable_query_log;
show collation like "gbk_chinese_ci";
enable_query_log;
+131 −0
Original line number Diff line number Diff line
drop table if exists t1;
SET @test_character_set= 'gbk';
SET @test_collation= 'gbk_chinese_ci';
SET @safe_character_set_server= @@character_set_server;
SET @safe_collation_server= @@collation_server;
SET character_set_server= @test_character_set;
SET collation_server= @test_collation;
CREATE DATABASE d1;
USE d1;
CREATE TABLE t1 (c CHAR(10), KEY(c));
SHOW FULL COLUMNS FROM t1;
Field	Type	Collation	Null	Key	Default	Extra	Privileges	Comment
c	char(10)	gbk_chinese_ci	YES	MUL	NULL			
INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa');
SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%';
want3results
aaa
aaaa
aaaaa
DROP TABLE t1;
CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2)));
SHOW FULL COLUMNS FROM t1;
Field	Type	Collation	Null	Key	Default	Extra	Privileges	Comment
c1	varchar(15)	gbk_chinese_ci	YES	MUL	NULL			
INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab');
SELECT c1 as want3results from t1 where c1 like 'l%';
want3results
location
loberge
lotre
SELECT c1 as want3results from t1 where c1 like 'lo%';
want3results
location
loberge
lotre
SELECT c1 as want1result  from t1 where c1 like 'loc%';
want1result
location
SELECT c1 as want1result  from t1 where c1 like 'loca%';
want1result
location
SELECT c1 as want1result  from t1 where c1 like 'locat%';
want1result
location
SELECT c1 as want1result  from t1 where c1 like 'locati%';
want1result
location
SELECT c1 as want1result  from t1 where c1 like 'locatio%';
want1result
location
SELECT c1 as want1result  from t1 where c1 like 'location%';
want1result
location
DROP TABLE t1;
DROP DATABASE d1;
USE test;
SET character_set_server= @safe_character_set_server;
SET collation_server= @safe_collation_server;
SET NAMES gbk;
SET collation_connection='gbk_chinese_ci';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a)	hex(a)
gbk_chinese_ci	6109
gbk_chinese_ci	61
gbk_chinese_ci	6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
gbk_chinese_ci
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
SET collation_connection='gbk_bin';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a)	hex(a)
gbk_bin	6109
gbk_bin	61
gbk_bin	6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
gbk_bin
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
SET NAMES gbk;
CREATE TABLE t1 (a text) character set gbk;
INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
SELECT hex(a) FROM t1 ORDER BY a;
hex(a)
A1A1
A3A0
DROP TABLE t1;
+2 −0
Original line number Diff line number Diff line
Collation	Charset	Id	Default	Compiled	Sortlen
gbk_chinese_ci	gbk	28	Yes	Yes	1
+30 −0
Original line number Diff line number Diff line
# Guard include: checks that the gbk_chinese_ci collation is available.
-- source include/have_gbk.inc

#
# Tests with the gbk character set
#
# Layout of this file:
#   1. shared checks from include/ctype_common.inc
#   2. filesort and InnoDB LIKE includes, once for gbk_chinese_ci and once
#      for gbk_bin
#   3. regression test for Bug#11987
#
--disable_warnings
drop table if exists t1;
--enable_warnings

# Character set and collation under test; presumably read by
# include/ctype_common.inc (confirm against that include).
SET @test_character_set= 'gbk';
SET @test_collation= 'gbk_chinese_ci';
-- source include/ctype_common.inc

# The same two includes are run for each gbk collation in turn; only
# collation_connection changes between the two passes.
SET NAMES gbk;
SET collation_connection='gbk_chinese_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
SET collation_connection='gbk_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc

#
# Bug#11987 mysql will truncate the text when
# the text contain GBK char:"0xA3A0" and "0xA1"
#
# The two-byte values 0xA3A0 and 0xA1A1 are inserted as hex literals into a
# gbk TEXT column and read back through hex(), so any truncation of the
# stored value would be visible in the output.
#
SET NAMES gbk;
CREATE TABLE t1 (a text) character set gbk;
INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
SELECT hex(a) FROM t1 ORDER BY a;
DROP TABLE t1;
+38 −1
Original line number Diff line number Diff line
@@ -9925,6 +9925,43 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Compute the well formed length of a GBK string.

  SYNOPSIS
    my_well_formed_len_gbk()
    cs      Character set (not used)
    b       First byte of the string
    e       End of the string; bytes at or beyond e are never read
    pos     Maximum number of characters to accept
    error   OUT: 0 if no wrong byte sequence was met, 1 otherwise

  NOTES
    A byte below 128 counts as one single byte character. Any other byte
    must be followed, before e, by a second byte such that isgbkcode()
    accepts the pair; the pair then counts as one character.

  RETURN
    Number of bytes, counted from b, taken by the characters accepted
    before the scan stopped (pos exhausted, e reached, or wrong sequence).
*/
static
uint my_well_formed_len_gbk(CHARSET_INFO *cs __attribute__((unused)),
                            const char *b, const char *e,
                            uint pos, int *error)
{
  const char *start= b;

  *error= 0;
  for ( ; pos && b < e; pos--)
  {
    if ((uchar) *b < 128)
    {
      /* Single byte ascii character */
      b++;
      continue;
    }
    /* The bounds test comes first so that b[1] is only read inside [b, e) */
    if (b + 1 >= e || !isgbkcode((uchar) b[0], (uchar) b[1]))
    {
      /* Wrong byte sequence: stop in front of it and report */
      *error= 1;
      break;
    }
    /* Double byte character */
    b+= 2;
  }
  return b - start;
}
                             
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
  NULL,			/* init */
@@ -9945,7 +9982,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
  mbcharlen_gbk,
  my_numchars_mb,
  my_charpos_mb,
  my_well_formed_len_mb,
  my_well_formed_len_gbk,
  my_lengthsp_8bit,
  my_numcells_8bit,
  my_mb_wc_gbk,