Commit 90aa6e00 authored by unknown's avatar unknown
Browse files

Allow inserting of extra HKSCS and cp950 characters into a Big5 column.

parent cdf8e293
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -77,3 +77,10 @@ big5_bin 6109
big5_bin	61
big5_bin	6120
drop table t1;
SET NAMES big5;
CREATE TABLE t1 (a text) character set big5;
INSERT INTO t1 VALUES ('ùØ');
SELECT * FROM t1;
a
ùØ
DROP TABLE t1;
+9 −0
Original line number Diff line number Diff line
@@ -16,3 +16,12 @@ SET collation_connection='big5_chinese_ci';
-- source include/ctype_filesort.inc
SET collation_connection='big5_bin';
-- source include/ctype_filesort.inc

#
# Bug#9357: TEXT columns break string with special word in BIG5 charset.
#
SET NAMES big5;
CREATE TABLE t1 (a text) character set big5;
INSERT INTO t1 VALUES ('ùØ');
SELECT * FROM t1;
DROP TABLE t1;
+38 −1
Original line number Diff line number Diff line
@@ -6271,6 +6271,43 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
  return 2;
}


/*
  Returns a well formed length of a BIG5 string.
  CP950 and HKSCS additional characters are also accepted.

  SYNOPSIS
    my_well_formed_len_big5()
    cs    Character set descriptor (unused)
    b     Start of the string
    e     End of the string (points one past the last byte)
    pos   Maximum number of characters to scan

  RETURN
    Length in bytes of the leading part of [b, e) made of at most
    "pos" well formed characters. Scanning stops early at the first
    wrong or truncated byte sequence.
*/
static
uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
                             const char *b, const char *e, uint pos)
{
  const char *b0= b;
  /*
    One unit of "pos" is consumed per accepted character, so that
    the caller's character limit is honoured.
  */
  for ( ; pos && b < e; pos--)
  {
    /*
      Cast to int8 for extra safety. "char" can be unsigned
      by default on some platforms.
    */
    if (((int8) b[0]) >= 0)
    {
      /* Single byte ascii character */
      b++;
    }
    else if ((e - b) > 1 && isbig5code((uchar) b[0], (uchar) b[1]))
    {
      /*
        Double byte character. The remaining-length test guarantees
        that b[1] is inside the buffer, without forming "e - 1",
        which would be an invalid pointer for an empty string.
      */
      b+= 2;
    }
    else
    {
      /* Wrong byte sequence, or a lead byte cut off by the end */
      break;
    }
  }
  return (uint) (b - b0);
}


static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
{
  NULL,			/* init */
@@ -6291,7 +6328,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
  mbcharlen_big5,
  my_numchars_mb,
  my_charpos_mb,
  my_well_formed_len_mb,
  my_well_formed_len_big5,
  my_lengthsp_8bit,
  my_numcells_8bit,
  my_mb_wc_big5,	/* mb_wc       */