Commit 822e8866 authored by unknown's avatar unknown
Browse files

Fixed bug #14896.

This bug in Field_string::cmp resulted in wrong comparisons
with keys in partial indexes over multi-byte character fields.
For example, if field a is declared as varchar(16) collate utf8_unicode_ci,
then INDEX(a(4)) is such an index.
  
Wrong key comparisons could lead to wrong result sets if 
the selected query execution plan used a range scan by 
a partial index over a utf8 character field.
This also caused wrong results in many other cases.


mysql-test/t/ctype_utf8.test:
  Added test cases for bug #14896.
mysql-test/r/ctype_utf8.result:
  Added test cases for bug #14896.
sql/field.cc:
  Fixed bug #14896.
  This bug in Field_string::cmp resulted in wrong comparisons
  with keys in partial indexes over multi-byte character fields.
  For example, if field a is declared as varchar(16) collate utf8_unicode_ci,
  then INDEX(a(4)) is such an index.
       
  Wrong key comparisons could lead to wrong result sets if 
  the selected query execution plan used a range scan by 
  a partial index over a utf8 character field.
  This also caused wrong results in many other cases.
parent fa83f8ba
Loading
Loading
Loading
Loading
+40 −0
Original line number Diff line number Diff line
@@ -1124,3 +1124,43 @@ check table t1;
Table	Op	Msg_type	Msg_text
test.t1	check	status	OK
drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (id int PRIMARY KEY,
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
b int,
f varchar(128) default 'XXX',
INDEX (a(4))
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
INSERT INTO t1(id, a, b) VALUES
(1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30),
(4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40),
(7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50),
(10, 'eeeee', 40), (11, 'bbbbbb', 60);
SELECT id, a, b FROM t1;
id	a	b
1	cccc	50
2	cccc	70
3	cccc	30
4	cccc	30
5	cccc	20
6	bbbbbb	40
7	dddd	30
8	aaaa	10
9	aaaa	50
10	eeeee	40
11	bbbbbb	60
SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb';
id	a	b
8	aaaa	10
9	aaaa	50
6	bbbbbb	40
11	bbbbbb	60
SELECT id, a FROM t1 WHERE a='bbbbbb';
id	a
6	bbbbbb
11	bbbbbb
SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b;
id	a
6	bbbbbb
11	bbbbbb
DROP TABLE t1;
+26 −0
Original line number Diff line number Diff line
@@ -926,4 +926,30 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
check table t1;
drop table t1;

#
# Bug#14896: Comparison with a key in a partial index over mb character field
#

# Use utf8 as the connection character set for the statements below.
SET NAMES utf8;
# Column a is a utf8_unicode_ci varchar(16); INDEX (a(4)) is a partial
# (prefix) index covering only the first 4 characters of a.
CREATE TABLE t1 (id int PRIMARY KEY,
                 a varchar(16) collate utf8_unicode_ci NOT NULL default '',
                 b int,
                 f varchar(128) default 'XXX',
                 INDEX (a(4))
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
# Some values ('bbbbbb', 'eeeee') are longer than the 4-character index
# prefix; the others are exactly 4 characters long.
INSERT INTO t1(id, a, b) VALUES
  (1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30),
  (4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40),
  (7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50),
  (10, 'eeeee', 40), (11, 'bbbbbb', 60);

# Unfiltered dump of all rows, for reference.
SELECT id, a, b FROM t1;

# Range condition on a whose upper bound is longer than the index prefix.
# NOTE(review): presumably meant to be resolved by a range scan over the
# partial index -- confirm the chosen plan with EXPLAIN.
SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb';

# Equality lookups on a value longer than the index prefix, without and
# with an ORDER BY on the non-indexed column b.
SELECT id, a FROM t1 WHERE a='bbbbbb';
SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b;

DROP TABLE t1;

# End of 4.1 tests
+7 −13
Original line number Diff line number Diff line
@@ -5072,17 +5072,6 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr)
{
  uint a_len, b_len;

  if (field_charset->strxfrm_multiply > 1)
  {
    /*
      We have to remove end space to be able to compare multi-byte-characters
      like in latin_de 'ae' and 0xe4
    */
    return field_charset->coll->strnncollsp(field_charset,
                                            (const uchar*) a_ptr, field_length,
                                            (const uchar*) b_ptr,
                                            field_length);
  }
  if (field_charset->mbmaxlen != 1)
  {
    uint char_len= field_length/field_charset->mbmaxlen;
@@ -5091,7 +5080,12 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr)
  }
  else
    a_len= b_len= field_length;
  return my_strnncoll(field_charset,(const uchar*) a_ptr, a_len,
  /*
    We have to remove end space to be able to compare multi-byte-characters
    like in latin_de 'ae' and 0xe4
  */
  return field_charset->coll->strnncollsp(field_charset,
                                          (const uchar*) a_ptr, a_len,
                                          (const uchar*) b_ptr, b_len);
}