Commit 938fb67b authored by unknown's avatar unknown
Browse files

Bug #3928 regexp [[:>:]] and UTF-8

parent 5cc410bb
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
@@ -218,3 +218,25 @@ b
select * from t1 where a = 'b' and a != 'b';
a
drop table t1;
set names utf8;
select  'вася'  rlike '[[:<:]]вася[[:>:]]';
'вася'  rlike '[[:<:]]вася[[:>:]]'
1
select  'вася ' rlike '[[:<:]]вася[[:>:]]';
'вася ' rlike '[[:<:]]вася[[:>:]]'
1
select ' вася'  rlike '[[:<:]]вася[[:>:]]';
' вася'  rlike '[[:<:]]вася[[:>:]]'
1
select ' вася ' rlike '[[:<:]]вася[[:>:]]';
' вася ' rlike '[[:<:]]вася[[:>:]]'
1
select  'васяz' rlike '[[:<:]]вася[[:>:]]';
'васяz' rlike '[[:<:]]вася[[:>:]]'
0
select 'zвася'  rlike '[[:<:]]вася[[:>:]]';
'zвася'  rlike '[[:<:]]вася[[:>:]]'
0
select 'zвасяz' rlike '[[:<:]]вася[[:>:]]';
'zвасяz' rlike '[[:<:]]вася[[:>:]]'
0
+16 −0
Original line number Diff line number Diff line
@@ -141,3 +141,19 @@ select * from t1 where a = 'b';
select * from t1 where a = 'b' and a = 'b';
select * from t1 where a = 'b' and a != 'b';
drop table t1;

#
# Bug #3928 regexp [[:>:]] and UTF-8
#
set names utf8;

# This should return TRUE
select  'вася'  rlike '[[:<:]]вася[[:>:]]';
select  'вася ' rlike '[[:<:]]вася[[:>:]]';
select ' вася'  rlike '[[:<:]]вася[[:>:]]';
select ' вася ' rlike '[[:<:]]вася[[:>:]]';

# This should return FALSE
select  'васяz' rlike '[[:<:]]вася[[:>:]]';
select 'zвася'  rlike '[[:<:]]вася[[:>:]]';
select 'zвасяz' rlike '[[:<:]]вася[[:>:]]';
+13 −7
Original line number Diff line number Diff line
@@ -1524,8 +1524,12 @@ MY_UNICASE_INFO *uni_plane[256]={

#ifdef HAVE_CHARSET_utf8

/* These arrays are taken from usa7 implementation */

/* 
  We consider bytes with code more than 127 as a letter.
  This garantees that word boundaries work fine with regular
  expressions. Note, there is no need to mark byte 255  as a
  letter, it is illegal byte in UTF8.
*/
static uchar ctype_utf8[] = {
    0,
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
@@ -1536,16 +1540,18 @@ static uchar ctype_utf8[] = {
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  0
};

/* The below are taken from usa7 implementation */

static uchar to_lower_utf8[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,