Commit 7e7dfccc authored by unknown's avatar unknown
Browse files

Bug #5324 Bug in UCA collations with LIKE comparisons and INDEX

parent 4011f819
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -312,6 +312,13 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
			      char *min_str, char *max_str,
			      uint *min_length, uint *max_length);

/*
  Build the min_str/max_str key range for a LIKE pattern in a multi-byte
  character set. ptr/ptr_length describe the pattern; escape, w_one and
  w_many are the escape and wildcard characters; min_str and max_str are
  output buffers of res_length bytes each; min_length and max_length
  receive the significant lengths. Returns 0 on success; the comment on
  the definition reserves 1 for a pattern that cannot be optimized.
*/
my_bool  my_like_range_mb(CHARSET_INFO *cs,
			  const char *ptr, uint ptr_length,
			  pbool escape, pbool w_one, pbool w_many,
			  uint res_length,
			  char *min_str, char *max_str,
			  uint *min_length, uint *max_length);

my_bool  my_like_range_ucs2(CHARSET_INFO *cs,
			    const char *ptr, uint ptr_length,
			    pbool escape, pbool w_one, pbool w_many,
+39 −0
Original line number Diff line number Diff line
@@ -1872,3 +1872,42 @@ Z,z,Ź,ź,Ż,ż,Ž,ž
ǁ
ǂ
ǃ
drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci;
c
Μωδαί̈
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci ORDER BY c;
c
Μωδ
Μωδαί̈
DROP TABLE t1;
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
c
Μωδαί̈
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
COLLATE ucs2_unicode_ci ORDER BY c;
c
Μωδ
Μωδαί̈
DROP TABLE t1;
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
c
Μωδαί̈
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_unicode_ci ORDER BY c;
c
Μωδ
Μωδαί̈
DROP TABLE t1;
+37 −0
Original line number Diff line number Diff line
@@ -180,3 +180,40 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_slovak_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_spanish2_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci;

drop table t1;

#
# Bug#5324: UCA collations with LIKE comparisons and INDEX.
# Each test creates a table with an indexed column, inserts Greek strings
# that start with U+039C, and checks that LIKE with the pattern
# _ucs2 0x039C0025 (U+039C followed by the percent wildcard) finds them.
#
SET NAMES utf8;
# Test 1: utf8_general_ci
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
# Check one row
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
# Check two rows
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci ORDER BY c;
DROP TABLE t1;
# Test 2: ucs2_unicode_ci
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
# Check one row
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
# Check two rows
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
COLLATE ucs2_unicode_ci ORDER BY c;
DROP TABLE t1;
# Test 3: utf8_unicode_ci
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
# Check one row
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
# Check two rows
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_unicode_ci ORDER BY c;
DROP TABLE t1;
+86 −0
Original line number Diff line number Diff line
@@ -458,6 +458,92 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
  }
}

/*
** Calculate min_str and max_str that range a LIKE string
** (multi-byte character set version).
**
** Arguments:
** cs		Character set. Supplies min_sort_char, max_sort_char and
**		the wc_mb() encoder used to build the max key padding.
** ptr		Pointer to LIKE string.
** ptr_length	Length of LIKE string in bytes.
** escape	Escape character in LIKE.  (Normally '\').
**		Escape characters are not copied into min_str and max_str;
**		the byte that follows an escape is copied literally.
** w_one	Single-character wildcard ('_' in SQL).
** w_many	Multi-character wildcard ('%' in SQL).
** res_length	Length of min_str and max_str in bytes.
** min_str	Out: smallest string that ranges LIKE. Filled with
**		min_sort_char after the first wildcard, otherwise space
**		padded to res_length.
** max_str	Out: largest string that ranges LIKE. After the first
**		wildcard it is filled with the multi-byte encoding of
**		max_sort_char; bytes that cannot hold a whole character
**		are set to spaces. Without a wildcard it is space padded.
** min_length	Out: number of pattern bytes copied before the first
**		wildcard (or the whole copied length if there is none).
** max_length	Out: res_length if a wildcard was found, otherwise the
**		same value as min_length.
**
** The function returns 0 if ok and 1 if the LIKE string can't be
** optimized (here: max_sort_char cannot be encoded by wc_mb()).
*/

my_bool my_like_range_mb(CHARSET_INFO *cs,
			 const char *ptr,uint ptr_length,
			 pbool escape, pbool w_one, pbool w_many,
			 uint res_length,
			 char *min_str,char *max_str,
			 uint *min_length,uint *max_length)
{
  const char *end=ptr+ptr_length;
  char *min_org=min_str;
  char *min_end=min_str+res_length;
  char *max_end=max_str+res_length;

  /* min_str and max_str advance in lockstep until a wildcard is found */
  for (; ptr != end && min_str != min_end ; ptr++)
  {
    if (*ptr == escape && ptr+1 != end)
    {
      ptr++;					/* Skip escape */
      *min_str++= *max_str++ = *ptr;
      continue;
    }
    if (*ptr == w_one || *ptr == w_many)	/* '_' and '%' in SQL */
    {
      char buf[10];
      int  buflen;
      uint max_char_len;

      /*
        Create a buffer with the multibyte representation of the
        max_sort_char character. wc_mb() returns a signed length, so keep
        it signed and check it at run time: DBUG_ASSERT is compiled out of
        release builds, where a zero length would make the fill loop
        below spin forever and a negative one would turn into a huge
        unsigned length.
      */
      buflen= cs->cset->wc_mb(cs, cs->max_sort_char, buf, buf + sizeof(buf));
      DBUG_ASSERT(buflen > 0);
      if (buflen <= 0)
        return 1;				/* Can't build a max key */
      max_char_len= (uint) buflen;

      /* Write min key  */
      *min_length= (uint) (min_str - min_org);
      *max_length=res_length;
      do
      {
        *min_str++= (char) cs->min_sort_char;
      } while (min_str != min_end);

      /* Write max key: copy the encoded character into max_str in a loop */
      do
      {
        /*
          Compare the remaining room against the character length instead
          of forming max_str + length, which could point past the end of
          the buffer.
        */
        if ((uint) (max_end - max_str) >= max_char_len)
        {
          /* Enough space for max character */
          memcpy(max_str, buf, max_char_len);
          max_str+= max_char_len;
        }
        else
        {
          /*
            There is no space for whole multibyte
            character, then add trailing spaces.
          */
          *max_str++= ' ';
        }
      } while (max_str != max_end);
      return 0;
    }
    *min_str++= *max_str++ = *ptr;
  }
  *min_length= *max_length = (uint) (min_str - min_org);

  while (min_str != min_end)
    *min_str++ = *max_str++ = ' ';	/* Because of key compression */
  return 0;
}

static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
		  const char *str,const char *str_end,
		  const char *wildstr,const char *wildend,
+4 −3
Original line number Diff line number Diff line
@@ -6876,7 +6876,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
    int mblen;
    
    if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc, 
                                          scanner->sbeg, scanner->send)) < 0))
                                          scanner->sbeg,
                                          scanner->send)) <= 0))
      return -1;
    
    scanner->page= wc >> 8;
@@ -7918,7 +7919,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
    my_strnncoll_ucs2_uca,
    my_strnncollsp_ucs2_uca,
    my_strnxfrm_ucs2_uca,
    my_like_range_simple,
    my_like_range_ucs2,
    my_wildcmp_uca,
    NULL,
    my_instr_mb,
@@ -8369,7 +8370,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
    my_strnncoll_any_uca,
    my_strnncollsp_any_uca,
    my_strnxfrm_any_uca,
    my_like_range_simple,
    my_like_range_mb,
    my_wildcmp_uca,
    NULL,
    my_instr_mb,