Commit 5267ec8a authored by unknown's avatar unknown
Browse files

Bug #6040 can't retrieve records with umlaut characters in case insensitive manner

parent 2310f00a
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *,
                 const char *s, uint s_length,
                 my_match_t *match, uint nmatch);

/*
  Wildcard comparison of [str, str_end) against the pattern
  [wildstr, wildend), shared by the Unicode character sets (the
  definition is commented as being used by UTF8 and UCS2).

  escape, w_one and w_many are the code points of the escape character,
  the "exactly one character" wildcard and the "any number of
  characters" wildcard.  weights is a table indexed by the high byte of
  a code point that supplies the sort value used for the comparison;
  callers may pass NULL to compare the code points as they are.

  Returns 0 when the string matches the pattern and a non-zero value
  (1 or -1) when it does not.
*/
int my_wildcmp_unicode(CHARSET_INFO *cs,
                       const char *str, const char *str_end,
                       const char *wildstr, const char *wildend,
                       int escape, int w_one, int w_many,
                       MY_UNICASE_INFO **weights);

/*
  Takes a buffer of len bytes and an "add" callback that receives a
  CHARSET_INFO and returns an int.
  NOTE(review): presumably parses an XML character set description and
  calls add() for every character set found; the definition is not
  visible here - confirm before relying on this.
*/
extern my_bool my_parse_charset_xml(const char *bug, uint len,
				    int (*add)(CHARSET_INFO *cs));
+9 −0
Original line number Diff line number Diff line
@@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
1
select convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8);
convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8)
1
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8)
1
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8)
1
SELECT 'a' = 'a ';
'a' = 'a '
1
+8 −0
Original line number Diff line number Diff line
@@ -33,6 +33,14 @@ select 'A' like 'a';
select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');

# Bug #6040: can't retrieve records with umlaut
# characters in case insensitive manner.
# Case insensitive search LIKE comparison
# was broken for multibyte characters:
select convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8);
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);

#
# Check the following:
# "a"  == "a "
+4 −162
Original line number Diff line number Diff line
@@ -1231,171 +1231,13 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}


/*
** Compare a UCS2 string against a pattern with wildcards.
**
** Characters are decoded one at a time with my_ucs2_uni().  When
** "weights" is not NULL, the string and pattern characters are mapped
** through weights[high byte][low byte].sort before they are compared;
** with NULL the code points are compared as they are.
**
** A pattern character that follows "escape" is always compared
** literally, even when it is equal to w_one or w_many.  An escape
** character at the very end of the pattern stands for itself.
**
** Returns:
**	 0 if the string matches the pattern
**	 1 if it does not match (also when a character cannot be decoded)
**	-1 if it does not match and the string ran out while looking for
**	   the text that follows a w_many wildcard; outer levels of the
**	   search stop as soon as they see this value
*/

static
int my_wildcmp_ucs2(CHARSET_INFO *cs,
		    const char *str,const char *str_end,
		    const char *wildstr,const char *wildend,
		    int escape, int w_one, int w_many,
		    MY_UNICASE_INFO **weights)
{
  int result= -1;			/* Not found, using wildcards */
  my_wc_t s_wc, w_wc;
  int scan, plane;
  
  while (wildstr != wildend)
  {
    /* Match one pattern character at a time up to the next w_many */
    while (1)
    {
      int escaped= 0;

      scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
			(const uchar*)wildend);
      if (scan <= 0)
        return 1;
      
      /* Tested before escape processing: only an unescaped w_many counts */
      if (w_wc == (my_wc_t)w_many)
      {
        result= 1;				/* Found an anchor char */
        break;
      }
      
      wildstr+= scan;
      if (w_wc == (my_wc_t)escape && wildstr != wildend)
      {
        /* Take the character after the escape literally */
        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
			  (const uchar*)wildend);
        if (scan <= 0)
          return 1;
        wildstr+= scan;
        escaped= 1;
      }
      
      scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
      if (scan <= 0)
        return 1;
      str+= scan;
      
      if (!escaped && w_wc == (my_wc_t)w_one)
      {
        result= 1;				/* Found an anchor char */
      }
      else
      {
        if (weights)
        {
          plane=(s_wc>>8) & 0xFF;
          s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
          plane=(w_wc>>8) & 0xFF;
          w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
        }
        if (s_wc != w_wc)
          return 1;				/* No match */
      }
      if (wildstr == wildend)
        return (str != str_end);		/* Match if both are at end */
    }
    
    
    if (w_wc == (my_wc_t)w_many)
    {						/* Found w_many */
    
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {
        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
			  (const uchar*)wildend);
        if (scan <= 0)
          return 1;
        
        if (w_wc == (my_wc_t)w_many)
        {
          wildstr+= scan;
          continue;
        }
        
        if (w_wc == (my_wc_t)w_one)
        {
          /* Every w_one consumes exactly one string character */
          wildstr+= scan;
          scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str,
			    (const uchar*)str_end);
          if (scan <= 0)
            return 1;
          str+= scan;
          continue;
        }
        break;					/* Not a wild character */
      }
      
      if (wildstr == wildend)
        return 0;				/* Ok if w_many is last */
      
      if (str == str_end)
        return -1;
      
      /*
        Read the literal that must follow the wildcard and step over it,
        so that the recursive call below starts after it and cannot
        misread an escaped wildcard as a real one.
      */
      scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
			(const uchar*)wildend);
      if (scan <= 0)
        return 1;
      wildstr+= scan;
      
      if (w_wc == (my_wc_t)escape && wildstr != wildend)
      {
        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
			  (const uchar*)wildend);
        if (scan <= 0)
          return 1;
        wildstr+= scan;
      }
      
      /* The pattern character does not change below: map it only once */
      if (weights)
      {
        plane=(w_wc>>8) & 0xFF;
        w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
      }
      
      while (1)
      {
        /* Skip until the first character from wildstr is found */
        while (str != str_end)
        {
          scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str,
			    (const uchar*)str_end);
          if (scan <= 0)
            return 1;
          if (weights)
          {
            plane=(s_wc>>8) & 0xFF;
            s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
          }
          
          if (s_wc == w_wc)
            break;
          str+= scan;
        }
        if (str == str_end)
          return -1;
        
        /* Step over the matched character and try the rest of the pattern */
        str+= scan;
        result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
                                w_one,w_many,weights);
        
        if (result <= 0)
          return result;
      }
    }
  }
  return (str != str_end ? 1 : 0);
}


/*
** Wildcard comparison for the UCS2 "_ci" variant: forwards all of its
** arguments to my_wildcmp_unicode() and supplies uni_plane as the
** weight table.  The return value is passed through unchanged.
*/
static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
		    const char *str,const char *str_end,
		    const char *wildstr,const char *wildend,
		    int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
                            escape,w_one,w_many,uni_plane); 
}

@@ -1406,7 +1248,7 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
		    const char *wildstr,const char *wildend,
		    int escape, int w_one, int w_many)
{
  return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
                            escape,w_one,w_many,NULL); 
}

+167 −1
Original line number Diff line number Diff line
@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={

};


/*
** Compare a string against a pattern with wildcards.
** This function is used in UTF8 and UCS2.
**
** Characters are decoded one at a time with cs->cset->mb_wc.  When
** "weights" is not NULL, the string and pattern characters are mapped
** through weights[high byte][low byte].sort before they are compared;
** with NULL the code points are compared as they are.
**
** A pattern character that follows "escape" is always compared
** literally, even when it is equal to w_one or w_many.  An escape
** character at the very end of the pattern stands for itself.
**
** Returns:
**	 0 if the string matches the pattern
**	 1 if it does not match (also when a character cannot be decoded)
**	-1 if it does not match and the string ran out while looking for
**	   the text that follows a w_many wildcard; outer levels of the
**	   search stop as soon as they see this value
*/

int my_wildcmp_unicode(CHARSET_INFO *cs,
		       const char *str,const char *str_end,
		       const char *wildstr,const char *wildend,
		       int escape, int w_one, int w_many,
		       MY_UNICASE_INFO **weights)
{
  int result= -1;			/* Not found, using wildcards */
  my_wc_t s_wc, w_wc;
  int scan, plane;
  int (*mb_wc)(struct charset_info_st *cs, my_wc_t *wc,
               const unsigned char *s,const unsigned char *e);
  mb_wc= cs->cset->mb_wc;
  
  while (wildstr != wildend)
  {
    /* Match one pattern character at a time up to the next w_many */
    while (1)
    {
      int escaped= 0;

      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                       (const uchar*)wildend)) <= 0)
        return 1;
      
      /* Tested before escape processing: only an unescaped w_many counts */
      if (w_wc == (my_wc_t)w_many)
      {
        result= 1;				/* Found an anchor char */
        break;
      }
      
      wildstr+= scan;
      if (w_wc == (my_wc_t)escape && wildstr != wildend)
      {
        /* Take the character after the escape literally */
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                         (const uchar*)wildend)) <= 0)
          return 1;
        wildstr+= scan;
        escaped= 1;
      }
      
      if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
                       (const uchar*)str_end)) <= 0)
        return 1;
      str+= scan;
      
      if (!escaped && w_wc == (my_wc_t)w_one)
      {
        result= 1;				/* Found an anchor char */
      }
      else
      {
        if (weights)
        {
          plane=(s_wc>>8) & 0xFF;
          s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
          plane=(w_wc>>8) & 0xFF;
          w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
        }
        if (s_wc != w_wc)
          return 1;				/* No match */
      }
      if (wildstr == wildend)
        return (str != str_end);		/* Match if both are at end */
    }
    
    
    if (w_wc == (my_wc_t)w_many)
    {						/* Found w_many */
    
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                         (const uchar*)wildend)) <= 0)
          return 1;
        
        if (w_wc == (my_wc_t)w_many)
        {
          wildstr+= scan;
          continue;
        }
        
        if (w_wc == (my_wc_t)w_one)
        {
          /* Every w_one consumes exactly one string character */
          wildstr+= scan;
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
                           (const uchar*)str_end)) <= 0)
            return 1;
          str+= scan;
          continue;
        }
        break;					/* Not a wild character */
      }
      
      if (wildstr == wildend)
        return 0;				/* Ok if w_many is last */
      
      if (str == str_end)
        return -1;
      
      /*
        Read the literal that must follow the wildcard and step over it,
        so that the recursive call below starts after it and cannot
        misread an escaped wildcard as a real one.
      */
      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                       (const uchar*)wildend)) <= 0)
        return 1;
      wildstr+= scan;
      
      if (w_wc == (my_wc_t)escape && wildstr != wildend)
      {
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                         (const uchar*)wildend)) <= 0)
          return 1;
        wildstr+= scan;
      }
      
      /* The pattern character does not change below: map it only once */
      if (weights)
      {
        plane=(w_wc>>8) & 0xFF;
        w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
      }
      
      while (1)
      {
        /* Skip until the first character from wildstr is found */
        while (str != str_end)
        {
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
                           (const uchar*)str_end)) <= 0)
            return 1;
          if (weights)
          {
            plane=(s_wc>>8) & 0xFF;
            s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
          }
          
          if (s_wc == w_wc)
            break;
          str+= scan;
        }
        if (str == str_end)
          return -1;
        
        /* Step over the matched character and try the rest of the pattern */
        str+= scan;
        result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                                   escape, w_one, w_many,
                                   weights);
        
        if (result <= 0)
          return result;
      }
    }
  }
  return (str != str_end ? 1 : 0);
}

#endif


@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
  return  my_strncasecmp_utf8(cs, s, t, len);
}

/*
** Wildcard comparison entry point for utf8: hands every argument on to
** my_wildcmp_unicode() together with the uni_plane weight table and
** returns whatever that function returns.
*/
static
int my_wildcmp_utf8(CHARSET_INFO *cs,
		    const char *str,const char *str_end,
		    const char *wildstr,const char *wildend,
		    int escape, int w_one, int w_many)
{
  MY_UNICASE_INFO **sort_weights= uni_plane;
  int cmp;

  cmp= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                          escape, w_one, w_many, sort_weights);
  return cmp;
}


static int my_strnxfrm_utf8(CHARSET_INFO *cs,
                            uchar *dst, uint dstlen,
                            const uchar *src, uint srclen)
@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
    my_strnncollsp_utf8,
    my_strnxfrm_utf8,
    my_like_range_mb,
    my_wildcmp_mb,
    my_wildcmp_utf8,
    my_strcasecmp_utf8,
    my_instr_mb,
    my_hash_sort_utf8