Commit 26ff24f0 authored by unknown's avatar unknown
Browse files

merge


mysys/charset-def.c:
  Auto merged
BitKeeper/deleted/.del-errmsg.txt~f96b7055cac394e:
  Auto merged
sql/mysqld.cc:
  Auto merged
strings/ctype-big5.c:
  Auto merged
strings/ctype-sjis.c:
  Auto merged
strings/ctype-utf8.c:
  Auto merged
mysql-test/r/type_blob.result:
  after merge fix
parents bbd7265a 0b45706c
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
#ifdef HAVE_CYBOZU_COLLATION
extern CHARSET_INFO my_charset_utf8_general_cs;
#endif
#endif

#endif /* HAVE_UCA_COLLATIONS */
@@ -156,6 +159,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_utf8
  add_compiled_collation(&my_charset_utf8_general_ci);
  add_compiled_collation(&my_charset_utf8_bin);
#ifdef HAVE_CYBOZU_COLLATION
  add_compiled_collation(&my_charset_utf8_general_cs);
#endif
#ifdef HAVE_UCA_COLLATIONS
  add_compiled_collation(&my_charset_utf8_general_uca_ci);
  add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);
+0 −2
Original line number Diff line number Diff line
@@ -4075,8 +4075,6 @@ pthread_handler_decl(handle_connections_shared_memory,arg)
	      NullS);
      sql_perror(buff);
    }
    my_security_attr_free(sa_event);
    my_security_attr_free(sa_mapping);
    if (handle_client_file_map) 
      CloseHandle(handle_client_file_map);
    if (handle_client_map)
+1 −5
Original line number Diff line number Diff line
@@ -6300,11 +6300,7 @@ uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
  const char *emb= e - 1; /* Last possible end of an MB character */
  while (pos && b < e)
  {
    /*
      Cast to int8 for extra safety. "char" can be unsigned
      by default on some platforms.
    */
    if (((int8)b[0]) >= 0)
    if ((uchar) b[0] < 128)
    {
      /* Single byte ascii character */
      b++;
+1 −6
Original line number Diff line number Diff line
@@ -4591,12 +4591,7 @@ uint my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
  const char *b0= b;
  while (pos && b < e)
  {
    /*
      Cast to int8 for extra safety.
      "char" can be unsigned by default
      on some platforms.
    */
    if (((int8)b[0]) >= 0)
    if ((uchar) b[0] < 128)
    {
      /* Single byte ascii character */
      b++;
+168 −7
Original line number Diff line number Diff line
@@ -2148,12 +2148,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
  {
    my_wc_t s_wc,t_wc;
    
    /*
      Cast to int8 for extra safety.
      char can be unsigned by default
      on some platforms.
    */
    if (((int8)s[0]) >= 0)
    if ((uchar) s[0] < 128)
    {
      /* 
        s[0] is between 0 and 127.
@@ -2200,7 +2195,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
    
    /* Do the same for the second string */
    
    if (((int8)t[0]) >= 0)
    if ((uchar) t[0] < 128)
    {
      /* Convert single byte character into weight */
      t_wc= plane00[(uchar) t[0]].tolower;
@@ -2410,6 +2405,172 @@ CHARSET_INFO my_charset_utf8_bin=
    &my_collation_mb_bin_handler
};

#ifdef HAVE_CYBOZU_COLLATION

/*
 * These functions basically do the same as their originals, except
 * that they return 0 only when the two compared unicode strings are
 * strictly the same in a case-sensitive way.  See the "save_diff" local
 * variable for what they actually do.
 */

/*
  Case-sensitive comparison of two utf8 strings.

  Both strings are decoded one character at a time with my_utf8_uni().
  The characters are ordered by their "sort" weight from uni_plane[];
  the raw code point difference of the first pair that differs at all is
  remembered in save_diff and used as a tie-break when the weights of
  the two strings turn out to be equal.

  cs           Character set, passed through to my_utf8_uni()
  s, slen      First string and its length in bytes
  t, tlen      Second string and its length in bytes
  t_is_prefix  When non-zero, only the part of s covered by t is compared

  Returns a negative, zero or positive value, as for strcmp().

  NOTE(review): in prefix mode the remembered save_diff is not consulted,
  so a pure case difference still yields 0 -- confirm this is intended.
  NOTE(review): for an undecodable sequence only the first bytes are
  compared -- confirm callers never pass malformed data that matters here.
*/
static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, 
                                const uchar *s, uint slen,
                                const uchar *t, uint tlen,
                                my_bool t_is_prefix)
{
  const uchar *s_end= s + slen;
  const uchar *t_end= t + tlen;
  int save_diff= 0;
  int rest_diff;

  for ( ; s < s_end && t < t_end; )
  {
    my_wc_t s_code, t_code;
    int s_len= my_utf8_uni(cs, &s_code, s, s_end);
    int t_len= my_utf8_uni(cs, &t_code, t, t_end);
    int page;

    if (s_len <= 0 || t_len <= 0)
    {
      /* Incorrect string, compare by char value */
      return (int) s[0] - (int) t[0];
    }

    /* Remember the first raw code point difference for the tie-break */
    if (!save_diff)
    {
      save_diff= (int) s_code - (int) t_code;
    }

    /* Replace each code point by its sort weight when a page exists */
    page= (s_code >> 8) & 0xFF;
    if (uni_plane[page])
    {
      s_code= uni_plane[page][s_code & 0xFF].sort;
    }
    page= (t_code >> 8) & 0xFF;
    if (uni_plane[page])
    {
      t_code= uni_plane[page][t_code & 0xFF].sort;
    }

    if (s_code != t_code)
    {
      return (int) s_code - (int) t_code;
    }

    s+= s_len;
    t+= t_len;
  }

  if (t_is_prefix)
  {
    /* Zero when all of t was consumed, negative otherwise */
    return t - t_end;
  }

  /* Unconsumed bytes decide first; equal lengths fall back to save_diff */
  rest_diff= (s_end - s) - (t_end - t);
  return rest_diff ? rest_diff : save_diff;
}

/*
  Case-sensitive comparison of two utf8 strings, ignoring trailing spaces.

  The common part is compared exactly as in my_strnncoll_utf8_cs():
  characters are decoded with my_utf8_uni(), ordered by their "sort"
  weight from uni_plane[], and the raw code point difference of the first
  pair that differs at all is kept in save_diff as a tie-break.

  If one string is longer, its remaining bytes are scanned: the first
  byte that is not a space decides the result, otherwise the strings are
  considered to have the same length and save_diff is returned.

  cs           Character set, passed through to my_utf8_uni()
  s, slen      First string and its length in bytes
  t, tlen      Second string and its length in bytes

  Returns a negative, zero or positive value, as for strcmp().
  Only the sign of a non-zero result is meaningful.
*/
static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, 
                                  const uchar *s, uint slen,
                                  const uchar *t, uint tlen)
{
  int s_res,t_res;
  my_wc_t s_wc,t_wc;
  const uchar *se= s+slen;
  const uchar *te= t+tlen;
  int save_diff = 0;
  
  while ( s < se && t < te )
  {
    int plane;
    s_res=my_utf8_uni(cs,&s_wc, s, se);
    t_res=my_utf8_uni(cs,&t_wc, t, te);
    
    if ( s_res <= 0 || t_res <= 0 )
    {
      /* Incorrect string, compare by char value */
      return ((int)s[0]-(int)t[0]); 
    }
    
    /* Remember the first raw code point difference for the tie-break */
    if ( save_diff == 0 )
    {
      save_diff = ((int)s_wc) - ((int)t_wc);
    }
    /* Replace each code point by its sort weight when a page exists */
    plane=(s_wc>>8) & 0xFF;
    s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
    plane=(t_wc>>8) & 0xFF;
    t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
    if ( s_wc != t_wc )
    {
      return  ((int) s_wc) - ((int) t_wc);
    }
    
    s+=s_res;
    t+=t_res;
  }
  
  /* Number of bytes left unconsumed in each string */
  slen= se-s;
  tlen= te-t;
  
  if (slen != tlen)
  {
    /* Non-zero when the tail being scanned belongs to t, not to s */
    int swapped= 0;
    if (slen < tlen)
    {
      s= t;
      se= te;
      swapped= 1;
    }
    /*
      In UTF-8 every byte of a multibyte character is greater than
      space, so the tail can be scanned byte by byte, as in the 8bit
      version, without decoding full multibyte sequences.  A single
      byte control character may still be less than space, so the
      sign of the difference must be honoured in both directions.
    */
    for ( ; s < se; s++)
    {
      if (*s != ' ')
      {
        int tail_diff= (int) *s - (int) ' ';
        /*
          Negate arithmetically when the tail belongs to t.  A bitwise
          inversion would map a difference of -1 (byte 0x1F) to 0 and
          wrongly report the strings as equal.
        */
        return swapped ? -tail_diff : tail_diff;
      }
    }
  }
  return save_diff;
}

/*
  Collation handler for the case-sensitive utf8 collation.

  Only the two comparison entries point to the case-sensitive routines
  (my_strnncoll_utf8_cs and my_strnncollsp_utf8_cs); every other entry
  names a routine defined elsewhere.  The initializer is positional, so
  the order of entries must follow the MY_COLLATION_HANDLER declaration.

  NOTE(review): the strnxfrm, strcasecmp and hash entries are not
  case-sensitive specific routines judging by their names -- confirm that
  sort keys and hashes are consistent with the comparison functions.
*/
static MY_COLLATION_HANDLER my_collation_cs_handler =
{
    NULL,		/* init */
    my_strnncoll_utf8_cs,
    my_strnncollsp_utf8_cs,
    my_strnxfrm_utf8,
    my_like_range_simple,
    my_wildcmp_mb,
    my_strcasecmp_utf8,
    my_instr_mb,
    my_hash_sort_utf8
};

/*
  Descriptor of the compiled-in "utf8_general_cs" collation of the "utf8"
  character set, registered under number 254.  It is only built when
  HAVE_CYBOZU_COLLATION is defined.

  It combines the utf8 character set handler with the case-sensitive
  collation handler my_collation_cs_handler.  The initializer is
  positional; the trailing comments name the member each value fills.
*/
CHARSET_INFO my_charset_utf8_general_cs=
{
    254,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_UNICODE,	/* state  */
    "utf8",		/* cs name      */
    "utf8_general_cs",	/* name         */
    "",			/* comment      */
    NULL,		/* tailoring    */
    ctype_utf8,		/* ctype        */
    to_lower_utf8,	/* to_lower     */
    to_upper_utf8,	/* to_upper     */
    to_upper_utf8,	/* sort_order   */
    NULL,		/* contractions */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    1,			/* strxfrm_multiply */
    1,			/* mbminlen     */
    3,			/* mbmaxlen     */
    0,			/* min_sort_char */
    255,		/* max_sort_char */
    &my_charset_utf8_handler,	/* character set level routines */
    &my_collation_cs_handler	/* case-sensitive collation routines */
};
#endif	/* Cybozu Hack */


#ifdef MY_TEST_UTF8
#include <stdio.h>