Commit 9155e8de authored by unknown's avatar unknown
Browse files

Move UCA language specific definitions into ctype-ucs.c.

parent da7b7d55
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -22,6 +22,19 @@
  init_compiled_charsets() that only adds those that he wants
*/

/*
  Declarations of the ucs2 UCA collation objects that
  init_compiled_charsets() registers with add_compiled_collation().
  They are only declared when ucs2 support is compiled in.
*/
#ifdef HAVE_CHARSET_ucs2
extern CHARSET_INFO my_charset_ucs2_general_uca;
extern CHARSET_INFO my_charset_ucs2_icelandic_uca_ci;
extern CHARSET_INFO my_charset_ucs2_latvian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_romanian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_slovenian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_polish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_estonian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_spanish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_swedish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_turkish_uca_ci;
#endif

my_bool init_compiled_charsets(myf flags __attribute__((unused)))
{
  CHARSET_INFO *cs;
@@ -74,6 +87,16 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
  add_compiled_collation(&my_charset_ucs2_general_ci);
  add_compiled_collation(&my_charset_ucs2_bin);
  add_compiled_collation(&my_charset_ucs2_general_uca);
  add_compiled_collation(&my_charset_ucs2_general_uca);
  add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci);
  add_compiled_collation(&my_charset_ucs2_latvian_uca_ci);
  add_compiled_collation(&my_charset_ucs2_romanian_uca_ci);
  add_compiled_collation(&my_charset_ucs2_slovenian_uca_ci);
  add_compiled_collation(&my_charset_ucs2_polish_uca_ci);
  add_compiled_collation(&my_charset_ucs2_estonian_uca_ci);
  add_compiled_collation(&my_charset_ucs2_spanish_uca_ci);
  add_compiled_collation(&my_charset_ucs2_swedish_uca_ci);
  add_compiled_collation(&my_charset_ucs2_turkish_uca_ci);
#endif

#ifdef HAVE_CHARSET_ujis
+0 −143
Original line number Diff line number Diff line
@@ -177,127 +177,6 @@ static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
}


#ifdef HAVE_CHARSET_ucs2

/*
  One language-specific tailoring of the general ucs2 UCA collation.
  init_uca_charsets() walks an array of these and stops at the first
  entry whose tailoring pointer is NULL.
*/
typedef struct my_tailoring_st
{
  uint  number;           /* offset added to 128 to form the collation number */
  const char *name;       /* language name, inserted into "ucs2_%s_ci"        */
  const char *tailoring;  /* tailoring rule string; NULL ends the table       */
} my_tailoring;

/*
  Language-specific tailorings applied on top of the general ucs2 UCA
  collation.  Each entry gives the offset used for the collation number
  (init_uca_charsets() adds it to 128), the language name used to build
  the collation name, and the tailoring rules.  The table is terminated
  by an entry whose tailoring pointer is NULL.
*/
static my_tailoring tailoring[]=
{
  {
    0, "icelandic",
    /*
      Some sources treat LETTER A WITH DIAERESIS (00E4,00C4)
      secondary greater than LETTER AE (00E6,00C6).
      http://www.evertype.com/alphabets/icelandic.pdf
      http://developer.mimer.com/collations/charts/icelandic.htm

      Other sources do not provide any special rules
      for LETTER A WITH DIAERESIS:
      http://www.omniglot.com/writing/icelandic.htm
      http://en.wikipedia.org/wiki/Icelandic_alphabet
      http://oss.software.ibm.com/icu/charts/collation/is.html

      Let's go the first way.
    */
    "& A < \\u00E1 <<< \\u00C1 "
    "& D < \\u00F0 <<< \\u00D0 "
    "& E < \\u00E9 <<< \\u00C9 "
    "& I < \\u00ED <<< \\u00CD "
    "& O < \\u00F3 <<< \\u00D3 "
    "& U < \\u00FA <<< \\u00DA "
    "& Y < \\u00FD <<< \\u00DD "
    "& Z < \\u00FE <<< \\u00DE "
        "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
        "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
        "< \\u00E5 <<< \\u00C5 "
  },
  {
    1, "latvian",
    /*
      Some sources treat I and Y primary different.
      Other sources treat I and Y the same on primary level.
      We'll go the first way.
    */
    "& C < \\u010D <<< \\u010C "
    "& G < \\u0123 <<< \\u0122 "
    "& I < \\u0079 <<< \\u0059 "
    "& K < \\u0137 <<< \\u0136 "
    "& L < \\u013C <<< \\u013B "
    "& N < \\u0146 <<< \\u0145 "
    "& R < \\u0157 <<< \\u0156 "
    "& S < \\u0161 <<< \\u0160 "
    "& Z < \\u017E <<< \\u017D "
  },
  {
    2, "romanian",
    "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 "
    "& I < \\u00EE <<< \\u00CE "
    "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E "
    "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 "
  },
  {
    3, "slovenian",
    "& C < \\u010D <<< \\u010C "
    "& S < \\u0161 <<< \\u0160 "
    "& Z < \\u017E <<< \\u017D "
  },
  {
    4, "polish",
    /*
      Z WITH ACUTE (017A,0179) and Z WITH DOT ABOVE (017C,017B) are two
      separate letters; each small letter is paired with its own capital.
    */
    "& A < \\u0105 <<< \\u0104 "
    "& C < \\u0107 <<< \\u0106 "
    "& E < \\u0119 <<< \\u0118 "
    "& L < \\u0142 <<< \\u0141 "
    "& N < \\u0144 <<< \\u0143 "
    "& O < \\u00F3 <<< \\u00D3 "
    "& S < \\u015B <<< \\u015A "
    "& Z < \\u017A <<< \\u0179 "
        "< \\u017C <<< \\u017B "
  },
  {
    5, "estonian",
    "& S < \\u0161 <<< \\u0160 "
       " < \\u007A <<< \\u005A "
       " < \\u017E <<< \\u017D "
    "& W < \\u00F5 <<< \\u00D5 "
        "< \\u00E4 <<< \\u00C4 "
        "< \\u00F6 <<< \\u00D6 "
        "< \\u00FC <<< \\u00DC "
  },
  {
    6, "spanish",
    "& N < \\u00F1 <<< \\u00D1 "
  },
  {
    7, "swedish",
    /*
      Some sources treat V and W as similar on primary level.
      We'll treat V and W as different on primary level.
    */
    "& Y <<\\u00FC <<< \\u00DC "
    "& Z < \\u00E5 <<< \\u00C5 "
        "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 "
        "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "
  },
  {
    8, "turkish",
    "& C < \\u00E7 <<< \\u00C7 "
    "& G < \\u011F <<< \\u011E "
    "& H < \\u0131 <<< \\u0049 "
    "& O < \\u00F6 <<< \\u00D6 "
    "& S < \\u015F <<< \\u015E "
    "& U < \\u00FC <<< \\u00DC "
  },
  {
    /* End-of-table marker: NULL tailoring stops init_uca_charsets(). */
    0, NULL, NULL
  }
};

#endif


static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
@@ -393,25 +272,6 @@ static int add_collation(CHARSET_INFO *cs)
  return MY_XML_OK;
}

#ifdef HAVE_CHARSET_ucs2
/*
  Register one collation per entry of tailoring[].

  A local copy of my_charset_ucs2_general_uca serves as the template:
  its state is set to MY_CS_STRNXFRM|MY_CS_UNICODE once, then for every
  table entry the number (128 + entry number), the tailoring rules and
  the name ("ucs2_<language>_ci") are filled in and the copy is passed
  to add_collation().

  The result of add_collation() is not checked; the function always
  returns 0.
*/
static my_bool init_uca_charsets()
{
  CHARSET_INFO uca_cs= my_charset_ucs2_general_uca;
  char cs_name[64];
  my_tailoring *rule= tailoring;

  uca_cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE;
  while (rule->tailoring)
  {
    /* The name lives in a local buffer that is rewritten on every pass. */
    sprintf(cs_name, "ucs2_%s_ci", rule->name);
    uca_cs.name= cs_name;
    uca_cs.number= 128 + rule->number;
    uca_cs.tailoring= rule->tailoring;
    add_collation(&uca_cs);
    rule++;
  }
  return 0;
}
#endif

/*
  Buffer size limit of 1 MiB.  The expansion is parenthesized so the
  macro behaves as a single value inside larger expressions
  (e.g. x / MY_MAX_ALLOWED_BUF).
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
#define MY_CHARSET_INDEX "Index.xml"
@@ -515,9 +375,6 @@ static my_bool init_available_charsets(myf myflags)

    bzero(&all_charsets,sizeof(all_charsets));
    init_compiled_charsets(myflags);
#ifdef HAVE_CHARSET_ucs2
    init_uca_charsets();
#endif
    
    /* Copy compiled charsets */
    for (cs=all_charsets;
+333 −1
Original line number Diff line number Diff line
@@ -6521,6 +6521,104 @@ NULL ,page0F9data,page0FAdata,page0FBdata,
page0FCdata,page0FDdata,page0FEdata,page0FFdata
};

/*
  Icelandic tailoring.

  Sources disagree about LETTER A WITH DIAERESIS (00E4,00C4).

  These treat it as secondary greater than LETTER AE (00E6,00C6):
  http://www.evertype.com/alphabets/icelandic.pdf
  http://developer.mimer.com/collations/charts/icelandic.htm

  These give no special rule for LETTER A WITH DIAERESIS:
  http://www.omniglot.com/writing/icelandic.htm
  http://en.wikipedia.org/wiki/Icelandic_alphabet
  http://oss.software.ibm.com/icu/charts/collation/is.html

  The first variant is the one implemented here.
*/

static const char icelandic[] =
    "& A < \\u00E1 <<< \\u00C1 "    /* A WITH ACUTE      */
    "& D < \\u00F0 <<< \\u00D0 "    /* ETH               */
    "& E < \\u00E9 <<< \\u00C9 "    /* E WITH ACUTE      */
    "& I < \\u00ED <<< \\u00CD "    /* I WITH ACUTE      */
    "& O < \\u00F3 <<< \\u00D3 "    /* O WITH ACUTE      */
    "& U < \\u00FA <<< \\u00DA "    /* U WITH ACUTE      */
    "& Y < \\u00FD <<< \\u00DD "    /* Y WITH ACUTE      */
    "& Z < \\u00FE <<< \\u00DE "    /* THORN             */
    "< \\u00E6 <<< \\u00C6 "        /* AE                */
    "<< \\u00E4 <<< \\u00C4 "       /* A WITH DIAERESIS  */
    "< \\u00F6 <<< \\u00D6 "        /* O WITH DIAERESIS  */
    "<< \\u00F8 <<< \\u00D8 "       /* O WITH STROKE     */
    "< \\u00E5 <<< \\u00C5 ";       /* A WITH RING ABOVE */

/*
  Latvian tailoring.

  Sources disagree on whether I and Y differ on the primary level or
  are the same there.  Here they are treated as primary different.
*/

static const char latvian[] =
    "& C < \\u010D <<< \\u010C "    /* C WITH CARON   */
    "& G < \\u0123 <<< \\u0122 "    /* G WITH CEDILLA */
    "& I < \\u0079 <<< \\u0059 "    /* plain Y        */
    "& K < \\u0137 <<< \\u0136 "    /* K WITH CEDILLA */
    "& L < \\u013C <<< \\u013B "    /* L WITH CEDILLA */
    "& N < \\u0146 <<< \\u0145 "    /* N WITH CEDILLA */
    "& R < \\u0157 <<< \\u0156 "    /* R WITH CEDILLA */
    "& S < \\u0161 <<< \\u0160 "    /* S WITH CARON   */
    "& Z < \\u017E <<< \\u017D ";   /* Z WITH CARON   */


/*
  Romanian tailoring.  S and T each list both the comma-below and the
  cedilla form of the letter.
*/
static const char romanian[] =
    "& A < \\u0103 <<< \\u0102 "    /* A WITH BREVE       */
    "< \\u00E2 <<< \\u00C2 "        /* A WITH CIRCUMFLEX  */
    "& I < \\u00EE <<< \\u00CE "    /* I WITH CIRCUMFLEX  */
    "& S < \\u0219 <<< \\u0218 "    /* S WITH COMMA BELOW */
    "<< \\u015F <<< \\u015E "       /* S WITH CEDILLA     */
    "& T < \\u021B <<< \\u021A "    /* T WITH COMMA BELOW */
    "<< \\u0163 <<< \\u0162 ";      /* T WITH CEDILLA     */

/* Slovenian tailoring: the three letters with caron. */
static const char slovenian[] =
    "& C < \\u010D <<< \\u010C "    /* C WITH CARON */
    "& S < \\u0161 <<< \\u0160 "    /* S WITH CARON */
    "& Z < \\u017E <<< \\u017D ";   /* Z WITH CARON */
    

/*
  Polish tailoring.

  Z WITH ACUTE (017A,0179) and Z WITH DOT ABOVE (017C,017B) are two
  separate letters, so each small letter is paired with its own capital
  instead of pairing small Z WITH ACUTE with capital Z WITH DOT ABOVE.
*/
static const char polish[]=
    "& A < \\u0105 <<< \\u0104 "    /* A WITH OGONEK    */
    "& C < \\u0107 <<< \\u0106 "    /* C WITH ACUTE     */
    "& E < \\u0119 <<< \\u0118 "    /* E WITH OGONEK    */
    "& L < \\u0142 <<< \\u0141 "    /* L WITH STROKE    */
    "& N < \\u0144 <<< \\u0143 "    /* N WITH ACUTE     */
    "& O < \\u00F3 <<< \\u00D3 "    /* O WITH ACUTE     */
    "& S < \\u015B <<< \\u015A "    /* S WITH ACUTE     */
    "& Z < \\u017A <<< \\u0179 "    /* Z WITH ACUTE     */
        "< \\u017C <<< \\u017B ";   /* Z WITH DOT ABOVE */

/*
  Estonian tailoring.  Plain Z and Z WITH CARON are listed in the chain
  that starts at S; the vowels with tilde/diaeresis are listed in the
  chain that starts at W.
*/
static const char estonian[] =
    "& S < \\u0161 <<< \\u0160 "    /* S WITH CARON     */
    " < \\u007A <<< \\u005A "       /* plain Z          */
    " < \\u017E <<< \\u017D "       /* Z WITH CARON     */
    "& W < \\u00F5 <<< \\u00D5 "    /* O WITH TILDE     */
    "< \\u00E4 <<< \\u00C4 "        /* A WITH DIAERESIS */
    "< \\u00F6 <<< \\u00D6 "        /* O WITH DIAERESIS */
    "< \\u00FC <<< \\u00DC ";       /* U WITH DIAERESIS */

/* Spanish tailoring: a single rule for N WITH TILDE. */
static const char spanish[] =
    "& N < \\u00F1 <<< \\u00D1 ";

/*
  Swedish tailoring.

  Some sources treat V and W as similar on the primary level; this
  string contains no rule for them, i.e. they are kept primary
  different.
*/

static const char swedish[] =
    "& Y <<\\u00FC <<< \\u00DC "    /* U WITH DIAERESIS  */
    "& Z < \\u00E5 <<< \\u00C5 "    /* A WITH RING ABOVE */
    "< \\u00E4 <<< \\u00C4 "        /* A WITH DIAERESIS  */
    "<< \\u00E6 <<< \\u00C6 "       /* AE                */
    "< \\u00F6 <<< \\u00D6 "        /* O WITH DIAERESIS  */
    "<< \\u00F8 <<< \\u00D8 ";      /* O WITH STROKE     */

/*
  Turkish tailoring.  The rule at H lists DOTLESS I (0131) together
  with plain capital I (0049).
*/
static const char turkish[] =
    "& C < \\u00E7 <<< \\u00C7 "    /* C WITH CEDILLA          */
    "& G < \\u011F <<< \\u011E "    /* G WITH BREVE            */
    "& H < \\u0131 <<< \\u0049 "    /* DOTLESS I and capital I */
    "& O < \\u00F6 <<< \\u00D6 "    /* O WITH DIAERESIS        */
    "& S < \\u015F <<< \\u015E "    /* S WITH CEDILLA          */
    "& U < \\u00FC <<< \\u00DC ";   /* U WITH DIAERESIS        */
    

/*
  Unicode Collation Algorithm:
@@ -7509,7 +7607,7 @@ CHARSET_INFO my_charset_ucs2_general_uca=
    45,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_general_uca",	/* name         */
    "ucs2_uca_ci",	/* name         */
    "",			/* comment      */
    NULL,		/* tailoring    */
    NULL,		/* ctype        */
@@ -7531,4 +7629,238 @@ CHARSET_INFO my_charset_ucs2_general_uca=
};


/*
  Collation "ucs2_icelandic_ci" (number 128) of the "ucs2" character set.
  The tailoring field points at the icelandic[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
{
    128,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_icelandic_ci",/* name         */
    "",			/* comment      */
    icelandic,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_latvian_ci" (number 129) of the "ucs2" character set.
  The tailoring field points at the latvian[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
{
    129,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_latvian_ci",	/* name         */
    "",			/* comment      */
    latvian,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_romanian_ci" (number 130) of the "ucs2" character set.
  The tailoring field points at the romanian[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
{
    130,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_romanian_ci",	/* name         */
    "",			/* comment      */
    romanian,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_slovenian_ci" (number 131) of the "ucs2" character set.
  The tailoring field points at the slovenian[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
{
    131,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_slovenian_ci",/* name         */
    "",			/* comment      */
    slovenian,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_polish_ci" (number 132) of the "ucs2" character set.
  The tailoring field points at the polish[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_polish_uca_ci=
{
    132,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_polish_ci",	/* name         */
    "",			/* comment      */
    polish,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_estonian_ci" (number 133) of the "ucs2" character set.
  The tailoring field points at the estonian[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
{
    133,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_estonian_ci",	/* name         */
    "",			/* comment      */
    estonian,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_spanish_ci" (number 134) of the "ucs2" character set.
  The tailoring field points at the spanish[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
{
    134,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_spanish_ci",	/* name         */
    "",			/* comment      */
    spanish,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_swedish_ci" (number 135) of the "ucs2" character set.
  The tailoring field points at the swedish[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
{
    135,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_swedish_ci",	/* name         */
    "",			/* comment      */
    swedish,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

/*
  Collation "ucs2_turkish_ci" (number 136) of the "ucs2" character set.
  The tailoring field points at the turkish[] rule string, every table
  pointer is NULL, and the entry uses my_charset_ucs2_handler together
  with my_collation_ucs2_uca_handler.
  Note that the C symbol ends in "_uca_ci" while the collation name
  itself does not contain "uca".
*/
CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
{
    136,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
    "ucs2",		/* cs name    */
    "ucs2_turkish_ci",	/* name         */
    "",			/* comment      */
    turkish,		/* tailoring    */
    NULL,		/* ctype        */
    NULL,		/* to_lower     */
    NULL,		/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* sort_order_big*/
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    8,			/* strxfrm_multiply */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    9,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_uca_handler
};

#endif