Commit 74434833 authored by tim@localhost.polyesthetic.msg's avatar tim@localhost.polyesthetic.msg
Browse files

Associate a charset directly with its number in the Index file, and

propagate those changes through the code.  This is so that there can
be holes in the list of charsets without breaking old tables.
parent dba8816b
Loading
Loading
Loading
Loading
+4 −11
Original line number Diff line number Diff line
@@ -1578,15 +1578,6 @@ do
  fi
done
  
default_charset_has_source=0
for cs in $COMPILED_CHARSETS
do
  if test $cs = $default_charset
  then
    default_charset_has_source=1
  fi
done
  
CHARSET_SRCS=""
CHARSETS_NEED_SOURCE=""
CHARSET_DECLARATIONS=""
@@ -1600,8 +1591,10 @@ index_file="$srcdir/sql/share/charsets/Index"
for c in $CHARSETS
do
  # get the charset number from $index_file
  subpat='^'"${c}"'[[\t ]]*#'
  number=`$AWK 'sub("'"$subpat"'", "") { print }' $index_file`
changequote(,)dnl
  subpat='^'"${c}"'[ 	][ 	]*\([0-9][0-9]*\)[^0-9]*$'
  number=`sed -e "/$subpat/!d" -e 's//\1/' $index_file`
changequote([,])dnl
  # some sanity checking....
  if test X"$number" = X
  then
+61 −34
Original line number Diff line number Diff line
@@ -21,9 +21,14 @@
#include <m_string.h>
#include <my_dir.h>

/*
  One entry of the charset table: pairs a charset name with the number
  listed next to it in the charsets Index file.
*/
typedef struct cs_id_st {
  char *name;    /* charset name, copied from the word read from the Index file */
  uint number;   /* charset number parsed with atoi(); a value of 0 is rejected
                    by read_charset_index() as a corrupt Index file */
} CS_ID;

const char *charsets_dir = NULL;
static DYNAMIC_ARRAY cs_info_table;
static TYPELIB available_charsets;
static CS_ID *available_charsets;
static int charset_initialized=0;

#define MAX_LINE  1024
@@ -46,6 +51,24 @@ uint compiled_charset_number(const char *name);
const char *compiled_charset_name(uint charset_number);


/*
  Look up the number of a character set by its name.

  cs    Table of CS_ID entries.  The scan stops at the first entry whose
        name is NULL, which is treated as the end-of-table sentinel.
        A NULL table is treated as empty.
  name  Charset name to search for; compared with strcmp(), so the match
        is exact and case-sensitive.

  Returns the number stored in the matching entry, or 0 if no entry
  matches (or if cs or name is NULL).

  NOTE(review): this relies on the table being terminated by an entry
  with a NULL name -- confirm that the code building the table does so.
*/
static uint num_from_csname(CS_ID *cs, const char *name)
{
  CS_ID *c;

  if (!cs || !name)
    return 0;

  /*
    Test the entry, not the iterator: a pointer advanced with ++c never
    becomes NULL, so looping on `c` alone would run past the end of the
    table whenever the name is not found.
  */
  for (c = cs; c->name; ++c)
    if (!strcmp(c->name, name))
      return c->number;
  return 0;   /* this mimics find_type() */
}

/*
  Look up the name of a character set by its number.

  cs      Table of CS_ID entries.  The scan stops at the first entry
          whose name is NULL, which is treated as the end-of-table
          sentinel.  A NULL table is treated as empty.
  number  Charset number to search for.

  Returns the name stored in the matching entry, or the string "?" if
  no entry carries that number (or if cs is NULL).

  NOTE(review): this relies on the table being terminated by an entry
  with a NULL name -- confirm that the code building the table does so.
*/
static char *name_from_csnum(CS_ID *cs, uint number)
{
  CS_ID *c;

  if (!cs)
    return "?";

  /*
    Test the entry, not the iterator: a pointer advanced with ++c never
    becomes NULL, so looping on `c` alone would run past the end of the
    table whenever the number is not found.
  */
  for (c = cs; c->name; ++c)
    if (c->number == number)
      return c->name;
  return "?";   /* this mimics find_type() */
}

static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf)
{
  char *endptr=fb->p;
@@ -92,12 +115,12 @@ static char *get_charsets_dir(char *buf)
}


static my_bool read_charset_index(TYPELIB *charsets, myf myflags)
static my_bool read_charset_index(CS_ID **charsets, myf myflags)
{
  struct simpleconfig_buf_st fb;
  char buf[MAX_LINE];
  char buf[MAX_LINE], num_buf[MAX_LINE];
  DYNAMIC_ARRAY cs;
  my_string s;
  CS_ID *csid;

  strmov(get_charsets_dir(buf), "Index");

@@ -106,36 +129,42 @@ static my_bool read_charset_index(TYPELIB *charsets, myf myflags)
  fb.buf[0] = '\0';
  fb.p = fb.buf;

  if (init_dynamic_array(&cs, sizeof(my_string), 32, 32))
  if (init_dynamic_array(&cs, sizeof(CS_ID *), 32, 32))
    return TRUE;

  while (!get_word(&fb, buf))
  while (!get_word(&fb, buf) && !get_word(&fb, num_buf))
  {
    uint csnum;
    uint length;
    if (!(s= (char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags)))

    if (!(csnum = atoi(num_buf)))
    {
      /* corrupt Index file */
      my_fclose(fb.f,myflags);
      return TRUE;
    }
    memcpy(s,buf,length);
    insert_dynamic(&cs, (gptr) &s);

    if (!(csid = (CS_ID*) my_once_alloc(sizeof(CS_ID), myflags)) ||
        !(csid->name=
           (char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags)))
    {
      my_fclose(fb.f,myflags);
      return TRUE;
    }
    memcpy(csid->name,buf,length);
    csid->number = csnum;

    insert_dynamic(&cs, (gptr) &csid);
  }
  my_fclose(fb.f,myflags);

  /* I seriously doubt this is the best way to initialize this
   * TYPELIB from the Index file.  But it's the best way I could
   * come up with right now. */

  charsets->count = cs.elements;
  charsets->name  = "";
  if (!(charsets->type_names =
	(const char **) my_once_alloc((cs.elements + 1) * sizeof(const char *),
				      myflags)))
  if (!(*charsets =
      (CS_ID *) my_once_alloc((cs.elements + 1) * sizeof(CS_ID *), myflags)))
    return TRUE;
  /* unwarranted chumminess with dynamic_array implementation? */
  memcpy((char*) charsets->type_names, cs.buffer,
	 cs.elements * sizeof(my_string *));
  charsets->type_names[cs.elements] = NullS;
  memcpy((byte *) *charsets, cs.buffer, cs.elements * sizeof(CS_ID *));
  (*charsets)[cs.elements] = NULL;
  delete_dynamic(&cs);  

  return FALSE;
@@ -164,7 +193,7 @@ static my_bool init_available_charsets(myf myflags)
    charset_initialized=1;
    pthread_mutex_unlock(&THR_LOCK_charset);
  }
  return error || available_charsets.count == 0;
  return error || !available_charsets[0];
}


@@ -193,7 +222,7 @@ static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb)
static void get_charset_conf_name(uint cs_number, char *buf)
{
  strxmov(get_charsets_dir(buf),
          get_type(&available_charsets, cs_number - 1), ".conf", NullS);
          name_from_csnum(&available_charsets, cs_number), ".conf", NullS);
}


@@ -237,7 +266,7 @@ uint get_charset_number(const char *charset_name)
  if (error)
    return compiled_charset_number(charset_name);
  else
    return find_type((char*)charset_name, &available_charsets, 1);
    return num_from_csname((char*)charset_name, &available_charsets, 1);
}

const char *get_charset_name(uint charset_number)
@@ -247,7 +276,7 @@ const char *get_charset_name(uint charset_number)
  if (error)
    return compiled_charset_name(charset_number);
  else
    return get_type(&available_charsets, charset_number - 1);
    return name_from_csnum(&available_charsets, charset_number);
}


@@ -452,29 +481,27 @@ char * list_charsets(myf want_flags)

  if (want_flags & MY_CONFIG_SETS)
  {
    uint i;
    const char *cs_name;
    CS_ID *c;
    char buf[FN_REFLEN];
    MY_STAT stat;

    for (i = 0; i < available_charsets.count; i++)
    for (c = available_charsets; *c; ++c)
    {
      cs_name = get_type(&available_charsets, i);
      if (charset_in_string(cs_name, &s))
      if (charset_in_string(c->name, &s))
        continue;
      get_charset_conf_name(i + 1, buf);
      get_charset_conf_name(c->number, buf);
      if (!my_stat(buf, &stat, MYF(0)))
        continue;       /* conf file doesn't exist */
      dynstr_append(&s, cs_name);
      dynstr_append(&s, c->name);
      dynstr_append(&s, " ");
    }
  }

  if (want_flags & MY_INDEX_SETS)
  {
    uint i;
    for (i = 0; i < available_charsets.count; i++)
      charset_append(&s, get_type(&available_charsets, i));
    CS_ID *c;
    for (c = available_charsets; *c; ++c)
      charset_append(&s, c->name);
  }

  if (want_flags & MY_LOADED_SETS)
+29 −32
Original line number Diff line number Diff line
@@ -2,36 +2,33 @@
#
# This file lists all of the available character sets.

# THE ORDER IN WHICH CHARACTER SETS ARE LISTED IS IMPORTANT.  See the
# README file in this directory for details.


big5 		  #  1
czech 		  #  2
dec8 		  #  3
dos 		  #  4
german1 	  #  5
hp8 		  #  6
koi8_ru 	  #  7
latin1		  #  8
latin2		  #  9
swe7 		  # 10
usa7 		  # 11
ujis 		  # 12
sjis 		  # 13
cp1251		  # 14
danish		  # 15
hebrew		  # 16
win1251 	  # 17
tis620		  # 18
euc_kr		  # 19
estonia 	  # 20
hungarian 	  # 21
koi8_ukr 	  # 22
win1251ukr 	  # 23
gb2312		  # 24
greek 		  # 25
win1250 	  # 26
croat 		  # 27
gbk 		  # 28
cp1257		  # 29
big5 		   1
czech 		   2
dec8 		   3
dos 		   4
german1 	   5
hp8 		   6
koi8_ru 	   7
latin1		   8
latin2		   9
swe7 		  10
usa7 		  11
ujis 		  12
sjis 		  13
cp1251		  14
danish		  15
hebrew		  16
win1251 	  17
tis620		  18
euc_kr		  19
estonia 	  20
hungarian 	  21
koi8_ukr 	  22
win1251ukr 	  23
gb2312		  24
greek 		  25
win1250 	  26
croat 		  27
gbk 		  28
cp1257		  29
+3 −6
Original line number Diff line number Diff line
@@ -9,10 +9,9 @@ different character sets. It contains:
Index
    The Index file lists all of the available charset configurations.

    THE ORDER OF THE CHARACTER SETS IN THIS FILE IS SIGNIFICANT.
    The first character set is number 1, the second is number 2, etc.  The
    number is stored IN THE DATABASE TABLE FILES and must not be changed.
    Always add new character sets to the end of the list, so that the
    Each charset is paired with a number.  The number is stored
    IN THE DATABASE TABLE FILES and must not be changed.  Always
    add new character sets to the end of the list, so that the
    numbers of the other character sets will not be changed.

Compiled in or configuration file?
@@ -39,5 +38,3 @@ Syntax of configuration files
    number in hexadecimal format.  The ctype array takes up the first
    257 words; the to_lower, to_upper and sort_order arrays take up 256
    words each after that.

    The Index file is simply a list of the available character sets.