Commit 4fa4383b authored by unknown's avatar unknown
Browse files

WL#1386 - CTYPE table for unicode character sets

A prerequisite for several fulltext and XML bugs.
MY_CHARSET_HANDLER now has a new function "ctype"
to detect the type of the next character in a string
(e.g. digit, letter, space, punctuation, control),
which works correctly for both 8-bit and multibyte charsets.
Previously only 8-bit charsets worked correctly,
while in multibyte charsets any multibyte character
was considered a letter.
Many files:
  Adding new function
Makefile.am:
  Adding build rules for uctypedump,
  a dump tool to create my_uctype.h
  using Unicode Character Database file.
m_ctype.h:
  Adding declaration of my_uni_ctype,
  ctype data for Unicode.
  Adding new member into MY_CHARSET_HANDLER
Makefile.am:
  Adding my_uctype.h into noinst_HEADERS
my_uctype.h, uctypedump.c:
  new files:
  ctype data for unicode,
  and the tool to generate it from 
  a Unicode Character Database file.



include/Makefile.am:
  Adding my_uctype.h
include/m_ctype.h:
  Adding declaration of my_uni_ctype,
  ctype data for Unicode.
strings/Makefile.am:
  Adding build rules for uctypedump,
  a dump tool to create my_uctype.h
  using Unicode Character Database file.
strings/ctype-big5.c:
  Adding new function
strings/ctype-bin.c:
  Adding new function
strings/ctype-cp932.c:
  Adding new function
strings/ctype-euc_kr.c:
  Adding new function
strings/ctype-eucjpms.c:
  Adding new function
strings/ctype-gb2312.c:
  Adding new function
strings/ctype-gbk.c:
  Adding new function
strings/ctype-latin1.c:
  Adding new function
strings/ctype-mb.c:
  Adding new function
strings/ctype-simple.c:
  Adding new function
strings/ctype-sjis.c:
  Adding new function
strings/ctype-tis620.c:
  Adding new function
strings/ctype-ucs2.c:
  Adding new function
strings/ctype-ujis.c:
  Adding new function
strings/ctype-utf8.c:
  Adding new function
parent 55c304a1
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@ pkginclude_HEADERS = my_dbug.h m_string.h my_sys.h my_list.h my_xml.h \
			sslopt-vars.h sslopt-case.h sql_common.h keycache.h \
			mysql_time.h plugin.h $(BUILT_SOURCES)
noinst_HEADERS =	config-win.h config-os2.h config-netware.h \
			heap.h my_bitmap.h\
			heap.h my_bitmap.h my_uctype.h \
			myisam.h myisampack.h myisammrg.h ft_global.h\
			mysys_err.h my_base.h help_start.h help_end.h \
			my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \
+16 −0
Original line number Diff line number Diff line
@@ -47,6 +47,15 @@ typedef struct unicase_info_st
extern MY_UNICASE_INFO *my_unicase_default[256];
extern MY_UNICASE_INFO *my_unicase_turkish[256];

/*
  One entry of the Unicode ctype table (see my_uni_ctype[256]).

  NOTE(review): the field semantics are not visible in this header.
  Presumably pctype is a single ctype value shared by a whole block of
  256 code points, used when ctype is NULL, and ctype otherwise points
  to per-character ctype values -- confirm against my_uctype.h and the
  multibyte ctype scanner before relying on this.
*/
typedef struct uni_ctype_st
{
  unsigned char  pctype;
  unsigned char  *ctype;
} MY_UNI_CTYPE;

extern MY_UNI_CTYPE my_uni_ctype[256];


#define MY_CS_ILSEQ	0
#define MY_CS_ILUNI	0
#define MY_CS_TOOSMALL	-1
@@ -165,6 +174,10 @@ typedef struct my_charset_handler_st
  int (*wc_mb)(struct charset_info_st *cs,my_wc_t wc,
	       unsigned char *s,unsigned char *e);
  
  /* CTYPE scanner */
  int (*ctype)(struct charset_info_st *cs, int *ctype,
               const unsigned char *s, const unsigned char *e);
  
  /* Functions for case and sort convertion */
  void    (*caseup_str)(struct charset_info_st *, char *);
  void    (*casedn_str)(struct charset_info_st *, char *);
@@ -308,6 +321,9 @@ extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);

/*
  CTYPE scanners with the same signature as the "ctype" member of
  MY_CHARSET_HANDLER: my_mb_ctype_8bit for 8-bit charsets and
  my_mb_ctype_mb for multibyte charsets. The int* argument receives the
  ctype of the next character in the byte range given by the two uchar
  pointers.
  NOTE(review): the return-value convention is not visible from these
  declarations -- confirm in the definitions.
*/
int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);

ulong my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);

int my_snprintf_8bit(struct charset_info_st *, char *to, uint n,

include/my_uctype.h

0 → 100644
+1464 −0

File added.

Preview size limit exceeded, changes collapsed.

+4 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ endif

libmystrings_a_SOURCES = $(ASRCS) $(CSRCS)
noinst_PROGRAMS = conf_to_src
CLEANFILES = str_test uctypedump test_decimal
# Default charset definitions
EXTRA_DIST =		ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-win1250ch.c \
			ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-utf8.c \
@@ -77,6 +78,9 @@ FLAGS=$(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @NOINST_LDFLAGS@
str_test: str_test.c $(pkglib_LIBRARIES)
	$(LINK) $(FLAGS) -DMAIN $(INCLUDES) $(srcdir)/str_test.c $(LDADD) $(pkglib_LIBRARIES)

# uctypedump: dump tool that creates my_uctype.h from a
# Unicode Character Database file.
uctypedump: uctypedump.c
	$(LINK) $(INCLUDES) $(srcdir)/uctypedump.c

test_decimal$(EXEEXT): decimal.c $(pkglib_LIBRARIES)
	$(CP) $(srcdir)/decimal.c ./test_decimal.c
	$(LINK) $(FLAGS) -DMAIN  ./test_decimal.c $(LDADD) $(pkglib_LIBRARIES)
+1 −0
Original line number Diff line number Diff line
@@ -6356,6 +6356,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
  my_numcells_8bit,
  my_mb_wc_big5,	/* mb_wc       */
  my_wc_mb_big5,	/* wc_mb       */
  my_mb_ctype_mb,
  my_caseup_str_mb,
  my_casedn_str_mb,
  my_caseup_mb,
Loading