Loading mysql-test/r/xml.result +27 −1 Original line number Diff line number Diff line Loading @@ -570,7 +570,7 @@ select extractvalue('<a>a<b>B</b></a>','a|/b'); extractvalue('<a>a<b>B</b></a>','a|/b') a select extractvalue('<a>A</a>','/<a>'); ERROR HY000: XPATH syntax error: '<a>' ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>' select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!'); ERROR HY000: XPATH syntax error: '!' select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*'); Loading Loading @@ -710,3 +710,29 @@ Data select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something') Otherdata select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02'); ERROR HY000: XPATH syntax error: '02' select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*'); extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*') NULL Warnings: Warning 1512 Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)' select extractValue('<.>test</.>','//*'); extractValue('<.>test</.>','//*') NULL Warnings: Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)' select extractValue('<->test</->','//*'); extractValue('<->test</->','//*') NULL Warnings: Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)' select extractValue('<:>test</:>','//*'); extractValue('<:>test</:>','//*') test select extractValue('<_>test</_>','//*'); extractValue('<_>test</_>','//*') test select extractValue('<x.-_:>test</x.-_:>','//*'); extractValue('<x.-_:>test</x.-_:>','//*') test mysql-test/t/xml.test +16 −0 Original line number Diff line number Diff line Loading @@ -360,3 +360,19 @@ select 
extractValue('<ns:element xmlns:ns="myns">a</ns:element>','/ns:element/@x # select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar'); select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); # # Bug#20854 XML functions: wrong result in ExtractValue # --error 1105 select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02'); select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*'); # dot and dash are bad identifier start characters select extractValue('<.>test</.>','//*'); select extractValue('<->test</->','//*'); # colon is good identifier start character select extractValue('<:>test</:>','//*'); # underscore is good identifier start character select extractValue('<_>test</_>','//*'); # dot, dash, underscore and colon are good identifier middle characters select extractValue('<x.-_:>test</x.-_:>','//*'); sql/item_xmlfunc.cc +58 −6 Original line number Diff line number Diff line Loading @@ -105,6 +105,7 @@ typedef struct my_xpath_st String *context_cache; /* last context provider */ String *pxml; /* Parsed XML, an array of MY_XML_NODE */ CHARSET_INFO *cs; /* character set/collation string comparison */ int error; } MY_XPATH; Loading Loading @@ -913,7 +914,9 @@ static Item *eq_func_reverse(int oper, Item *a, Item *b) RETURN The newly created item. 
*/ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) static Item *create_comparator(MY_XPATH *xpath, int oper, MY_XPATH_LEX *context, Item *a, Item *b) { if (a->type() != Item::XPATH_NODESET && b->type() != Item::XPATH_NODESET) Loading @@ -923,6 +926,13 @@ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) else if (a->type() == Item::XPATH_NODESET && b->type() == Item::XPATH_NODESET) { uint len= context->end - context->beg; set_if_bigger(len, 32); my_printf_error(ER_UNKNOWN_ERROR, "XPATH error: " "comparison of two nodesets is not supported: '%.*s'", MYF(0), len, context->beg); return 0; // TODO: Comparison of two nodesets } else Loading Loading @@ -1430,7 +1440,7 @@ my_xpath_lex_scan(MY_XPATH *xpath, static int my_xpath_parse_term(MY_XPATH *xpath, int term) { if (xpath->lasttok.term == term) if (xpath->lasttok.term == term && !xpath->error) { xpath->prevtok= xpath->lasttok; my_xpath_lex_scan(xpath, &xpath->lasttok, Loading Loading @@ -1558,8 +1568,9 @@ static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) return my_xpath_parse_RelativeLocationPath(xpath); } return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) || my_xpath_parse_RelativeLocationPath(xpath); return (xpath->error == 0); } Loading Loading @@ -1596,8 +1607,11 @@ static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) "*", 1, xpath->pxml, 1); if (!my_xpath_parse_Step(xpath)) { xpath->error= 1; return 0; } } return 1; } Loading Loading @@ -1633,10 +1647,16 @@ my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) xpath->context_cache= context_cache; if(!my_xpath_parse_PredicateExpr(xpath)) { xpath->error= 1; return 0; } if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) { xpath->error= 1; return 0; } xpath->item= nodeset2bool(xpath, xpath->item); Loading Loading @@ -1893,7 +1913,10 @@ static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) if (!my_xpath_parse_PathExpr(xpath) || xpath->item->type() != Item::XPATH_NODESET) { 
xpath->error= 1; return 0; } xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml); } return 1; Loading Loading @@ -1929,6 +1952,7 @@ static int my_xpath_parse_PathExpr(MY_XPATH *xpath) { return my_xpath_parse_LocationPath(xpath) || my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); } Loading Loading @@ -1975,7 +1999,10 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath) { Item *prev= xpath->item; if (!my_xpath_parse_AndExpr(xpath)) { return 0; xpath->error= 1; } xpath->item= new Item_cond_or(nodeset2bool(xpath, prev), nodeset2bool(xpath, xpath->item)); } Loading Loading @@ -2003,7 +2030,10 @@ static int my_xpath_parse_AndExpr(MY_XPATH *xpath) { Item *prev= xpath->item; if (!my_xpath_parse_EqualityExpr(xpath)) { xpath->error= 1; return 0; } xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), nodeset2bool(xpath,xpath->item)); Loading Loading @@ -2057,17 +2087,26 @@ static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) } static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath) { MY_XPATH_LEX operator_context; if (!my_xpath_parse_RelationalExpr(xpath)) return 0; operator_context= xpath->lasttok; while (my_xpath_parse_EqualityOperator(xpath)) { Item *prev= xpath->item; int oper= xpath->extra; if (!my_xpath_parse_RelationalExpr(xpath)) { xpath->error= 1; return 0; } if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) if (!(xpath->item= create_comparator(xpath, oper, &operator_context, prev, xpath->item))) return 0; operator_context= xpath->lasttok; } return 1; } Loading Loading @@ -2109,18 +2148,25 @@ static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) } static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) { MY_XPATH_LEX operator_context; if (!my_xpath_parse_AdditiveExpr(xpath)) return 0; operator_context= xpath->lasttok; while (my_xpath_parse_RelationalOperator(xpath)) { Item *prev= xpath->item; int oper= xpath->extra; if (!my_xpath_parse_AdditiveExpr(xpath)) { xpath->error= 1; return 0; } 
if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) if (!(xpath->item= create_comparator(xpath, oper, &operator_context, prev, xpath->item))) return 0; operator_context= xpath->lasttok; } return 1; } Loading Loading @@ -2153,7 +2199,10 @@ static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) int oper= xpath->prevtok.term; Item *prev= xpath->item; if (!my_xpath_parse_MultiplicativeExpr(xpath)) { xpath->error= 1; return 0; } if (oper == MY_XPATH_LEX_PLUS) xpath->item= new Item_func_plus(prev, xpath->item); Loading Loading @@ -2198,7 +2247,10 @@ static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) int oper= xpath->prevtok.term; Item *prev= xpath->item; if (!my_xpath_parse_UnaryExpr(xpath)) { xpath->error= 1; return 0; } switch (oper) { case MY_XPATH_LEX_ASTERISK: Loading strings/xml.c +51 −9 Original line number Diff line number Diff line Loading @@ -19,6 +19,7 @@ #include "my_xml.h" #define MY_XML_UNKNOWN 'U' #define MY_XML_EOF 'E' #define MY_XML_STRING 'S' #define MY_XML_IDENT 'I' Loading @@ -39,6 +40,46 @@ typedef struct xml_attr_st } MY_XML_ATTR; /* XML ctype: */ #define MY_XML_ID0 0x01 /* Identifier initial character */ #define MY_XML_ID1 0x02 /* Identifier medial character */ #define MY_XML_SPC 0x08 /* Spacing character */ /* http://www.w3.org/TR/REC-xml/ [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender [5] Name ::= (Letter | '_' | ':') (NameChar)* */ static char my_xml_ctype[256]= { /*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0, /*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */ /*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? 
*/ /*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */ /*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */ /*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */ /*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */ /*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }; #define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC) #define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0) #define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1) static const char *lex2str(int lex) { switch(lex) Loading @@ -56,13 +97,13 @@ static const char *lex2str(int lex) case MY_XML_QUESTION: return "'?'"; case MY_XML_EXCLAM: return "'!'"; } return "UNKNOWN"; return "unknown token"; } static void my_xml_norm_text(MY_XML_ATTR *a) { for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ ); for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- ); for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ ); for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- ); } Loading @@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) { int lex; for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++); for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++); if (p->cur >= p->end) { Loading Loading @@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) my_xml_norm_text(a); lex=MY_XML_STRING; } else else if (my_xml_is_id0(p->cur[0])) { for(; (p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]); p->cur++) {} p->cur++; while (p->cur < p->end && my_xml_is_id1(p->cur[0])) p->cur++; a->end=p->cur; my_xml_norm_text(a); lex=MY_XML_IDENT; } else lex= 
MY_XML_UNKNOWN; #if 0 printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg); Loading Loading
mysql-test/r/xml.result +27 −1 Original line number Diff line number Diff line Loading @@ -570,7 +570,7 @@ select extractvalue('<a>a<b>B</b></a>','a|/b'); extractvalue('<a>a<b>B</b></a>','a|/b') a select extractvalue('<a>A</a>','/<a>'); ERROR HY000: XPATH syntax error: '<a>' ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>' select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!'); ERROR HY000: XPATH syntax error: '!' select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*'); Loading Loading @@ -710,3 +710,29 @@ Data select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something') Otherdata select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02'); ERROR HY000: XPATH syntax error: '02' select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*'); extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*') NULL Warnings: Warning 1512 Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)' select extractValue('<.>test</.>','//*'); extractValue('<.>test</.>','//*') NULL Warnings: Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)' select extractValue('<->test</->','//*'); extractValue('<->test</->','//*') NULL Warnings: Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)' select extractValue('<:>test</:>','//*'); extractValue('<:>test</:>','//*') test select extractValue('<_>test</_>','//*'); extractValue('<_>test</_>','//*') test select extractValue('<x.-_:>test</x.-_:>','//*'); extractValue('<x.-_:>test</x.-_:>','//*') test
mysql-test/t/xml.test +16 −0 Original line number Diff line number Diff line Loading @@ -360,3 +360,19 @@ select extractValue('<ns:element xmlns:ns="myns">a</ns:element>','/ns:element/@x # select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar'); select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); # # Bug#20854 XML functions: wrong result in ExtractValue # --error 1105 select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02'); select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*'); # dot and dash are bad identifier start characters select extractValue('<.>test</.>','//*'); select extractValue('<->test</->','//*'); # colon is good identifier start character select extractValue('<:>test</:>','//*'); # underscore is good identifier start character select extractValue('<_>test</_>','//*'); # dot, dash, underscore and colon are good identifier middle characters select extractValue('<x.-_:>test</x.-_:>','//*');
sql/item_xmlfunc.cc +58 −6 Original line number Diff line number Diff line Loading @@ -105,6 +105,7 @@ typedef struct my_xpath_st String *context_cache; /* last context provider */ String *pxml; /* Parsed XML, an array of MY_XML_NODE */ CHARSET_INFO *cs; /* character set/collation string comparison */ int error; } MY_XPATH; Loading Loading @@ -913,7 +914,9 @@ static Item *eq_func_reverse(int oper, Item *a, Item *b) RETURN The newly created item. */ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) static Item *create_comparator(MY_XPATH *xpath, int oper, MY_XPATH_LEX *context, Item *a, Item *b) { if (a->type() != Item::XPATH_NODESET && b->type() != Item::XPATH_NODESET) Loading @@ -923,6 +926,13 @@ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) else if (a->type() == Item::XPATH_NODESET && b->type() == Item::XPATH_NODESET) { uint len= context->end - context->beg; set_if_bigger(len, 32); my_printf_error(ER_UNKNOWN_ERROR, "XPATH error: " "comparison of two nodesets is not supported: '%.*s'", MYF(0), len, context->beg); return 0; // TODO: Comparison of two nodesets } else Loading Loading @@ -1430,7 +1440,7 @@ my_xpath_lex_scan(MY_XPATH *xpath, static int my_xpath_parse_term(MY_XPATH *xpath, int term) { if (xpath->lasttok.term == term) if (xpath->lasttok.term == term && !xpath->error) { xpath->prevtok= xpath->lasttok; my_xpath_lex_scan(xpath, &xpath->lasttok, Loading Loading @@ -1558,8 +1568,9 @@ static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) return my_xpath_parse_RelativeLocationPath(xpath); } return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) || my_xpath_parse_RelativeLocationPath(xpath); return (xpath->error == 0); } Loading Loading @@ -1596,8 +1607,11 @@ static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) "*", 1, xpath->pxml, 1); if (!my_xpath_parse_Step(xpath)) { xpath->error= 1; return 0; } } return 1; } Loading Loading @@ -1633,10 +1647,16 @@ 
my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) xpath->context_cache= context_cache; if(!my_xpath_parse_PredicateExpr(xpath)) { xpath->error= 1; return 0; } if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) { xpath->error= 1; return 0; } xpath->item= nodeset2bool(xpath, xpath->item); Loading Loading @@ -1893,7 +1913,10 @@ static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) if (!my_xpath_parse_PathExpr(xpath) || xpath->item->type() != Item::XPATH_NODESET) { xpath->error= 1; return 0; } xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml); } return 1; Loading Loading @@ -1929,6 +1952,7 @@ static int my_xpath_parse_PathExpr(MY_XPATH *xpath) { return my_xpath_parse_LocationPath(xpath) || my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); } Loading Loading @@ -1975,7 +1999,10 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath) { Item *prev= xpath->item; if (!my_xpath_parse_AndExpr(xpath)) { return 0; xpath->error= 1; } xpath->item= new Item_cond_or(nodeset2bool(xpath, prev), nodeset2bool(xpath, xpath->item)); } Loading Loading @@ -2003,7 +2030,10 @@ static int my_xpath_parse_AndExpr(MY_XPATH *xpath) { Item *prev= xpath->item; if (!my_xpath_parse_EqualityExpr(xpath)) { xpath->error= 1; return 0; } xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), nodeset2bool(xpath,xpath->item)); Loading Loading @@ -2057,17 +2087,26 @@ static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) } static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath) { MY_XPATH_LEX operator_context; if (!my_xpath_parse_RelationalExpr(xpath)) return 0; operator_context= xpath->lasttok; while (my_xpath_parse_EqualityOperator(xpath)) { Item *prev= xpath->item; int oper= xpath->extra; if (!my_xpath_parse_RelationalExpr(xpath)) { xpath->error= 1; return 0; } if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) if (!(xpath->item= create_comparator(xpath, oper, &operator_context, prev, xpath->item))) return 0; 
operator_context= xpath->lasttok; } return 1; } Loading Loading @@ -2109,18 +2148,25 @@ static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) } static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) { MY_XPATH_LEX operator_context; if (!my_xpath_parse_AdditiveExpr(xpath)) return 0; operator_context= xpath->lasttok; while (my_xpath_parse_RelationalOperator(xpath)) { Item *prev= xpath->item; int oper= xpath->extra; if (!my_xpath_parse_AdditiveExpr(xpath)) { xpath->error= 1; return 0; } if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) if (!(xpath->item= create_comparator(xpath, oper, &operator_context, prev, xpath->item))) return 0; operator_context= xpath->lasttok; } return 1; } Loading Loading @@ -2153,7 +2199,10 @@ static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) int oper= xpath->prevtok.term; Item *prev= xpath->item; if (!my_xpath_parse_MultiplicativeExpr(xpath)) { xpath->error= 1; return 0; } if (oper == MY_XPATH_LEX_PLUS) xpath->item= new Item_func_plus(prev, xpath->item); Loading Loading @@ -2198,7 +2247,10 @@ static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) int oper= xpath->prevtok.term; Item *prev= xpath->item; if (!my_xpath_parse_UnaryExpr(xpath)) { xpath->error= 1; return 0; } switch (oper) { case MY_XPATH_LEX_ASTERISK: Loading
strings/xml.c +51 −9 Original line number Diff line number Diff line Loading @@ -19,6 +19,7 @@ #include "my_xml.h" #define MY_XML_UNKNOWN 'U' #define MY_XML_EOF 'E' #define MY_XML_STRING 'S' #define MY_XML_IDENT 'I' Loading @@ -39,6 +40,46 @@ typedef struct xml_attr_st } MY_XML_ATTR; /* XML ctype: */ #define MY_XML_ID0 0x01 /* Identifier initial character */ #define MY_XML_ID1 0x02 /* Identifier medial character */ #define MY_XML_SPC 0x08 /* Spacing character */ /* http://www.w3.org/TR/REC-xml/ [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender [5] Name ::= (Letter | '_' | ':') (NameChar)* */ static char my_xml_ctype[256]= { /*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0, /*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */ /*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */ /*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */ /*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */ /*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */ /*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */ /*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }; #define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC) #define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0) #define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1) static const char *lex2str(int lex) { switch(lex) Loading @@ -56,13 +97,13 @@ static const char *lex2str(int lex) case MY_XML_QUESTION: return "'?'"; case MY_XML_EXCLAM: return "'!'"; } return "UNKNOWN"; return "unknown token"; } static void my_xml_norm_text(MY_XML_ATTR *a) { for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ ); for ( ; 
(a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- ); for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ ); for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- ); } Loading @@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) { int lex; for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++); for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++); if (p->cur >= p->end) { Loading Loading @@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) my_xml_norm_text(a); lex=MY_XML_STRING; } else else if (my_xml_is_id0(p->cur[0])) { for(; (p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]); p->cur++) {} p->cur++; while (p->cur < p->end && my_xml_is_id1(p->cur[0])) p->cur++; a->end=p->cur; my_xml_norm_text(a); lex=MY_XML_IDENT; } else lex= MY_XML_UNKNOWN; #if 0 printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg); Loading