Commit 3010775e authored by unknown's avatar unknown
Browse files

Bug#18201: XML: ExtractValue works even if the xml

fragment is not well-formed xml

Problem:
- ExtractValue silently returned NULL if a malformed XML value was passed.
- In some cases "unexpected END-OF-INPUT" error was not detected, and
  a non-NULL result could be returned for a bad XML value.

Fix:
- Adding warning messages, to make user aware why NULL was returned.
- Missing "unexpected END-OF-INPUT" error is reported now.


mysql-test/r/xml.result:
  - Fixing XML syntax error in old test
  - Adding test cases.
mysql-test/t/xml.test:
  - Fixing XML syntax error in old test
  - Adding test cases.
sql/item_xmlfunc.cc:
  Produce warning in case of XML syntax error,
  instead of silently returning NULL.
strings/xml.c:
  - Making error messages better looking and clearer:
  It is important because now they're seen in SHOW WARNINGS
  (previously they were used only for debugging purposes).
  - Adding "unexpected END-OF-INPUT" error if after scanning
    closing tag for the root element some input is left
    (previously this error was ignored by mistake).
parent 3ef01486
Loading
Loading
Loading
Loading
+31 −1
Original line number Diff line number Diff line
@@ -132,7 +132,7 @@ xb1 xc1
SELECT extractValue(@xml,'/a//@x[2]');
extractValue(@xml,'/a//@x[2]')
xb2 xc2
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>';
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b></a>';
SELECT extractValue(@xml,'//b[1]');
extractValue(@xml,'//b[1]')
b1 c1b1 c2b1
@@ -612,6 +612,36 @@ extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]')
select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1');
ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1')
test
select extractValue('<a>a','/a');
extractValue('<a>a','/a')
NULL
Warnings:
Warning	1504	Incorrect XML value: 'parse error at line 1 pos 5: unexpected END-OF-INPUT'
select extractValue('<a>a<','/a');
extractValue('<a>a<','/a')
NULL
Warnings:
Warning	1504	Incorrect XML value: 'parse error at line 1 pos 6: END-OF-INPUT unexpected (ident or '/' wanted)'
select extractValue('<a>a</','/a');
extractValue('<a>a</','/a')
NULL
Warnings:
Warning	1504	Incorrect XML value: 'parse error at line 1 pos 7: END-OF-INPUT unexpected (ident wanted)'
select extractValue('<a>a</a','/a');
extractValue('<a>a</a','/a')
NULL
Warnings:
Warning	1504	Incorrect XML value: 'parse error at line 1 pos 8: END-OF-INPUT unexpected ('>' wanted)'
select extractValue('<a>a</a></b>','/a');
extractValue('<a>a</a></b>','/a')
NULL
Warnings:
Warning	1504	Incorrect XML value: 'parse error at line 1 pos 12: '</b>' unexpected (END-OF-INPUT wanted)'
select extractValue('<a b=>a</a>','/a');
extractValue('<a b=>a</a>','/a')
NULL
Warnings:
Warning	1504	Incorrect XML value: 'parse error at line 1 pos 7: '>' unexpected (ident or string wanted)'
select extractValue('<e>1</e>','position()');
ERROR HY000: XPATH syntax error: ''
select extractValue('<e>1</e>','last()');
+12 −1
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ SELECT extractValue(@xml,'/a//@x');
SELECT extractValue(@xml,'/a//@x[1]');
SELECT extractValue(@xml,'/a//@x[2]');

SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>';
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b></a>';
SELECT extractValue(@xml,'//b[1]');
SELECT extractValue(@xml,'/descendant::b[1]');

@@ -284,6 +284,17 @@ select extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]');
#
select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1');

#
# Bug#18201: XML: ExtractValue works even if the xml fragment
# is not well-formed xml
#
select extractValue('<a>a','/a');
select extractValue('<a>a<','/a');
select extractValue('<a>a</','/a');
select extractValue('<a>a</a','/a');
select extractValue('<a>a</a></b>','/a');
select extractValue('<a b=>a</a>','/a');

#
# Bug #18171 XML: ExtractValue: the XPath position()
# function crashes the server!
+11 −1
Original line number Diff line number Diff line
@@ -2563,7 +2563,17 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
  xml_enter(&p, raw_xml->ptr(), 0);

  /* Execute XML parser */
  rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length());
  if ((rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length())) != MY_XML_OK)
  {
    char buf[128];
    my_snprintf(buf, sizeof(buf)-1, "parse error at line %d pos %d: %s",
                my_xml_error_lineno(&p) + 1,
                my_xml_error_pos(&p) + 1,
                my_xml_error_string(&p));
    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                        ER_WRONG_VALUE,
                        ER(ER_WRONG_VALUE), "XML", buf);
  }
  my_xml_parser_free(&p);

  return rc == MY_XML_OK ? parsed_xml_buf : 0;
+19 −8
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ static const char *lex2str(int lex)
{
  switch(lex)
  {
    case MY_XML_EOF:      return "EOF";
    case MY_XML_EOF:      return "END-OF-INPUT";
    case MY_XML_STRING:   return "STRING";
    case MY_XML_IDENT:    return "IDENT";
    case MY_XML_CDATA:    return "CDATA";
@@ -195,8 +195,13 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, uint slen)
  if (str && (slen != glen))
  {
    mstr(s,str,sizeof(s)-1,slen);
    if (glen)
    {
      mstr(g,e+1,sizeof(g)-1,glen),
      sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
    }
    else
      sprintf(p->errstr,"'</%s>' unexpected (END-OF-INPUT wanted)", s);
    return MY_XML_ERROR;
  }
  
@@ -247,7 +252,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
      {
        if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
        {
          sprintf(p->errstr,"1: %s unexpected (ident wanted)",lex2str(lex));
          sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p,a.beg,(uint) (a.end-a.beg)))
@@ -275,7 +280,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
      }
      else
      {
        sprintf(p->errstr,"3: %s unexpected (ident or '/' wanted)",
        sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
		lex2str(lex));
        return MY_XML_ERROR;
      }
@@ -297,7 +302,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
          }
          else
          {
            sprintf(p->errstr,"4: %s unexpected (ident or string wanted)",
            sprintf(p->errstr,"%s unexpected (ident or string wanted)",
		    lex2str(lex));
            return MY_XML_ERROR;
          }
@@ -325,7 +330,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
      {
        if (lex != MY_XML_QUESTION)
        {
          sprintf(p->errstr,"6: %s unexpected ('?' wanted)",lex2str(lex));
          sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
          return MY_XML_ERROR;
        }
        if (MY_XML_OK != my_xml_leave(p,NULL,0))
@@ -341,7 +346,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
      
      if (lex != MY_XML_GT)
      {
        sprintf(p->errstr,"5: %s unexpected ('>' wanted)",lex2str(lex));
        sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
        return MY_XML_ERROR;
      }
    }
@@ -359,6 +364,12 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
      }
    }
  }

  if (p->attr[0])
  {
    sprintf(p->errstr,"unexpected END-OF-INPUT");
    return MY_XML_ERROR;
  }
  return MY_XML_OK;
}