387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
|  | /*
 | ||
|  |  * Copyright (C)2005-2019 Haxe Foundation | ||
|  |  * | ||
|  |  * Permission is hereby granted, free of charge, to any person obtaining a | ||
|  |  * copy of this software and associated documentation files (the "Software"), | ||
|  |  * to deal in the Software without restriction, including without limitation | ||
|  |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
|  |  * and/or sell copies of the Software, and to permit persons to whom the | ||
|  |  * Software is furnished to do so, subject to the following conditions: | ||
|  |  * | ||
|  |  * The above copyright notice and this permission notice shall be included in | ||
|  |  * all copies or substantial portions of the Software. | ||
|  |  * | ||
|  |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
|  |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
|  |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
|  |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
|  |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
|  |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
|  |  * DEALINGS IN THE SOFTWARE. | ||
|  |  */ | ||
|  | 
 | ||
|  | #ifdef EPPC
 | ||
|  | #include <memory>
 | ||
|  | #else
 | ||
|  | #include <memory.h>
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 
 | ||
|  | #ifndef HX_WINDOWS
 | ||
|  | #  include <strings.h>
 | ||
|  | #  undef strcmpi
 | ||
|  | #  define strcmpi(a,b) strcasecmp(a,b)
 | ||
|  | #else
 | ||
|  | #   include <string.h>
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 
 | ||
|  | // -------------- parsing --------------------------
 | ||
|  | 
 | ||
|  | 
 | ||
// States of the character-driven machine in do_parse_xml.
// The numeric order is kept as-is.
enum STATE {
   IGNORE_SPACES,   // skip '\n', '\r', '\t' and ' ', then switch to the pending `next` state
   BEGIN,           // between constructs: '<' opens markup, anything else starts character data
   BEGIN_NODE,      // just after '<': expects '!', '?', '/' or the first character of a tag name
   TAG_NAME,        // reading the name of an opening tag
   BODY,            // inside an opening tag: '/', '>' or the start of an attribute name
   ATTRIB_NAME,     // reading an attribute name
   EQUALS,          // expects '=' after an attribute name
   ATTVAL_BEGIN,    // expects the opening '"' or '\'' of an attribute value
   ATTRIB_VAL,      // reading an attribute value up to the matching quote
   CHILDS,          // after the '>' of an opening tag: children are parsed by a nested call
   CLOSE,           // reading the name of a closing tag
   WAIT_END,        // expects '>' after the '/' of a self-closing tag
   WAIT_END_RET,    // expects '>' after a closing tag name, then returns to the caller
   PCDATA,          // reading character data up to the next '<'
   HEADER,          // inside <? ... ?>
   COMMENT,         // inside <!-- ... -->
   DOCTYPE,         // inside <!DOCTYPE ... >, tracking '[' / ']' nesting
   CDATA            // inside <![CDATA[ ... ]]>
};
|  | 
 | ||
|  | static void xml_error( const char *xml, const char *inWhere, int *line, String msg ) { | ||
|  |    String b = HX_CSTRING("Xml parse error : ") + msg + HX_CSTRING(" at line ") + String(*line) + HX_CSTRING(" : "); | ||
|  |    String where(inWhere); | ||
|  | 
 | ||
|  |    int l = where.length; | ||
|  |    int nchars = 30; | ||
|  |    if( inWhere != xml ) | ||
|  |       b += HX_CSTRING("..."); | ||
|  | 
 | ||
|  |    if (where.length==0) | ||
|  |       b+= HX_CSTRING("<eof>"); | ||
|  |    else if (where.length<nchars) | ||
|  |       b+= where; | ||
|  |    else | ||
|  |       b+= where.substr(0,nchars) + HX_CSTRING("..."); | ||
|  | 
 | ||
|  |    hx::Throw(b); | ||
|  | } | ||
|  | 
 | ||
// Error helpers for do_parse_xml. Both expand to a call to xml_error and rely
// on `xml`, `p` and `line` being in scope at the point of use.
//   ERRORSTR(msg) : msg is passed through unchanged.
//   ERROR(msg)    : msg is wrapped in HX_CSTRING first.
// The definitions deliberately do not end with ';' - the caller supplies it,
// so a use is a single statement and stays safe inside if/else.
#define ERRORSTR(msg)   xml_error(xml,p,line,msg)
#define ERROR(msg)   xml_error(xml,p,line,HX_CSTRING(msg))
 | ||
|  | 
 | ||
// Returns true when `c` may appear in a tag or attribute name:
// an ASCII letter, an ASCII digit, or one of ':', '.', '_', '-'.
static bool is_valid_char( int c ) {
   if( c >= '0' && c <= '9' )
      return true;
   if( c >= 'A' && c <= 'Z' )
      return true;
   if( c >= 'a' && c <= 'z' )
      return true;

   switch( c ) {
   case ':':
   case '.':
   case '_':
   case '-':
      return true;
   default:
      return false;
   }
}
|  | 
 | ||
|  | static void do_parse_xml( const char *xml, const char **lp, int *line, cpp::NativeXmlState callb, String parentname ) | ||
|  | { | ||
|  |    STATE state = BEGIN; | ||
|  |    STATE next = BEGIN; | ||
|  |    String aname; | ||
|  |    hx::Anon attribs; | ||
|  |    String nodename; | ||
|  | 
 | ||
|  |    const char *start = NULL; | ||
|  |    const char *p = *lp; | ||
|  |    char c = *p; | ||
|  |    int nsubs = 0, nbrackets = 0; | ||
|  |    while( c ) { | ||
|  |       switch( state ) { | ||
|  |       case IGNORE_SPACES: | ||
|  |          switch( c ) { | ||
|  |          case '\n': | ||
|  |          case '\r': | ||
|  |          case '\t': | ||
|  |          case ' ': | ||
|  |             break; | ||
|  |          default: | ||
|  |             state = next; | ||
|  |             continue; | ||
|  |          } | ||
|  |          break; | ||
|  |       case BEGIN: | ||
|  |          switch( c ) { | ||
|  |          case '<': | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = BEGIN_NODE; | ||
|  |             break; | ||
|  |          default: | ||
|  |             start = p; | ||
|  |             state = PCDATA; | ||
|  |             continue; | ||
|  |          } | ||
|  |          break; | ||
|  |       case PCDATA: | ||
|  |          if( c == '<' ) { | ||
|  |             callb->pcdata(String(start,p-start).dup()); | ||
|  |             nsubs++; | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = BEGIN_NODE; | ||
|  |          } | ||
|  |          break; | ||
|  |       case CDATA: | ||
|  |          if( c == ']' && p[1] == ']' && p[2] == '>' ) { | ||
|  |             callb->cdata(String(start,p-start).dup()); | ||
|  |             nsubs++; | ||
|  |             p += 2; | ||
|  |             state = BEGIN; | ||
|  |          } | ||
|  |          break; | ||
|  |       case BEGIN_NODE: | ||
|  |          switch( c ) { | ||
|  |          case '!': | ||
|  |             if( p[1] == '[' ) { | ||
|  |                p += 2; | ||
|  |                if( (p[0] != 'C' && p[0] != 'c') || | ||
|  |                   (p[1] != 'D' && p[1] != 'd') || | ||
|  |                   (p[2] != 'A' && p[2] != 'a') || | ||
|  |                   (p[3] != 'T' && p[3] != 't') || | ||
|  |                   (p[4] != 'A' && p[4] != 'a') || | ||
|  |                   (p[5] != '[') ) | ||
|  |                   ERROR("Expected <![CDATA["); | ||
|  |                p += 5; | ||
|  |                state = CDATA; | ||
|  |                start = p + 1; | ||
|  |                break; | ||
|  |             } | ||
|  |             if( p[1] == 'D' || p[1] == 'd' ) { | ||
|  |                if( (p[2] != 'O' && p[2] != 'o') || | ||
|  |                   (p[3] != 'C' && p[3] != 'c') || | ||
|  |                   (p[4] != 'T' && p[4] != 't') || | ||
|  |                   (p[5] != 'Y' && p[5] != 'y') || | ||
|  |                   (p[6] != 'P' && p[6] != 'p') || | ||
|  |                   (p[7] != 'E' && p[7] != 'e') ) | ||
|  |                   ERROR("Expected <!DOCTYPE"); | ||
|  |                p += 7; | ||
|  |                state = DOCTYPE; | ||
|  |                start = p + 1; | ||
|  |                break; | ||
|  |             } | ||
|  |             if( p[1] != '-' || p[2] != '-' ) | ||
|  |                ERROR("Expected <!--"); | ||
|  |             p += 2; | ||
|  |             state = COMMENT; | ||
|  |             start = p + 1; | ||
|  |             break; | ||
|  |          case '?': | ||
|  |             state = HEADER; | ||
|  |             start = p; | ||
|  |             break; | ||
|  |          case '/': | ||
|  |             if( parentname.length==0 ) | ||
|  |                ERROR("Expected node name"); | ||
|  |             start = p + 1; | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = CLOSE; | ||
|  |             break; | ||
|  |          default: | ||
|  |             state = TAG_NAME; | ||
|  |             start = p; | ||
|  |             continue; | ||
|  |          } | ||
|  |          break; | ||
|  |       case TAG_NAME: | ||
|  |          if( !is_valid_char(c) ) { | ||
|  |             if( p == start ) | ||
|  |                ERROR("Expected node name"); | ||
|  |             nodename = String(start,p-start).dup(); | ||
|  |             attribs = hx::Anon_obj::Create(); | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = BODY; | ||
|  |             continue; | ||
|  |          } | ||
|  |          break; | ||
|  |       case BODY: | ||
|  |          switch( c ) { | ||
|  |          case '/': | ||
|  |             state = WAIT_END; | ||
|  |             nsubs++; | ||
|  |             callb->xml(nodename,attribs); | ||
|  |             break; | ||
|  |          case '>': | ||
|  |             state = CHILDS; | ||
|  |             nsubs++; | ||
|  |             callb->xml(nodename,attribs); | ||
|  |             break; | ||
|  |          default: | ||
|  |             state = ATTRIB_NAME; | ||
|  |             start = p; | ||
|  |             continue; | ||
|  |          } | ||
|  |          break; | ||
|  |       case ATTRIB_NAME: | ||
|  |          if( !is_valid_char(c) ) { | ||
|  |             if( start == p ) | ||
|  |                ERROR("Expected attribute name"); | ||
|  |             aname = String(start,p-start).dup(); | ||
|  |             if( attribs->__Field(aname,hx::paccDynamic) != null() ) | ||
|  |                ERROR("Duplicate attribute"); | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = EQUALS; | ||
|  |             continue; | ||
|  |          } | ||
|  |          break; | ||
|  |       case EQUALS: | ||
|  |          switch( c ) { | ||
|  |          case '=': | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = ATTVAL_BEGIN; | ||
|  |             break; | ||
|  |          default: | ||
|  |             ERROR("Expected ="); | ||
|  |          } | ||
|  |          break; | ||
|  |       case ATTVAL_BEGIN: | ||
|  |          switch( c ) { | ||
|  |          case '"': | ||
|  |          case '\'': | ||
|  |             state = ATTRIB_VAL; | ||
|  |             start = p; | ||
|  |             break; | ||
|  |          default: | ||
|  |             ERROR("Expected \""); | ||
|  |          } | ||
|  |          break; | ||
|  |       case ATTRIB_VAL: | ||
|  |          if( c == *start ) { | ||
|  |             attribs->Add( aname, String(start+1,p-start-1).dup() ); | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = BODY; | ||
|  |          } | ||
|  |          break; | ||
|  |       case CHILDS: | ||
|  |          *lp = p; | ||
|  |          do_parse_xml(xml,lp,line,callb,nodename); | ||
|  |          p = *lp; | ||
|  |          start = p; | ||
|  |          state = BEGIN; | ||
|  |          break; | ||
|  |       case WAIT_END: | ||
|  |          switch( c ) { | ||
|  |          case '>': | ||
|  |             callb->done(); | ||
|  |             state = BEGIN; | ||
|  |             break; | ||
|  |          default : | ||
|  |             ERROR("Expected >"); | ||
|  |          } | ||
|  |          break; | ||
|  |       case WAIT_END_RET: | ||
|  |          switch( c ) { | ||
|  |          case '>': | ||
|  |             if( nsubs == 0 ) | ||
|  |                callb->pcdata(HX_CSTRING("")); | ||
|  |             *lp = p; | ||
|  |             return; | ||
|  |          default : | ||
|  |             ERROR("Expected >"); | ||
|  |          } | ||
|  |          break; | ||
|  |       case CLOSE: | ||
|  |          if( !is_valid_char(c) ) { | ||
|  |             if( start == p ) | ||
|  |                ERROR("Expected node name"); | ||
|  |             { | ||
|  |                String v = String(start,p - start).dup(); | ||
|  |                if( strcmpi(parentname.__s,v.__s) != 0 ) { | ||
|  |                   ERRORSTR(HX_CSTRING("Expected </") + parentname + HX_CSTRING(">")); | ||
|  |                } | ||
|  |             } | ||
|  |             state = IGNORE_SPACES; | ||
|  |             next = WAIT_END_RET; | ||
|  |             continue; | ||
|  |          } | ||
|  |          break; | ||
|  |       case COMMENT: | ||
|  |          if( c == '-' && p[1] == '-' && p[2] == '>' ) { | ||
|  |             callb->comment(String(start,p-start).dup()); | ||
|  |             p += 2; | ||
|  |             state = BEGIN; | ||
|  |          } | ||
|  |          break; | ||
|  |       case DOCTYPE: | ||
|  |          if( c == '[' ) | ||
|  |             nbrackets++; | ||
|  |          else if( c == ']' ) | ||
|  |             nbrackets--; | ||
|  |          else if( c == '>' && nbrackets == 0 ) { | ||
|  |             callb->doctype(String(start,p-start).dup()); | ||
|  |             state = BEGIN; | ||
|  |          } | ||
|  |          break; | ||
|  |       case HEADER: | ||
|  |          if( c == '?' && p[1] == '>' ) { | ||
|  |             p++; | ||
|  |             callb->comment(String(start,p-start).dup()); | ||
|  |             state = BEGIN; | ||
|  |          } | ||
|  |          break; | ||
|  |       } | ||
|  |       c = *++p; | ||
|  |       if( c == '\n' ) | ||
|  |          (*line)++; | ||
|  |    } | ||
|  |    if( state == BEGIN ) { | ||
|  |       start = p; | ||
|  |       state = PCDATA; | ||
|  |    } | ||
|  |    if( parentname.__s == 0 && state == PCDATA ) { | ||
|  |       if( p != start || nsubs == 0 ) | ||
|  |          callb->pcdata(String(start,p-start).dup()); | ||
|  |       return; | ||
|  |    } | ||
|  |    ERROR("Unexpected end"); | ||
|  | } | ||
|  | 
 | ||
|  | // ----------------------------------------------
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |    <doc> | ||
|  |    <h1>Xml</h1> | ||
|  |    <p> | ||
|  |    The standard event-driven XML parser. | ||
|  |    </p> | ||
|  |    </doc> | ||
|  | **/ | ||
|  | 
 | ||
|  | /**
 | ||
|  |    parse_xml : xml:string -> events:object -> void | ||
|  |    <doc> | ||
|  |    The [parse_xml] parse a string and for each parsed element call the | ||
|  |    corresponding object method in [events] : | ||
|  |    <ul> | ||
|  |    <li>[void xml( name : string, attribs : object)] when an XML node is found</li> | ||
|  |    <li>[void done()] when an XML node is closed</li> | ||
|  |    <li>[void pcdata(string)] when PCData chars found</li> | ||
|  |    <li>[void cdata(string)] when a CData session is found</li> | ||
|  |    <li>[void comment(string)] when some comment or special header is found</li> | ||
|  |    </ul> | ||
|  |    You can then implement the events so they build the appropriate XML data | ||
|  |    structure needed by your language. | ||
|  |    </doc> | ||
|  | **/ | ||
|  | static void parse_xml( String str, cpp::NativeXmlState state ) | ||
|  | { | ||
|  |    int line = 0; | ||
|  |    const char *p = str.__s; | ||
|  |    // skip BOM
 | ||
|  |    if( p[0] == (char)0xEF && p[1] == (char)0xBB && p[2] == (char)0xBF ) | ||
|  |       p += 3; | ||
|  |    do_parse_xml(p,&p,&line,state,String()); | ||
|  | } | ||
|  | 
 | ||
|  | 
 |