387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (C)2005-2019 Haxe Foundation
 | 
						|
 *
 | 
						|
 * Permission is hereby granted, free of charge, to any person obtaining a
 | 
						|
 * copy of this software and associated documentation files (the "Software"),
 | 
						|
 * to deal in the Software without restriction, including without limitation
 | 
						|
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 | 
						|
 * and/or sell copies of the Software, and to permit persons to whom the
 | 
						|
 * Software is furnished to do so, subject to the following conditions:
 | 
						|
 *
 | 
						|
 * The above copyright notice and this permission notice shall be included in
 | 
						|
 * all copies or substantial portions of the Software.
 | 
						|
 *
 | 
						|
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
						|
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
						|
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | 
						|
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
						|
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | 
						|
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 | 
						|
 * DEALINGS IN THE SOFTWARE.
 | 
						|
 */
 | 
						|
 | 
						|
#ifdef EPPC
 | 
						|
#include <memory>
 | 
						|
#else
 | 
						|
#include <memory.h>
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
#ifndef HX_WINDOWS
 | 
						|
#  include <strings.h>
 | 
						|
#  undef strcmpi
 | 
						|
#  define strcmpi(a,b) strcasecmp(a,b)
 | 
						|
#else
 | 
						|
#   include <string.h>
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
// -------------- parsing --------------------------
 | 
						|
 | 
						|
 | 
						|
// Parser states for do_parse_xml. The enumerator order is kept as-is.
enum STATE {
   IGNORE_SPACES,   // skip '\n', '\r', '\t' and ' ', then resume in the pending `next` state
   BEGIN,           // between items: '<' opens markup, any other char starts PCDATA
   BEGIN_NODE,      // just after '<': dispatch on '!', '?', '/' or the first char of a tag name
   TAG_NAME,        // reading an element name
   BODY,            // inside a start tag: expects '/', '>' or the start of an attribute name
   ATTRIB_NAME,     // reading an attribute name
   EQUALS,          // expects '=' after an attribute name
   ATTVAL_BEGIN,    // expects the opening quote (" or ') of an attribute value
   ATTRIB_VAL,      // reading an attribute value up to the matching quote
   CHILDS,          // start tag ended with '>': children are parsed by a recursive call
   CLOSE,           // reading the name of an end tag (after "</")
   WAIT_END,        // after '/' of a self-closing tag: expects '>'
   WAIT_END_RET,    // after an end-tag name: expects '>' and then returns to the caller
   PCDATA,          // reading character data up to the next '<'
   HEADER,          // inside "<? ... ?>"
   COMMENT,         // inside "<!-- ... -->"
   DOCTYPE,         // inside "<!DOCTYPE ... >", tracking nested '[' ']'
   CDATA            // inside "<![CDATA[ ... ]]>"
};
 | 
						|
 | 
						|
static void xml_error( const char *xml, const char *inWhere, int *line, String msg ) {
 | 
						|
   String b = HX_CSTRING("Xml parse error : ") + msg + HX_CSTRING(" at line ") + String(*line) + HX_CSTRING(" : ");
 | 
						|
   String where(inWhere);
 | 
						|
 | 
						|
   int l = where.length;
 | 
						|
   int nchars = 30;
 | 
						|
   if( inWhere != xml )
 | 
						|
      b += HX_CSTRING("...");
 | 
						|
 | 
						|
   if (where.length==0)
 | 
						|
      b+= HX_CSTRING("<eof>");
 | 
						|
   else if (where.length<nchars)
 | 
						|
      b+= where;
 | 
						|
   else
 | 
						|
      b+= where.substr(0,nchars) + HX_CSTRING("...");
 | 
						|
 | 
						|
   hx::Throw(b);
 | 
						|
}
 | 
						|
 | 
						|
// Error-reporting shorthands for the parser. Both expand to a call to
// xml_error using the enclosing function's locals `xml`, `p` and `line`.
// ERRORSTR takes a ready-made String; ERROR takes a string literal and wraps
// it in HX_CSTRING.
// The expansions deliberately carry no trailing ';' - the call site supplies
// it - so the macros behave like a single statement in `if ... else` chains.
#define ERRORSTR(msg)   xml_error(xml,p,line,msg)
#define ERROR(msg)   xml_error(xml,p,line,HX_CSTRING(msg))
 | 
						|
 | 
						|
// Tell whether [c] may appear in a tag or attribute name:
// ASCII letters, ASCII digits, and the punctuation ':', '.', '_' and '-'.
static bool is_valid_char( int c ) {
   if( c >= 'a' && c <= 'z' )
      return true;
   if( c >= 'A' && c <= 'Z' )
      return true;
   if( c >= '0' && c <= '9' )
      return true;

   switch( c ) {
   case ':':
   case '.':
   case '_':
   case '-':
      return true;
   default:
      return false;
   }
}
 | 
						|
 | 
						|
static void do_parse_xml( const char *xml, const char **lp, int *line, cpp::NativeXmlState callb, String parentname )
 | 
						|
{
 | 
						|
   STATE state = BEGIN;
 | 
						|
   STATE next = BEGIN;
 | 
						|
   String aname;
 | 
						|
   hx::Anon attribs;
 | 
						|
   String nodename;
 | 
						|
 | 
						|
   const char *start = NULL;
 | 
						|
   const char *p = *lp;
 | 
						|
   char c = *p;
 | 
						|
   int nsubs = 0, nbrackets = 0;
 | 
						|
   while( c ) {
 | 
						|
      switch( state ) {
 | 
						|
      case IGNORE_SPACES:
 | 
						|
         switch( c ) {
 | 
						|
         case '\n':
 | 
						|
         case '\r':
 | 
						|
         case '\t':
 | 
						|
         case ' ':
 | 
						|
            break;
 | 
						|
         default:
 | 
						|
            state = next;
 | 
						|
            continue;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case BEGIN:
 | 
						|
         switch( c ) {
 | 
						|
         case '<':
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = BEGIN_NODE;
 | 
						|
            break;
 | 
						|
         default:
 | 
						|
            start = p;
 | 
						|
            state = PCDATA;
 | 
						|
            continue;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case PCDATA:
 | 
						|
         if( c == '<' ) {
 | 
						|
            callb->pcdata(String(start,p-start).dup());
 | 
						|
            nsubs++;
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = BEGIN_NODE;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case CDATA:
 | 
						|
         if( c == ']' && p[1] == ']' && p[2] == '>' ) {
 | 
						|
            callb->cdata(String(start,p-start).dup());
 | 
						|
            nsubs++;
 | 
						|
            p += 2;
 | 
						|
            state = BEGIN;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case BEGIN_NODE:
 | 
						|
         switch( c ) {
 | 
						|
         case '!':
 | 
						|
            if( p[1] == '[' ) {
 | 
						|
               p += 2;
 | 
						|
               if( (p[0] != 'C' && p[0] != 'c') ||
 | 
						|
                  (p[1] != 'D' && p[1] != 'd') ||
 | 
						|
                  (p[2] != 'A' && p[2] != 'a') ||
 | 
						|
                  (p[3] != 'T' && p[3] != 't') ||
 | 
						|
                  (p[4] != 'A' && p[4] != 'a') ||
 | 
						|
                  (p[5] != '[') )
 | 
						|
                  ERROR("Expected <![CDATA[");
 | 
						|
               p += 5;
 | 
						|
               state = CDATA;
 | 
						|
               start = p + 1;
 | 
						|
               break;
 | 
						|
            }
 | 
						|
            if( p[1] == 'D' || p[1] == 'd' ) {
 | 
						|
               if( (p[2] != 'O' && p[2] != 'o') ||
 | 
						|
                  (p[3] != 'C' && p[3] != 'c') ||
 | 
						|
                  (p[4] != 'T' && p[4] != 't') ||
 | 
						|
                  (p[5] != 'Y' && p[5] != 'y') ||
 | 
						|
                  (p[6] != 'P' && p[6] != 'p') ||
 | 
						|
                  (p[7] != 'E' && p[7] != 'e') )
 | 
						|
                  ERROR("Expected <!DOCTYPE");
 | 
						|
               p += 7;
 | 
						|
               state = DOCTYPE;
 | 
						|
               start = p + 1;
 | 
						|
               break;
 | 
						|
            }
 | 
						|
            if( p[1] != '-' || p[2] != '-' )
 | 
						|
               ERROR("Expected <!--");
 | 
						|
            p += 2;
 | 
						|
            state = COMMENT;
 | 
						|
            start = p + 1;
 | 
						|
            break;
 | 
						|
         case '?':
 | 
						|
            state = HEADER;
 | 
						|
            start = p;
 | 
						|
            break;
 | 
						|
         case '/':
 | 
						|
            if( parentname.length==0 )
 | 
						|
               ERROR("Expected node name");
 | 
						|
            start = p + 1;
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = CLOSE;
 | 
						|
            break;
 | 
						|
         default:
 | 
						|
            state = TAG_NAME;
 | 
						|
            start = p;
 | 
						|
            continue;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case TAG_NAME:
 | 
						|
         if( !is_valid_char(c) ) {
 | 
						|
            if( p == start )
 | 
						|
               ERROR("Expected node name");
 | 
						|
            nodename = String(start,p-start).dup();
 | 
						|
            attribs = hx::Anon_obj::Create();
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = BODY;
 | 
						|
            continue;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case BODY:
 | 
						|
         switch( c ) {
 | 
						|
         case '/':
 | 
						|
            state = WAIT_END;
 | 
						|
            nsubs++;
 | 
						|
            callb->xml(nodename,attribs);
 | 
						|
            break;
 | 
						|
         case '>':
 | 
						|
            state = CHILDS;
 | 
						|
            nsubs++;
 | 
						|
            callb->xml(nodename,attribs);
 | 
						|
            break;
 | 
						|
         default:
 | 
						|
            state = ATTRIB_NAME;
 | 
						|
            start = p;
 | 
						|
            continue;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case ATTRIB_NAME:
 | 
						|
         if( !is_valid_char(c) ) {
 | 
						|
            if( start == p )
 | 
						|
               ERROR("Expected attribute name");
 | 
						|
            aname = String(start,p-start).dup();
 | 
						|
            if( attribs->__Field(aname,hx::paccDynamic) != null() )
 | 
						|
               ERROR("Duplicate attribute");
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = EQUALS;
 | 
						|
            continue;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case EQUALS:
 | 
						|
         switch( c ) {
 | 
						|
         case '=':
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = ATTVAL_BEGIN;
 | 
						|
            break;
 | 
						|
         default:
 | 
						|
            ERROR("Expected =");
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case ATTVAL_BEGIN:
 | 
						|
         switch( c ) {
 | 
						|
         case '"':
 | 
						|
         case '\'':
 | 
						|
            state = ATTRIB_VAL;
 | 
						|
            start = p;
 | 
						|
            break;
 | 
						|
         default:
 | 
						|
            ERROR("Expected \"");
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case ATTRIB_VAL:
 | 
						|
         if( c == *start ) {
 | 
						|
            attribs->Add( aname, String(start+1,p-start-1).dup() );
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = BODY;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case CHILDS:
 | 
						|
         *lp = p;
 | 
						|
         do_parse_xml(xml,lp,line,callb,nodename);
 | 
						|
         p = *lp;
 | 
						|
         start = p;
 | 
						|
         state = BEGIN;
 | 
						|
         break;
 | 
						|
      case WAIT_END:
 | 
						|
         switch( c ) {
 | 
						|
         case '>':
 | 
						|
            callb->done();
 | 
						|
            state = BEGIN;
 | 
						|
            break;
 | 
						|
         default :
 | 
						|
            ERROR("Expected >");
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case WAIT_END_RET:
 | 
						|
         switch( c ) {
 | 
						|
         case '>':
 | 
						|
            if( nsubs == 0 )
 | 
						|
               callb->pcdata(HX_CSTRING(""));
 | 
						|
            *lp = p;
 | 
						|
            return;
 | 
						|
         default :
 | 
						|
            ERROR("Expected >");
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case CLOSE:
 | 
						|
         if( !is_valid_char(c) ) {
 | 
						|
            if( start == p )
 | 
						|
               ERROR("Expected node name");
 | 
						|
            {
 | 
						|
               String v = String(start,p - start).dup();
 | 
						|
               if( strcmpi(parentname.__s,v.__s) != 0 ) {
 | 
						|
                  ERRORSTR(HX_CSTRING("Expected </") + parentname + HX_CSTRING(">"));
 | 
						|
               }
 | 
						|
            }
 | 
						|
            state = IGNORE_SPACES;
 | 
						|
            next = WAIT_END_RET;
 | 
						|
            continue;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case COMMENT:
 | 
						|
         if( c == '-' && p[1] == '-' && p[2] == '>' ) {
 | 
						|
            callb->comment(String(start,p-start).dup());
 | 
						|
            p += 2;
 | 
						|
            state = BEGIN;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case DOCTYPE:
 | 
						|
         if( c == '[' )
 | 
						|
            nbrackets++;
 | 
						|
         else if( c == ']' )
 | 
						|
            nbrackets--;
 | 
						|
         else if( c == '>' && nbrackets == 0 ) {
 | 
						|
            callb->doctype(String(start,p-start).dup());
 | 
						|
            state = BEGIN;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      case HEADER:
 | 
						|
         if( c == '?' && p[1] == '>' ) {
 | 
						|
            p++;
 | 
						|
            callb->comment(String(start,p-start).dup());
 | 
						|
            state = BEGIN;
 | 
						|
         }
 | 
						|
         break;
 | 
						|
      }
 | 
						|
      c = *++p;
 | 
						|
      if( c == '\n' )
 | 
						|
         (*line)++;
 | 
						|
   }
 | 
						|
   if( state == BEGIN ) {
 | 
						|
      start = p;
 | 
						|
      state = PCDATA;
 | 
						|
   }
 | 
						|
   if( parentname.__s == 0 && state == PCDATA ) {
 | 
						|
      if( p != start || nsubs == 0 )
 | 
						|
         callb->pcdata(String(start,p-start).dup());
 | 
						|
      return;
 | 
						|
   }
 | 
						|
   ERROR("Unexpected end");
 | 
						|
}
 | 
						|
 | 
						|
// ----------------------------------------------
 | 
						|
 | 
						|
/**
 | 
						|
   <doc>
 | 
						|
   <h1>Xml</h1>
 | 
						|
   <p>
 | 
						|
   The standard event-driven XML parser.
 | 
						|
   </p>
 | 
						|
   </doc>
 | 
						|
**/
 | 
						|
 | 
						|
/**
 | 
						|
   parse_xml : xml:string -> events:object -> void
 | 
						|
   <doc>
 | 
						|
   The [parse_xml] parse a string and for each parsed element call the
 | 
						|
   corresponding object method in [events] :
 | 
						|
   <ul>
 | 
						|
   <li>[void xml( name : string, attribs : object)] when an XML node is found</li>
 | 
						|
   <li>[void done()] when an XML node is closed</li>
 | 
						|
   <li>[void pcdata(string)] when PCData chars found</li>
 | 
						|
   <li>[void cdata(string)] when a CData session is found</li>
 | 
						|
   <li>[void comment(string)] when some comment or special header is found</li>
 | 
						|
   </ul>
 | 
						|
   You can then implement the events so they build the appropriate XML data
 | 
						|
   structure needed by your language.
 | 
						|
   </doc>
 | 
						|
**/
 | 
						|
static void parse_xml( String str, cpp::NativeXmlState state )
 | 
						|
{
 | 
						|
   int line = 0;
 | 
						|
   const char *p = str.__s;
 | 
						|
   // skip BOM
 | 
						|
   if( p[0] == (char)0xEF && p[1] == (char)0xBB && p[2] == (char)0xBF )
 | 
						|
      p += 3;
 | 
						|
   do_parse_xml(p,&p,&line,state,String());
 | 
						|
}
 | 
						|
 | 
						|
 |