420 lines
12 KiB
Haxe
420 lines
12 KiB
Haxe
|
/*
|
||
|
* Copyright (C)2005-2019 Haxe Foundation
|
||
|
*
|
||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||
|
* copy of this software and associated documentation files (the "Software"),
|
||
|
* to deal in the Software without restriction, including without limitation
|
||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||
|
* Software is furnished to do so, subject to the following conditions:
|
||
|
*
|
||
|
* The above copyright notice and this permission notice shall be included in
|
||
|
* all copies or substantial portions of the Software.
|
||
|
*
|
||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||
|
* DEALINGS IN THE SOFTWARE.
|
||
|
*/
|
||
|
|
||
|
package haxe.xml;
|
||
|
|
||
|
using StringTools;
|
||
|
|
||
|
/**
	States of the hand-written state machine in `Parser.doParse`.

	The members carry implicit Int values, so their order is kept as is.
**/
private enum abstract S(Int) {
	/** Skips `\n`, `\r`, `\t` and space, then switches to the pending `next` state. **/
	var IGNORE_SPACES;

	/** Start of content: `<` opens a node, anything else starts character data. **/
	var BEGIN;

	/** Right after `<`: dispatches on `!`, `?`, `/` or the start of a tag name. **/
	var BEGIN_NODE;

	/** Reading the name of an opening tag. **/
	var TAG_NAME;

	/** Inside an opening tag: expects `/`, `>` or the start of an attribute name. **/
	var BODY;

	/** Reading an attribute name. **/
	var ATTRIB_NAME;

	/** Expects the `=` between an attribute name and its value. **/
	var EQUALS;

	/** Expects the quote character that opens an attribute value. **/
	var ATTVAL_BEGIN;

	/** Reading an attribute value up to the matching quote character. **/
	var ATTRIB_VAL;

	/** After the `>` of an opening tag: the children are parsed recursively. **/
	var CHILDS;

	/** Reading the name of a closing tag. **/
	var CLOSE;

	/** Expects the `>` that follows the `/` of a self-closing tag. **/
	var WAIT_END;

	/** Expects the `>` of a closing tag, after which the current parse call returns. **/
	var WAIT_END_RET;

	/** Reading character data. **/
	var PCDATA;

	/** Inside a `<? ... ?>` processing instruction. **/
	var HEADER;

	/** Inside a `<!-- ... -->` comment. **/
	var COMMENT;

	/** Inside a `<!DOCTYPE ... >` declaration. **/
	var DOCTYPE;

	/** Inside a `<![CDATA[ ... ]]>` section. **/
	var CDATA;

	/** Reading an entity or character reference after `&`. **/
	var ESCAPE;
}
|
||
|
|
||
|
/**
	Error raised by `Parser` when the input is not well-formed.

	Besides the message it records where the problem was found, both as an
	absolute character offset and as a line number / offset within that line.
**/
class XmlParserException {
	/**
		the XML parsing error message
	**/
	public var message:String;

	/**
		the line number at which the XML parsing error occurred
	**/
	public var lineNumber:Int;

	/**
		the character position in the reported line at which the parsing error occurred
	**/
	public var positionAtLine:Int;

	/**
		the character position in the XML string at which the parsing error occurred
	**/
	public var position:Int;

	/**
		the invalid XML string
	**/
	public var xml:String;

	/**
		Creates the exception and derives `lineNumber` and `positionAtLine`
		from `position`.

		Lines are counted from 1 and split on `\n`; `\r` characters do not
		advance `positionAtLine`. If `position` lies beyond the end of `xml`
		(or `xml` is null) only the available text is scanned, while
		`position` itself is stored unchanged.

		@param message description of the parsing error
		@param xml the text that was being parsed
		@param position character offset in `xml` at which the error occurred
	**/
	public function new(message:String, xml:String, position:Int) {
		this.xml = xml;
		this.message = message;
		this.position = position;
		lineNumber = 1;
		positionAtLine = 0;

		// `fastCodeAt` performs no bounds checking, so never scan past the end of the text.
		var available = (xml == null) ? 0 : xml.length;
		var limit = (position < available) ? position : available;

		for (i in 0...limit) {
			var c = xml.fastCodeAt(i);
			if (c == '\n'.code) {
				lineNumber++;
				positionAtLine = 0;
			} else {
				if (c != '\r'.code)
					positionAtLine++;
			}
		}
	}

	/**
		Returns the class name followed by the message, line number and
		position within that line.
	**/
	public function toString():String {
		return Type.getClassName(Type.getClass(this)) + ": " + message + " at line " + lineNumber + " char " + positionAtLine;
	}
}
|
||
|
|
||
|
/**
	Hand-written XML parser that builds an `Xml` tree from a String.
**/
class Parser {
	/**
		Predefined entity names (without `&` and `;`) mapped to the text they stand for.
	**/
	static var escapes = {
		var h = new haxe.ds.StringMap();
		h.set("lt", "<");
		h.set("gt", ">");
		h.set("amp", "&");
		h.set("quot", '"');
		h.set("apos", "'");
		h;
	}

	/**
		Parses the String into an XML Document. Set strict parsing to true in order to enable a strict check of XML attributes and entities.

		@throws haxe.xml.XmlParserException
	**/
	static public function parse(str:String, strict = false) {
		var doc = Xml.createDocument();
		doParse(str, strict, 0, doc);
		return doc;
	}

	/**
		Runs the parsing state machine over `str` starting at index `p` and
		appends every node it finds to `parent`. It calls itself for the
		children of each element.

		@param str the text to parse
		@param strict when true, unescaped `<`/`>` in attribute values, undefined
			entities and malformed entity or character references raise an
			error instead of being kept as literal text
		@param p index in `str` at which to start
		@param parent node that receives the parsed children
		@return the index of the `>` that ends the closing tag of `parent`, or
			the index reached when the input is exhausted
		@throws haxe.xml.XmlParserException if the input is not well-formed
	**/
	static function doParse(str:String, strict:Bool, p:Int = 0, ?parent:Xml):Int {
		var xml:Xml = null;
		var state = S.BEGIN;
		var next = S.BEGIN;
		var aname = null;
		var start = 0;
		var nsubs = 0;
		var nbrackets = 0;
		var buf = new StringBuf();
		// need extra state because next is in use
		var escapeNext = S.BEGIN;
		var attrValQuote = -1;
		// appends to `parent` and counts the child so an empty element can be detected later
		inline function addChild(child:Xml) {
			parent.addChild(child);
			nsubs++;
		}
		while (p < str.length) {
			var c = str.unsafeCodeAt(p);
			switch (state) {
				case S.IGNORE_SPACES:
					switch (c) {
						case '\n'.code, '\r'.code, '\t'.code, ' '.code:
						default:
							// re-examine the same character in the pending state
							state = next;
							continue;
					}
				case S.BEGIN:
					switch (c) {
						case '<'.code:
							state = S.IGNORE_SPACES;
							next = S.BEGIN_NODE;
						default:
							start = p;
							state = S.PCDATA;
							continue;
					}
				case S.PCDATA:
					if (c == '<'.code) {
						buf.addSub(str, start, p - start);
						var child = Xml.createPCData(buf.toString());
						buf = new StringBuf();
						addChild(child);
						state = S.IGNORE_SPACES;
						next = S.BEGIN_NODE;
					} else if (c == '&'.code) {
						buf.addSub(str, start, p - start);
						state = S.ESCAPE;
						escapeNext = S.PCDATA;
						start = p + 1;
					}
				case S.CDATA:
					if (c == ']'.code && str.fastCodeAt(p + 1) == ']'.code && str.fastCodeAt(p + 2) == '>'.code) {
						var child = Xml.createCData(str.substr(start, p - start));
						addChild(child);
						p += 2;
						state = S.BEGIN;
					}
				case S.BEGIN_NODE:
					switch (c) {
						case '!'.code:
							if (str.fastCodeAt(p + 1) == '['.code) {
								p += 2;
								if (str.substr(p, 6).toUpperCase() != "CDATA[")
									throw new XmlParserException("Expected <![CDATA[", str, p);
								p += 5;
								state = S.CDATA;
								start = p + 1;
							} else if (str.fastCodeAt(p + 1) == 'D'.code || str.fastCodeAt(p + 1) == 'd'.code) {
								if (str.substr(p + 2, 6).toUpperCase() != "OCTYPE")
									throw new XmlParserException("Expected <!DOCTYPE", str, p);
								p += 8;
								state = S.DOCTYPE;
								start = p + 1;
							} else if (str.fastCodeAt(p + 1) != '-'.code || str.fastCodeAt(p + 2) != '-'.code) {
								throw new XmlParserException("Expected <!--", str, p);
							} else {
								p += 2;
								state = S.COMMENT;
								start = p + 1;
							}
						case '?'.code:
							state = S.HEADER;
							start = p;
						case '/'.code:
							if (parent == null)
								throw new XmlParserException("Expected node name", str, p);
							start = p + 1;
							state = S.IGNORE_SPACES;
							next = S.CLOSE;
						default:
							state = S.TAG_NAME;
							start = p;
							continue;
					}
				case S.TAG_NAME:
					if (!isValidChar(c)) {
						if (p == start)
							throw new XmlParserException("Expected node name", str, p);
						xml = Xml.createElement(str.substr(start, p - start));
						addChild(xml);
						state = S.IGNORE_SPACES;
						next = S.BODY;
						continue;
					}
				case S.BODY:
					switch (c) {
						case '/'.code:
							state = S.WAIT_END;
						case '>'.code:
							state = S.CHILDS;
						default:
							state = S.ATTRIB_NAME;
							start = p;
							continue;
					}
				case S.ATTRIB_NAME:
					if (!isValidChar(c)) {
						if (start == p)
							throw new XmlParserException("Expected attribute name", str, p);
						aname = str.substr(start, p - start);
						if (xml.exists(aname))
							throw new XmlParserException("Duplicate attribute [" + aname + "]", str, p);
						state = S.IGNORE_SPACES;
						next = S.EQUALS;
						continue;
					}
				case S.EQUALS:
					switch (c) {
						case '='.code:
							state = S.IGNORE_SPACES;
							next = S.ATTVAL_BEGIN;
						default:
							throw new XmlParserException("Expected =", str, p);
					}
				case S.ATTVAL_BEGIN:
					switch (c) {
						case '"'.code | '\''.code:
							buf = new StringBuf();
							state = S.ATTRIB_VAL;
							start = p + 1;
							// remember which quote opened the value so only the same one closes it
							attrValQuote = c;
						default:
							throw new XmlParserException("Expected \"", str, p);
					}
				case S.ATTRIB_VAL:
					switch (c) {
						case '&'.code:
							buf.addSub(str, start, p - start);
							state = S.ESCAPE;
							escapeNext = S.ATTRIB_VAL;
							start = p + 1;
						case '>'.code | '<'.code if (strict):
							// HTML allows these in attributes values
							throw new XmlParserException("Invalid unescaped " + String.fromCharCode(c) + " in attribute value", str, p);
						case _ if (c == attrValQuote):
							buf.addSub(str, start, p - start);
							var val = buf.toString();
							buf = new StringBuf();
							xml.set(aname, val);
							state = S.IGNORE_SPACES;
							next = S.BODY;
					}
				case S.CHILDS:
					p = doParse(str, strict, p, xml);
					start = p;
					state = S.BEGIN;
				case S.WAIT_END:
					switch (c) {
						case '>'.code:
							state = S.BEGIN;
						default:
							throw new XmlParserException("Expected >", str, p);
					}
				case S.WAIT_END_RET:
					switch (c) {
						case '>'.code:
							// an element without any child gets an empty text node
							if (nsubs == 0)
								parent.addChild(Xml.createPCData(""));
							return p;
						default:
							throw new XmlParserException("Expected >", str, p);
					}
				case S.CLOSE:
					if (!isValidChar(c)) {
						if (start == p)
							throw new XmlParserException("Expected node name", str, p);

						var v = str.substr(start, p - start);
						if (parent == null || parent.nodeType != Element) {
							throw new XmlParserException('Unexpected </$v>, tag is not open', str, p);
						}
						if (v != parent.nodeName)
							throw new XmlParserException("Expected </" + parent.nodeName + ">", str, p);

						state = S.IGNORE_SPACES;
						next = S.WAIT_END_RET;
						continue;
					}
				case S.COMMENT:
					if (c == '-'.code && str.fastCodeAt(p + 1) == '-'.code && str.fastCodeAt(p + 2) == '>'.code) {
						addChild(Xml.createComment(str.substr(start, p - start)));
						p += 2;
						state = S.BEGIN;
					}
				case S.DOCTYPE:
					// `>` only ends the declaration when no `[` is still open
					if (c == '['.code)
						nbrackets++;
					else if (c == ']'.code)
						nbrackets--;
					else if (c == '>'.code && nbrackets == 0) {
						addChild(Xml.createDocType(str.substr(start, p - start)));
						state = S.BEGIN;
					}
				case S.HEADER:
					if (c == '?'.code && str.fastCodeAt(p + 1) == '>'.code) {
						p++;
						var instruction = str.substr(start + 1, p - start - 2);
						addChild(Xml.createProcessingInstruction(instruction));
						state = S.BEGIN;
					}
				case S.ESCAPE:
					if (c == ';'.code) {
						var s = str.substr(start, p - start);
						if (s.fastCodeAt(0) == '#'.code) {
							// "#x41" is parsed as "0x41", "#65" as "65"
							var code:Null<Int> = s.fastCodeAt(1) == 'x'.code ? Std.parseInt("0" + s.substr(1, s.length - 1)) : Std.parseInt(s.substr(1, s.length - 1));
							if (code == null || code < 0) {
								// not a usable code point (e.g. "&#;" or "&#-5;"): treat it like an undefined entity
								if (strict)
									throw new XmlParserException("Invalid character reference: " + s, str, p);
								buf.add('&$s;');
							} else {
								var c:Int = code;
								#if !(target.unicode)
								if (c >= 128) {
									// UTF8-encode it
									if (c <= 0x7FF) {
										buf.addChar(0xC0 | (c >> 6));
										buf.addChar(0x80 | (c & 63));
									} else if (c <= 0xFFFF) {
										buf.addChar(0xE0 | (c >> 12));
										buf.addChar(0x80 | ((c >> 6) & 63));
										buf.addChar(0x80 | (c & 63));
									} else if (c <= 0x10FFFF) {
										buf.addChar(0xF0 | (c >> 18));
										buf.addChar(0x80 | ((c >> 12) & 63));
										buf.addChar(0x80 | ((c >> 6) & 63));
										buf.addChar(0x80 | (c & 63));
									} else
										throw new XmlParserException("Cannot encode UTF8-char " + c, str, p);
								} else
								#end
								buf.addChar(c);
							}
						} else if (!escapes.exists(s)) {
							if (strict)
								throw new XmlParserException("Undefined entity: " + s, str, p);
							buf.add('&$s;');
						} else {
							buf.add(escapes.get(s));
						}
						start = p + 1;
						state = escapeNext;
					} else if (!isValidChar(c) && c != "#".code) {
						if (strict)
							throw new XmlParserException("Invalid character in entity: " + String.fromCharCode(c), str, p);
						// not an entity after all: keep the text literally and re-read this character
						buf.addChar("&".code);
						buf.addSub(str, start, p - start);
						p--;
						start = p + 1;
						state = escapeNext;
					}
			}
			++p;
		}

		if (state == S.BEGIN) {
			start = p;
			state = S.PCDATA;
		}

		if (state == S.PCDATA) {
			if (parent.nodeType == Element) {
				throw new XmlParserException("Unclosed node <" + parent.nodeName + ">", str, p);
			}
			if (p != start || nsubs == 0) {
				buf.addSub(str, start, p - start);
				addChild(Xml.createPCData(buf.toString()));
			}
			return p;
		}

		if (!strict && state == S.ESCAPE && escapeNext == S.PCDATA) {
			// the input ended inside an entity; an element that is still open is an error here
			// exactly as it is when the input ends in plain character data
			if (parent.nodeType == Element) {
				throw new XmlParserException("Unclosed node <" + parent.nodeName + ">", str, p);
			}
			buf.addChar("&".code);
			buf.addSub(str, start, p - start);
			addChild(Xml.createPCData(buf.toString()));
			return p;
		}

		throw new XmlParserException("Unexpected end", str, p);
	}

	/**
		Tells whether `c` may appear in a tag, attribute or entity name:
		ASCII letters and digits plus `:`, `.`, `_` and `-`.
	**/
	static inline function isValidChar(c) {
		return (c >= 'a'.code && c <= 'z'.code) || (c >= 'A'.code && c <= 'Z'.code) || (c >= '0'.code && c <= '9'.code) || c == ':'.code || c == '.'.code
			|| c == '_'.code || c == '-'.code;
	}
}
|