420 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Haxe
		
	
	
	
	
	
		
		
			
		
	
	
			420 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Haxe
		
	
	
	
	
	
/*
 * Copyright (C)2005-2019 Haxe Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|  | 
 | ||
|  | package haxe.xml; | ||
|  | 
 | ||
|  | using StringTools; | ||
|  | 
 | ||
/**
	States of the character-driven scanner implemented in `Parser.doParse`.

	The integer values are spelled out explicitly; they are the same values
	the compiler assigns when they are omitted (0, 1, 2, ... in declaration
	order).
**/
private enum abstract S(Int) {
	/** Skip `\n`, `\r`, `\t` and space, then resume in the pending `next` state. **/
	var IGNORE_SPACES = 0;

	/** Start of content: `<` opens a node, anything else starts text. **/
	var BEGIN = 1;

	/** First significant character after `<`: `!`, `?`, `/` or a tag name. **/
	var BEGIN_NODE = 2;

	/** Reading an element name. **/
	var TAG_NAME = 3;

	/** Inside an opening tag: expects `/`, `>` or an attribute name. **/
	var BODY = 4;

	/** Reading an attribute name. **/
	var ATTRIB_NAME = 5;

	/** Expects the `=` between an attribute name and its value. **/
	var EQUALS = 6;

	/** Expects the quote (`"` or `'`) that opens an attribute value. **/
	var ATTVAL_BEGIN = 7;

	/** Reading an attribute value up to the matching quote. **/
	var ATTRIB_VAL = 8;

	/** Opening tag finished with `>`: children are parsed recursively. **/
	var CHILDS = 9;

	/** Reading the name of a closing tag. **/
	var CLOSE = 10;

	/** Expects the `>` that follows `/` in the opening tag. **/
	var WAIT_END = 11;

	/** Expects the `>` of a closing tag, after which `doParse` returns. **/
	var WAIT_END_RET = 12;

	/** Reading character data until `<` or `&`. **/
	var PCDATA = 13;

	/** Inside `<? ... ?>`. **/
	var HEADER = 14;

	/** Inside `<!-- ... -->`. **/
	var COMMENT = 15;

	/** Inside `<!DOCTYPE ... >`. **/
	var DOCTYPE = 16;

	/** Inside `<![CDATA[ ... ]]>`. **/
	var CDATA = 17;

	/** Reading an entity or character reference after `&`, up to `;`. **/
	var ESCAPE = 18;
}
|  | 
 | ||
/**
	Error raised by the XML parser. Besides the message it records where in
	the input the problem was detected, both as an absolute offset and as a
	line / column pair derived from that offset.
**/
class XmlParserException {
	/**
		the XML parsing error message
	**/
	public var message:String;

	/**
		the line number at which the XML parsing error occurred
		(the first line is line 1)
	**/
	public var lineNumber:Int;

	/**
		the character position in the reported line at which the parsing error occurred
		(carriage returns are not counted)
	**/
	public var positionAtLine:Int;

	/**
		the character position in the XML string at which the parsing error occurred
	**/
	public var position:Int;

	/**
		the invalid XML string
	**/
	public var xml:String;

	/**
		Creates the exception and computes `lineNumber` and `positionAtLine`
		by scanning `xml` from its beginning up to (not including) `position`.

		@param message description of the error
		@param xml the source text that was being parsed
		@param position offset into `xml` at which the error was detected
	**/
	public function new(message:String, xml:String, position:Int) {
		this.xml = xml;
		this.message = message;
		this.position = position;

		var line = 1;
		var column = 0;
		var index = 0;
		while (index < position) {
			switch (xml.fastCodeAt(index)) {
				case '\n'.code:
					// a line feed starts a new line and resets the column
					line++;
					column = 0;
				case '\r'.code:
					// carriage returns advance neither the line nor the column
				default:
					column++;
			}
			index++;
		}

		lineNumber = line;
		positionAtLine = column;
	}

	/**
		Formats the error as `<class name>: <message> at line <n> char <m>`.
	**/
	public function toString():String {
		var className = Type.getClassName(Type.getClass(this));
		return '$className: $message at line $lineNumber char $positionAtLine';
	}
}
|  | 
 | ||
/**
	Recursive, character-driven XML parser that builds an `Xml` tree.

	`parse` is the public entry point; `doParse` implements the state machine
	and calls itself once per nesting level.
**/
class Parser {
	/**
		The predefined named entities, keyed by the name that appears between
		`&` and `;`.
	**/
	static var escapes = {
		var h = new haxe.ds.StringMap();
		h.set("lt", "<");
		h.set("gt", ">");
		h.set("amp", "&");
		h.set("quot", '"');
		h.set("apos", "'");
		h;
	}

	/**
		Parses the String into an XML Document. Set strict parsing to true in order to enable a strict check of XML attributes and entities.

		@param str the XML source text
		@param strict when true, unescaped `<` / `>` in attribute values,
			undefined entities, malformed entity names and malformed numeric
			character references raise an error instead of being kept as text
		@return the document node holding everything that was parsed
		@throws haxe.xml.XmlParserException
	**/
	static public function parse(str:String, strict = false) {
		var doc = Xml.createDocument();
		doParse(str, strict, 0, doc);
		return doc;
	}

	/**
		Parses one nesting level of `str`, starting at offset `p`, and appends
		every node found to `parent`.

		@param str the XML source text
		@param strict see `parse`
		@param p offset at which to start reading
		@param parent node receiving the parsed children
		@return the offset of the `>` that ended the closing tag of `parent`,
			or the end offset of `str` when the input was exhausted
		@throws haxe.xml.XmlParserException on malformed input
	**/
	static function doParse(str:String, strict:Bool, p:Int = 0, ?parent:Xml):Int {
		// element currently being opened (attributes are set on it)
		var xml:Xml = null;
		var state = S.BEGIN;
		// state to enter once IGNORE_SPACES has consumed the whitespace
		var next = S.BEGIN;
		// name of the attribute whose value is being read
		var aname = null;
		// offset where the token currently being read begins
		var start = 0;
		// number of children added to `parent` at this level
		var nsubs = 0;
		// nesting depth of `[` ... `]` inside a DOCTYPE
		var nbrackets = 0;
		var buf = new StringBuf();
		// need extra state because next is in use
		var escapeNext = S.BEGIN;
		// quote character that opened the current attribute value
		var attrValQuote = -1;
		inline function addChild(xml:Xml) {
			parent.addChild(xml);
			nsubs++;
		}
		while (p < str.length) {
			var c = str.unsafeCodeAt(p);
			switch (state) {
				case S.IGNORE_SPACES:
					switch (c) {
						case '\n'.code, '\r'.code, '\t'.code, ' '.code:
						default:
							// re-examine this character in the pending state
							state = next;
							continue;
					}
				case S.BEGIN:
					switch (c) {
						case '<'.code:
							state = S.IGNORE_SPACES;
							next = S.BEGIN_NODE;
						default:
							start = p;
							state = S.PCDATA;
							continue;
					}
				case S.PCDATA:
					if (c == '<'.code) {
						buf.addSub(str, start, p - start);
						var child = Xml.createPCData(buf.toString());
						buf = new StringBuf();
						addChild(child);
						state = S.IGNORE_SPACES;
						next = S.BEGIN_NODE;
					} else if (c == '&'.code) {
						// flush the text read so far, then decode the reference
						buf.addSub(str, start, p - start);
						state = S.ESCAPE;
						escapeNext = S.PCDATA;
						start = p + 1;
					}
				case S.CDATA:
					if (c == ']'.code && str.fastCodeAt(p + 1) == ']'.code && str.fastCodeAt(p + 2) == '>'.code) {
						var child = Xml.createCData(str.substr(start, p - start));
						addChild(child);
						// skip the remaining "]>" (the loop's ++p consumes the last char)
						p += 2;
						state = S.BEGIN;
					}
				case S.BEGIN_NODE:
					switch (c) {
						case '!'.code:
							if (str.fastCodeAt(p + 1) == '['.code) {
								p += 2;
								if (str.substr(p, 6).toUpperCase() != "CDATA[")
									throw new XmlParserException("Expected <![CDATA[", str, p);
								p += 5;
								state = S.CDATA;
								start = p + 1;
							} else if (str.fastCodeAt(p + 1) == 'D'.code || str.fastCodeAt(p + 1) == 'd'.code) {
								if (str.substr(p + 2, 6).toUpperCase() != "OCTYPE")
									throw new XmlParserException("Expected <!DOCTYPE", str, p);
								p += 8;
								state = S.DOCTYPE;
								start = p + 1;
							} else if (str.fastCodeAt(p + 1) != '-'.code || str.fastCodeAt(p + 2) != '-'.code) {
								throw new XmlParserException("Expected <!--", str, p);
							} else {
								p += 2;
								state = S.COMMENT;
								start = p + 1;
							}
						case '?'.code:
							state = S.HEADER;
							start = p;
						case '/'.code:
							if (parent == null)
								throw new XmlParserException("Expected node name", str, p);
							start = p + 1;
							state = S.IGNORE_SPACES;
							next = S.CLOSE;
						default:
							state = S.TAG_NAME;
							start = p;
							continue;
					}
				case S.TAG_NAME:
					if (!isValidChar(c)) {
						if (p == start)
							throw new XmlParserException("Expected node name", str, p);
						xml = Xml.createElement(str.substr(start, p - start));
						addChild(xml);
						state = S.IGNORE_SPACES;
						next = S.BODY;
						continue;
					}
				case S.BODY:
					switch (c) {
						case '/'.code:
							state = S.WAIT_END;
						case '>'.code:
							state = S.CHILDS;
						default:
							state = S.ATTRIB_NAME;
							start = p;
							continue;
					}
				case S.ATTRIB_NAME:
					if (!isValidChar(c)) {
						if (start == p)
							throw new XmlParserException("Expected attribute name", str, p);
						aname = str.substr(start, p - start);
						if (xml.exists(aname))
							throw new XmlParserException("Duplicate attribute [" + aname + "]", str, p);
						state = S.IGNORE_SPACES;
						next = S.EQUALS;
						continue;
					}
				case S.EQUALS:
					switch (c) {
						case '='.code:
							state = S.IGNORE_SPACES;
							next = S.ATTVAL_BEGIN;
						default:
							throw new XmlParserException("Expected =", str, p);
					}
				case S.ATTVAL_BEGIN:
					switch (c) {
						case '"'.code | '\''.code:
							buf = new StringBuf();
							state = S.ATTRIB_VAL;
							start = p + 1;
							// remember which quote has to close the value
							attrValQuote = c;
						default:
							throw new XmlParserException("Expected \"", str, p);
					}
				case S.ATTRIB_VAL:
					switch (c) {
						case '&'.code:
							buf.addSub(str, start, p - start);
							state = S.ESCAPE;
							escapeNext = S.ATTRIB_VAL;
							start = p + 1;
						case '>'.code | '<'.code if (strict):
							// HTML allows these in attributes values
							throw new XmlParserException("Invalid unescaped " + String.fromCharCode(c) + " in attribute value", str, p);
						case _ if (c == attrValQuote):
							buf.addSub(str, start, p - start);
							var val = buf.toString();
							buf = new StringBuf();
							xml.set(aname, val);
							state = S.IGNORE_SPACES;
							next = S.BODY;
					}
				case S.CHILDS:
					// the recursive call returns the offset of the closing tag's '>'
					p = doParse(str, strict, p, xml);
					start = p;
					state = S.BEGIN;
				case S.WAIT_END:
					switch (c) {
						case '>'.code:
							state = S.BEGIN;
						default:
							throw new XmlParserException("Expected >", str, p);
					}
				case S.WAIT_END_RET:
					switch (c) {
						case '>'.code:
							// an element closed without any child gets an empty text node
							if (nsubs == 0)
								parent.addChild(Xml.createPCData(""));
							return p;
						default:
							throw new XmlParserException("Expected >", str, p);
					}
				case S.CLOSE:
					if (!isValidChar(c)) {
						if (start == p)
							throw new XmlParserException("Expected node name", str, p);

						var v = str.substr(start, p - start);
						if (parent == null || parent.nodeType != Element) {
							throw new XmlParserException('Unexpected </$v>, tag is not open', str, p);
						}
						if (v != parent.nodeName)
							throw new XmlParserException("Expected </" + parent.nodeName + ">", str, p);

						state = S.IGNORE_SPACES;
						next = S.WAIT_END_RET;
						continue;
					}
				case S.COMMENT:
					if (c == '-'.code && str.fastCodeAt(p + 1) == '-'.code && str.fastCodeAt(p + 2) == '>'.code) {
						addChild(Xml.createComment(str.substr(start, p - start)));
						p += 2;
						state = S.BEGIN;
					}
				case S.DOCTYPE:
					if (c == '['.code)
						nbrackets++;
					else if (c == ']'.code)
						nbrackets--;
					else if (c == '>'.code && nbrackets == 0) {
						// only a '>' outside of any [...] section ends the DOCTYPE
						addChild(Xml.createDocType(str.substr(start, p - start)));
						state = S.BEGIN;
					}
				case S.HEADER:
					if (c == '?'.code && str.fastCodeAt(p + 1) == '>'.code) {
						p++;
						// text between the leading '?' and the trailing "?>"
						var instruction = str.substr(start + 1, p - start - 2);
						addChild(Xml.createProcessingInstruction(instruction));
						state = S.BEGIN;
					}
				case S.ESCAPE:
					if (c == ';'.code) {
						var s = str.substr(start, p - start);
						if (s.fastCodeAt(0) == '#'.code) {
							// "#x..." is hexadecimal ("0" is prepended to form "0x..."), "#..." is decimal
							var code:Null<Int> = s.fastCodeAt(1) == 'x'.code ? Std.parseInt("0" + s.substr(1, s.length - 1)) : Std.parseInt(s.substr(1,
								s.length - 1));
							if (code == null || code < 0) {
								// Std.parseInt yields null when there is no number to read
								// (e.g. "&#;" or "&#zz;"); treat that like an undefined entity
								// instead of appending a null character code.
								if (strict)
									throw new XmlParserException("Invalid character reference: " + s, str, p);
								buf.add('&$s;');
							} else {
								var charCode:Int = code;
								#if !(target.unicode)
								if (charCode >= 128) {
									// UTF8-encode it
									if (charCode <= 0x7FF) {
										buf.addChar(0xC0 | (charCode >> 6));
										buf.addChar(0x80 | (charCode & 63));
									} else if (charCode <= 0xFFFF) {
										buf.addChar(0xE0 | (charCode >> 12));
										buf.addChar(0x80 | ((charCode >> 6) & 63));
										buf.addChar(0x80 | (charCode & 63));
									} else if (charCode <= 0x10FFFF) {
										buf.addChar(0xF0 | (charCode >> 18));
										buf.addChar(0x80 | ((charCode >> 12) & 63));
										buf.addChar(0x80 | ((charCode >> 6) & 63));
										buf.addChar(0x80 | (charCode & 63));
									} else
										throw new XmlParserException("Cannot encode UTF8-char " + charCode, str, p);
								} else
								#end
								buf.addChar(charCode);
							}
						} else if (!escapes.exists(s)) {
							if (strict)
								throw new XmlParserException("Undefined entity: " + s, str, p);
							// keep the unknown entity verbatim
							buf.add('&$s;');
						} else {
							buf.add(escapes.get(s));
						}
						start = p + 1;
						state = escapeNext;
					} else if (!isValidChar(c) && c != "#".code) {
						if (strict)
							throw new XmlParserException("Invalid character in entity: " + String.fromCharCode(c), str, p);
						// not a reference after all: keep "&" plus what was read, and
						// step back so the offending character is handled by escapeNext
						buf.addChar("&".code);
						buf.addSub(str, start, p - start);
						p--;
						start = p + 1;
						state = escapeNext;
					}
			}
			++p;
		}

		// input exhausted: decide whether the state we stopped in is acceptable

		if (state == S.BEGIN) {
			start = p;
			state = S.PCDATA;
		}

		if (state == S.PCDATA) {
			if (parent.nodeType == Element) {
				throw new XmlParserException("Unclosed node <" + parent.nodeName + ">", str, p);
			}
			if (p != start || nsubs == 0) {
				buf.addSub(str, start, p - start);
				addChild(Xml.createPCData(buf.toString()));
			}
			return p;
		}

		if (!strict && state == S.ESCAPE && escapeNext == S.PCDATA) {
			// unterminated reference at the end of text: keep it verbatim
			buf.addChar("&".code);
			buf.addSub(str, start, p - start);
			addChild(Xml.createPCData(buf.toString()));
			return p;
		}

		throw new XmlParserException("Unexpected end", str, p);
	}

	/**
		Tells whether `c` may appear in a tag, attribute or entity name:
		ASCII letters and digits, plus `:`, `.`, `_` and `-`.
	**/
	static inline function isValidChar(c) {
		return (c >= 'a'.code && c <= 'z'.code) || (c >= 'A'.code && c <= 'Z'.code) || (c >= '0'.code && c <= '9'.code) || c == ':'.code || c == '.'.code
			|| c == '_'.code || c == '-'.code;
	}
}