587 lines
20 KiB
JavaScript
587 lines
20 KiB
JavaScript
'use strict';
|
|
|
|
var conventions = require('./conventions');
|
|
var dom = require('./dom');
|
|
var errors = require('./errors');
|
|
var entities = require('./entities');
|
|
var sax = require('./sax');
|
|
|
|
var DOMImplementation = dom.DOMImplementation;
|
|
|
|
var hasDefaultHTMLNamespace = conventions.hasDefaultHTMLNamespace;
|
|
var isHTMLMimeType = conventions.isHTMLMimeType;
|
|
var isValidMimeType = conventions.isValidMimeType;
|
|
var MIME_TYPE = conventions.MIME_TYPE;
|
|
var NAMESPACE = conventions.NAMESPACE;
|
|
var ParseError = errors.ParseError;
|
|
|
|
var XMLReader = sax.XMLReader;
|
|
|
|
/**
 * Normalizes line endings according to <https://www.w3.org/TR/xml11/#sec-line-ends>,
 * including some Unicode "newline" characters:
 *
 * > XML parsed entities are often stored in computer files which,
 * > for editing convenience, are organized into lines.
 * > These lines are typically separated by some combination
 * > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
 * >
 * > To simplify the tasks of applications, the XML processor must behave
 * > as if it normalized all line breaks in external parsed entities (including the document entity)
 * > on input, before parsing, by translating the following to a single #xA character:
 * >
 * > 1. the two-character sequence #xD #xA,
 * > 2. the two-character sequence #xD #x85,
 * > 3. the single character #x85,
 * > 4. the single character #x2028,
 * > 5. the single character #x2029,
 * > 6. any #xD character that is not immediately followed by #xA or #x85.
 *
 * @param {string} input
 * The text whose line endings should be normalized.
 * @returns {string}
 * A copy of `input` in which every line break listed above is a single `\n`.
 * @prettierignore
 */
function normalizeLineEndings(input) {
	// One pass covers all six rules: a `\r` greedily swallows a directly following `\n` or
	// `\u0085` (rules 1, 2 and 6), the character class handles the lone characters (rules 3-5).
	return input.replace(/\r[\n\u0085]?|[\u0085\u2028\u2029]/g, '\n');
}
|
|
|
|
/**
|
|
* @typedef Locator
|
|
* @property {number} [columnNumber]
|
|
* @property {number} [lineNumber]
|
|
*/
|
|
|
|
/**
|
|
* @typedef DOMParserOptions
|
|
* @property {typeof assign} [assign]
|
|
* The method to use instead of `conventions.assign`, which is used to copy values from
|
|
* `options` before they are used for parsing.
|
|
* @property {typeof DOMHandler} [domHandler]
|
|
* For internal testing: The class for creating an instance for handling events from the SAX
|
|
* parser.
|
|
* *****Warning: By configuring a faulty implementation, the specified behavior can completely
|
|
* be broken.*****.
|
|
* @property {Function} [errorHandler]
|
|
* DEPRECATED! use `onError` instead.
|
|
* @property {function(level:ErrorLevel, message:string, context: DOMHandler):void}
|
|
* [onError]
|
|
* A function invoked for every error that occurs during parsing.
|
|
*
|
|
* If it is not provided, all errors are reported to `console.error`
|
|
* and only `fatalError`s are thrown as a `ParseError`,
|
|
* which prevents any further processing.
|
|
* If the provided method throws, a `ParseError` is thrown,
|
|
* which prevents any further processing.
|
|
*
|
|
* Be aware that many `warning`s are considered an error that prevents further processing in
|
|
* most implementations.
|
|
* @property {boolean} [locator=true]
|
|
* Configures if the nodes created during parsing will have a `lineNumber` and a `columnNumber`
|
|
* attribute describing their location in the XML string.
|
|
* Default is true.
|
|
* @property {(string) => string} [normalizeLineEndings]
|
|
* used to replace line endings before parsing, defaults to exported `normalizeLineEndings`,
|
|
* which normalizes line endings according to <https://www.w3.org/TR/xml11/#sec-line-ends>,
|
|
* including some Unicode "newline" characters.
|
|
* @property {Object} [xmlns]
|
|
* The XML namespaces that should be assumed when parsing.
|
|
* The default namespace can be provided by the key that is the empty string.
|
|
* When the `mimeType` for HTML, XHTML or SVG are passed to `parseFromString`,
|
|
* the default namespace that will be used,
|
|
* will be overridden according to the specification.
|
|
* @see {@link normalizeLineEndings}
|
|
*/
|
|
|
|
/**
 * The DOMParser interface provides the ability to parse XML or HTML source code from a string
 * into a DOM `Document`.
 *
 * ***xmldom is different from the spec in that it allows an `options` parameter,
 * to control the behavior***.
 *
 * The passed `options` object is only read, it is never modified.
 *
 * @class
 * @param {DOMParserOptions} [options]
 * @throws {TypeError}
 * If `options.errorHandler` is set to something that is not a function.
 * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
 * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
 */
function DOMParser(options) {
	options = options || {};

	/**
	 * The method to use instead of `conventions.assign`, which is used to copy values from
	 * `options` before they are used for parsing.
	 *
	 * @type {conventions.assign}
	 * @private
	 * @see {@link conventions.assign}
	 * @readonly
	 */
	this.assign = options.assign || conventions.assign;

	/**
	 * For internal testing: The class for creating an instance for handling events from the SAX
	 * parser.
	 * *****Warning: By configuring a faulty implementation, the specified behavior can completely
	 * be broken*****.
	 *
	 * @type {typeof DOMHandler}
	 * @private
	 * @readonly
	 */
	this.domHandler = options.domHandler || DOMHandler;

	/**
	 * A function that is invoked for every error that occurs during parsing.
	 *
	 * If it is not provided, all errors are reported to `console.error`
	 * and only `fatalError`s are thrown as a `ParseError`,
	 * which prevents any further processing.
	 * If the provided method throws, a `ParseError` is thrown,
	 * which prevents any further processing.
	 *
	 * Be aware that many `warning`s are considered an error that prevents further processing in
	 * most implementations.
	 *
	 * @type {function(level:ErrorLevel, message:string, context: DOMHandler):void}
	 * @see {@link onErrorStopParsing}
	 * @see {@link onWarningStopParsing}
	 */
	this.onError = options.onError || options.errorHandler;
	if (options.errorHandler && typeof options.errorHandler !== 'function') {
		throw new TypeError('errorHandler object is no longer supported, switch to onError!');
	} else if (options.errorHandler) {
		// The deprecation notice goes to the legacy handler itself, even when `onError` is also set.
		options.errorHandler('warning', 'The `errorHandler` option has been deprecated, use `onError` instead!', this);
	}

	/**
	 * used to replace line endings before parsing, defaults to `normalizeLineEndings`
	 *
	 * @type {(string) => string}
	 * @readonly
	 */
	this.normalizeLineEndings = options.normalizeLineEndings || normalizeLineEndings;

	/**
	 * Configures if the nodes created during parsing will have a `lineNumber` and a
	 * `columnNumber` attribute describing their location in the XML string.
	 * Default is true.
	 *
	 * @type {boolean}
	 * @readonly
	 */
	// Only `undefined` selects the default; every other value is coerced to a boolean.
	// The default is resolved here instead of being written back to the caller's `options`.
	this.locator = options.locator === undefined ? true : !!options.locator;

	/**
	 * The default namespace can be provided by the key that is the empty string.
	 * When the `mimeType` for HTML, XHTML or SVG are passed to `parseFromString`,
	 * the default namespace that will be used,
	 * will be overridden according to the specification.
	 *
	 * @type {Readonly<Object>}
	 * @readonly
	 */
	this.xmlns = this.assign(Object.create(null), options.xmlns);
}
|
|
|
|
/**
 * Parses `source` using the options in the way configured by the `DOMParserOptions` of `this`
 * `DOMParser`. If `mimeType` is `text/html` an HTML `Document` is created,
 * otherwise an XML `Document` is created.
 *
 * __It behaves different from the description in the living standard__:
 * - Uses the `options` passed to the `DOMParser` constructor to modify the behavior.
 * - Any unexpected input is reported to `onError` with either a `warning`,
 *   `error` or `fatalError` level.
 * - Any `fatalError` throws a `ParseError` which prevents further processing.
 * - Any error thrown by `onError` is converted to a `ParseError` which prevents further
 *   processing.
 * - If the created `Document` has no `documentElement` after parsing,
 *   it is reported as a `fatalError`.
 *
 * *****Warning: By configuring a faulty DOMHandler implementation,
 * the specified behavior can completely be broken*****.
 *
 * @param {string} source
 * The XML mime type only allows string input!
 * @param {string} [mimeType='application/xml']
 * the mimeType or contentType of the document to be created determines the `type` of document
 * created (XML or HTML)
 * @returns {Document}
 * The `Document` node.
 * @throws {ParseError}
 * for any `fatalError` or anything that is thrown by `onError`
 * @throws {TypeError}
 * for any invalid `mimeType`
 * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser/parseFromString
 * @see https://html.spec.whatwg.org/#dom-domparser-parsefromstring-dev
 */
DOMParser.prototype.parseFromString = function (source, mimeType) {
	if (!isValidMimeType(mimeType)) {
		throw new TypeError('DOMParser.parseFromString: the provided mimeType "' + mimeType + '" is not valid.');
	}

	// Work on a copy, so the namespaces configured on the parser stay untouched between calls.
	var defaultNSMap = this.assign(Object.create(null), this.xmlns);
	var entityMap = entities.XML_ENTITIES;
	var defaultNamespace = defaultNSMap[''] || null;
	if (hasDefaultHTMLNamespace(mimeType)) {
		entityMap = entities.HTML_ENTITIES;
		defaultNamespace = NAMESPACE.HTML;
	} else if (mimeType === MIME_TYPE.XML_SVG_IMAGE) {
		defaultNamespace = NAMESPACE.SVG;
	}
	defaultNSMap[''] = defaultNamespace;
	defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML;

	var domBuilder = new this.domHandler({
		mimeType: mimeType,
		defaultNamespace: defaultNamespace,
		onError: this.onError,
	});
	if (this.locator) {
		// A fresh object per call is handed to the handler as its document locator.
		domBuilder.setDocumentLocator({});
	}

	// Named `reader` so it does not shadow the module-level `sax` import.
	var reader = new XMLReader();
	reader.errorHandler = domBuilder;
	reader.domBuilder = domBuilder;

	// Only XML mime types insist on a string; for HTML the source is coerced below.
	if (!isHTMLMimeType(mimeType) && typeof source !== 'string') {
		reader.errorHandler.fatalError('source is not a string');
	}
	reader.parse(this.normalizeLineEndings(String(source)), defaultNSMap, entityMap);
	if (!domBuilder.doc.documentElement) {
		reader.errorHandler.fatalError('missing root element');
	}
	return domBuilder.doc;
};
|
|
|
|
/**
 * @typedef DOMHandlerOptions
 * @property {string} [mimeType=MIME_TYPE.XML_APPLICATION]
 * @property {string | null} [defaultNamespace=null]
 * @property {function(level:ErrorLevel, message:string, context:DOMHandler):void} [onError]
 * Stored as `onError` on the handler and invoked by `reportError` when it is a function.
 */
/**
 * The class that is used to handle events from the SAX parser to create the related DOM
 * elements.
 *
 * Some methods are only implemented as an empty function,
 * since they are (at least currently) not relevant for xmldom.
 *
 * @class
 * @param {DOMHandlerOptions} [options]
 * @see http://www.saxproject.org/apidoc/org/xml/sax/ext/DefaultHandler2.html
 */
function DOMHandler(options) {
	var opt = options || {};

	/**
	 * The mime type is used to determine if the DOM handler will create an XML or HTML document.
	 * Only if it is set to `text/html` it will create an HTML document.
	 * It defaults to MIME_TYPE.XML_APPLICATION.
	 *
	 * @type {string}
	 * @see {@link MIME_TYPE}
	 * @readonly
	 */
	this.mimeType = opt.mimeType || MIME_TYPE.XML_APPLICATION;

	/**
	 * The namespace to use to create an XML document.
	 * For the following reasons this is required:
	 * - The SAX API for `startDocument` doesn't offer any way to pass a namespace,
	 *   since at that point there is no way for the parser to know what the default namespace
	 *   from the document will be.
	 * - When creating using `DOMImplementation.createDocument` it is required to pass a
	 *   namespace, to determine the correct `Document.contentType`, which should match
	 *   `this.mimeType`.
	 * - When parsing an XML document with the `application/xhtml+xml` mimeType,
	 *   the HTML namespace needs to be the default namespace.
	 *
	 * @type {string | null}
	 * @private
	 * @readonly
	 */
	this.defaultNamespace = opt.defaultNamespace || null;

	/**
	 * Toggled by `startCDATA`/`endCDATA`; while `true`, `characters` creates CDATA sections
	 * instead of text nodes.
	 *
	 * @type {boolean}
	 * @private
	 */
	this.cdata = false;

	/**
	 * The last `Element` that was created by `startElement`.
	 * `endElement` sets it to the `currentElement.parentNode`.
	 *
	 * Note: The sax parser currently sets it to white space text nodes between tags.
	 *
	 * @type {Element | Node | undefined}
	 * @private
	 */
	this.currentElement = undefined;

	/**
	 * The Document that is created as part of `startDocument`,
	 * and returned by `DOMParser.parseFromString`.
	 *
	 * @type {Document | undefined}
	 * @readonly
	 */
	this.doc = undefined;

	/**
	 * The locator is stored as part of setDocumentLocator.
	 * It is controlled and mutated by the SAX parser to store the current parsing position.
	 * It is used by DOMHandler to set `columnNumber` and `lineNumber`
	 * on the DOM nodes.
	 *
	 * @type {Readonly<Locator> | undefined}
	 * @private
	 * @readonly (the
	 * sax parser currently sometimes sets it)
	 */
	this.locator = undefined;

	/**
	 * The callback that `reportError` forwards every reported problem to.
	 *
	 * @type {function (level:ErrorLevel ,message:string, context:DOMHandler):void}
	 * @readonly
	 */
	this.onError = opt.onError;
}
|
|
|
|
/**
 * Copies the position currently stored in `locator` onto `node`.
 *
 * @param {Locator} locator
 * The object providing `lineNumber` and `columnNumber`.
 * @param {Node} node
 * The node that receives both values as own properties.
 */
function position(locator, node) {
	node.lineNumber = locator.lineNumber;
	node.columnNumber = locator.columnNumber;
}
|
|
|
|
DOMHandler.prototype = {
	/**
	 * Either creates an XML or an HTML document and stores it under `this.doc`.
	 * If it is an XML document, `this.defaultNamespace` is used to create it,
	 * and it will not contain any `childNodes`.
	 * If it is an HTML document, it will be created without any `childNodes`.
	 *
	 * @see http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
	 */
	startDocument: function () {
		var impl = new DOMImplementation();
		this.doc = isHTMLMimeType(this.mimeType) ? impl.createHTMLDocument(false) : impl.createDocument(this.defaultNamespace, '');
	},
	/**
	 * Creates an element (named `qName`, or `localName` when `qName` is falsy), appends it below
	 * the current element (or the document), makes it the current element and copies all
	 * attributes from `attrs` onto it.
	 *
	 * @param {string | null} namespaceURI
	 * @param {string} localName
	 * @param {string} qName
	 * @param {Object} attrs
	 * Read through `length`, `getURI(i)`, `getValue(i)`, `getQName(i)` and `getLocator(i)`.
	 */
	startElement: function (namespaceURI, localName, qName, attrs) {
		var doc = this.doc;
		var el = doc.createElementNS(namespaceURI, qName || localName);
		var len = attrs.length;
		appendElement(this, el);
		this.currentElement = el;

		if (this.locator) {
			position(this.locator, el);
		}
		for (var i = 0; i < len; i++) {
			// Distinct names, so the element's own `namespaceURI`/`qName` parameters stay intact.
			var attrNamespaceURI = attrs.getURI(i);
			var attrValue = attrs.getValue(i);
			var attrQName = attrs.getQName(i);
			var attr = doc.createAttributeNS(attrNamespaceURI, attrQName);
			if (this.locator) {
				position(attrs.getLocator(i), attr);
			}
			attr.value = attr.nodeValue = attrValue;
			el.setAttributeNode(attr);
		}
	},
	/**
	 * Moves `currentElement` up to its `parentNode`. The arguments are not used.
	 */
	endElement: function (namespaceURI, localName, qName) {
		this.currentElement = this.currentElement.parentNode;
	},
	/** Intentionally empty. */
	startPrefixMapping: function (prefix, uri) {},
	/** Intentionally empty. */
	endPrefixMapping: function (prefix) {},
	/**
	 * Creates a processing instruction and appends it below the current element (or the
	 * document).
	 *
	 * @param {string} target
	 * @param {string} data
	 */
	processingInstruction: function (target, data) {
		var ins = this.doc.createProcessingInstruction(target, data);
		if (this.locator) {
			position(this.locator, ins);
		}
		appendElement(this, ins);
	},
	/** Intentionally empty. */
	ignorableWhitespace: function (ch, start, length) {},
	/**
	 * Creates a text node, or a CDATA section while `this.cdata` is set, for the given
	 * characters. Nothing is created for an empty result.
	 * Without a current element the node is only appended (to the document) when it consists of
	 * white space exclusively.
	 *
	 * @param {string} chars
	 * @param {number} start
	 * @param {number} length
	 */
	characters: function (chars, start, length) {
		var text = _toString.apply(this, arguments);
		if (!text) {
			return;
		}
		var charNode = this.cdata ? this.doc.createCDATASection(text) : this.doc.createTextNode(text);
		if (this.currentElement) {
			this.currentElement.appendChild(charNode);
		} else if (/^\s*$/.test(text)) {
			this.doc.appendChild(charNode);
		}
		if (this.locator) {
			position(this.locator, charNode);
		}
	},
	/** Intentionally empty. */
	skippedEntity: function (name) {},
	/**
	 * Calls `normalize()` on the created document.
	 */
	endDocument: function () {
		this.doc.normalize();
	},
	/**
	 * Stores the locator to be able to set the `columnNumber` and `lineNumber`
	 * on the created DOM nodes.
	 * A provided locator gets its `lineNumber` reset to `0`.
	 *
	 * @param {Locator} locator
	 */
	setDocumentLocator: function (locator) {
		if (locator) {
			locator.lineNumber = 0;
		}
		this.locator = locator;
	},

	// LexicalHandler

	/**
	 * Creates a comment node and appends it below the current element (or the document).
	 *
	 * @param {string} chars
	 * @param {number} start
	 * @param {number} length
	 */
	comment: function (chars, start, length) {
		var text = _toString.apply(this, arguments);
		var comm = this.doc.createComment(text);
		if (this.locator) {
			position(this.locator, comm);
		}
		appendElement(this, comm);
	},

	/**
	 * Makes `characters` create CDATA sections until `endCDATA` is called.
	 */
	startCDATA: function () {
		this.cdata = true;
	},
	/**
	 * Makes `characters` create text nodes again.
	 */
	endCDATA: function () {
		this.cdata = false;
	},

	/**
	 * Creates a document type node, appends it and stores it as `doc.doctype`.
	 * Does nothing when the document's `implementation` is missing or has no
	 * `createDocumentType`.
	 *
	 * @param {string} name
	 * @param {string} publicId
	 * @param {string} systemId
	 * @param {string} internalSubset
	 */
	startDTD: function (name, publicId, systemId, internalSubset) {
		var impl = this.doc.implementation;
		if (impl && impl.createDocumentType) {
			var dt = impl.createDocumentType(name, publicId, systemId, internalSubset);
			if (this.locator) {
				position(this.locator, dt);
			}
			appendElement(this, dt);
			this.doc.doctype = dt;
		}
	},
	/**
	 * Forwards a problem to `this.onError` when that is a function, otherwise logs it using
	 * `console.error`.
	 *
	 * @param {string} level
	 * One of `warning`, `error` or `fatalError`.
	 * @param {string} message
	 * @throws {ParseError}
	 * When `this.onError` throws; the thrown value becomes part of the message.
	 */
	reportError: function (level, message) {
		if (typeof this.onError === 'function') {
			try {
				this.onError(level, message, this);
			} catch (e) {
				throw new ParseError('Reporting ' + level + ' "' + message + '" caused ' + e, this.locator);
			}
		} else {
			console.error('[xmldom ' + level + ']\t' + message, _locator(this.locator));
		}
	},
	/**
	 * Reports `message` with the level `warning`.
	 *
	 * @param {string} message
	 * @see http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html
	 */
	warning: function (message) {
		this.reportError('warning', message);
	},
	/**
	 * Reports `message` with the level `error`.
	 *
	 * @param {string} message
	 */
	error: function (message) {
		this.reportError('error', message);
	},
	/**
	 * This function reports a fatal error and throws a ParseError.
	 *
	 * @param {string} message
	 * - The message to be used for reporting and throwing the error.
	 * @returns {never}
	 * This function always throws an error and never returns a value.
	 * @throws {ParseError}
	 * Always throws a ParseError with the provided message.
	 */
	fatalError: function (message) {
		this.reportError('fatalError', message);
		throw new ParseError(message, this.locator);
	},
};
|
|
|
|
/**
 * Formats the position stored in a locator as a suffix for log output.
 *
 * @param {Locator} [l]
 * @returns {string}
 * `'\n@#[line:<lineNumber>,col:<columnNumber>]'`, or an empty string when `l` is falsy
 * (so that logging it does not print a literal `undefined`).
 */
function _locator(l) {
	if (!l) {
		return '';
	}
	return '\n@#[line:' + l.lineNumber + ',col:' + l.columnNumber + ']';
}
|
|
|
|
/**
 * Extracts the part of `chars` described by `start` and `length`.
 *
 * @param {string | Object} chars
 * A string, or a non-string character container (the original comment refers to connecting
 * a Java SAX parser on Rhino).
 * @param {number} start
 * @param {number} length
 * @returns {string | Object}
 * For strings the result of `chars.substr(start, length)`. Non-strings are converted using
 * `java.lang.String` when a range applies, otherwise they are returned unchanged.
 */
function _toString(chars, start, length) {
	if (typeof chars === 'string') {
		// `substr` is kept on purpose: unlike `slice`/`substring` it takes a length and returns
		// the whole string when `start` and `length` are undefined.
		return chars.substr(start, length);
	}
	// java sax connect with xmldom on rhino (what about: "? && !(chars instanceof String)")
	if (chars.length >= start + length || start) {
		// NOTE(review): `java` is a global that only exists on Rhino; this branch throws a
		// ReferenceError elsewhere.
		return new java.lang.String(chars, start, length) + '';
	}
	return chars;
}
|
|
|
|
/*
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html
 * used method of org.xml.sax.ext.LexicalHandler:
 *  #comment(chars, start, length)
 *  #startCDATA()
 *  #endCDATA()
 *  #startDTD(name, publicId, systemId)
 *
 *
 * IGNORED method of org.xml.sax.ext.LexicalHandler:
 *  #endDTD()
 *  #startEntity(name)
 *  #endEntity(name)
 *
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html
 * IGNORED method of org.xml.sax.ext.DeclHandler
 * 	#attributeDecl(eName, aName, type, mode, value)
 *  #elementDecl(name, model)
 *  #externalEntityDecl(name, publicId, systemId)
 *  #internalEntityDecl(name, value)
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html
 * IGNORED method of org.xml.sax.EntityResolver2
 *  #resolveEntity(String name,String publicId,String baseURI,String systemId)
 *  #resolveEntity(publicId, systemId)
 *  #getExternalSubset(name, baseURI)
 * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html
 * IGNORED method of org.xml.sax.DTDHandler
 *  #notationDecl(name, publicId, systemId) {};
 *  #unparsedEntityDecl(name, publicId, systemId, notationName) {};
 */
// Every ignored SAX callback listed above is installed on the prototype as a method that
// does nothing and returns `null`.
'endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl'
	.split(',')
	.forEach(function (key) {
		DOMHandler.prototype[key] = function () {
			return null;
		};
	});
|
|
|
|
/* Private static helpers treated below as private instance methods, so don't need to add these to the public API; we might use a Relator to also get rid of non-standard public properties */

/**
 * Appends `node` to the handler's current element, or to its document when there is no
 * current element.
 *
 * @param {DOMHandler} handler
 * Read through `currentElement` and `doc`.
 * @param {Node} node
 * The node to append.
 */
function appendElement(handler, node) {
	if (handler.currentElement) {
		handler.currentElement.appendChild(node);
	} else {
		handler.doc.appendChild(node);
	}
}
|
|
|
|
/**
 * A method that prevents any further parsing when an `error`
 * with level `error` is reported during parsing.
 * All other levels are ignored.
 *
 * @param {string} level
 * The level of the reported problem.
 * @throws {string}
 * The string `'onErrorStopParsing'` when `level` is `'error'`.
 * @see {@link DOMParserOptions.onError}
 * @see {@link onWarningStopParsing}
 */
function onErrorStopParsing(level) {
	if (level === 'error') {
		// A plain string is thrown on purpose: `DOMHandler.prototype.reportError` concatenates
		// the thrown value into the message of the `ParseError` it raises.
		throw 'onErrorStopParsing';
	}
}
|
|
|
|
/**
 * A method that prevents any further parsing as soon as anything is reported during parsing,
 * no matter which level it has.
 *
 * @throws {string}
 * Always throws the string `'onWarningStopParsing'`.
 * @see {@link DOMParserOptions.onError}
 * @see {@link onErrorStopParsing}
 */
function onWarningStopParsing() {
	// A plain string is thrown on purpose: `DOMHandler.prototype.reportError` concatenates the
	// thrown value into the message of the `ParseError` it raises.
	throw 'onWarningStopParsing';
}
|
|
|
|
// Public API of this module. `__DOMHandler` is exported under a double underscore name;
// the `domHandler` option describes it as intended for internal testing.
exports.__DOMHandler = DOMHandler;
exports.DOMParser = DOMParser;
exports.normalizeLineEndings = normalizeLineEndings;
exports.onErrorStopParsing = onErrorStopParsing;
exports.onWarningStopParsing = onWarningStopParsing;
|