"use strict";
|
|
const parse5 = require("parse5");
|
const sax = require("sax");
|
const attributes = require("../living/attributes");
|
const DocumentType = require("../living/generated/DocumentType");
|
const JSDOMParse5Adapter = require("./parse5-adapter-parsing");
|
const { HTML_NS } = require("../living/helpers/namespaces");
|
|
// Horrible monkey-patch to implement https://github.com/inikulin/parse5/issues/237
|
const OpenElementStack = require("parse5/lib/parser/open_element_stack");
|
const originalPop = OpenElementStack.prototype.pop;

// Wraps the stock pop() so that the entry leaving the stack of open elements is notified through its optional
// `_poppedOffStackOfOpenElements()` hook.
OpenElementStack.prototype.pop = function (...popArgs) {
  // Read the top entry before delegating; once the original pop() has run, `stackTop` presumably no longer
  // indexes it.
  const leaving = this.items[this.stackTop];

  originalPop.call(this, ...popArgs);

  // Entries that do not define the hook are left alone.
  if (!leaving._poppedOffStackOfOpenElements) {
    return;
  }
  leaving._poppedOffStackOfOpenElements();
};
|
|
const originalPush = OpenElementStack.prototype.push;

// Wraps the stock push() so that the entry that ends up on top of the stack of open elements is notified through
// its optional `_pushedOnStackOfOpenElements()` hook.
OpenElementStack.prototype.push = function (...pushArgs) {
  originalPush.call(this, ...pushArgs);

  // Read the top entry only after delegating, so it is whatever the original push() left at `stackTop`.
  const entering = this.items[this.stackTop];

  // Entries that do not define the hook are left alone.
  if (!entering._pushedOnStackOfOpenElements) {
    return;
  }
  entering._pushedOnStackOfOpenElements();
};
|
|
module.exports = class HTMLToDOM {
|
constructor(parsingMode) {
|
this.parser = parsingMode === "xml" ? sax : parse5;
|
}
|
|
appendToNode(html, node) {
|
html = String(html);
|
|
return this._doParse(html, true, node);
|
}
|
|
appendToDocument(html, documentImpl) {
|
html = String(html);
|
|
return this._doParse(html, false, documentImpl, documentImpl._parseOptions);
|
}
|
|
_doParse(...args) {
|
return this.parser === parse5 ? this._parseWithParse5(...args) : this._parseWithSax(...args);
|
}
|
|
_parseWithParse5(html, isFragment, contextNode, options = {}) {
|
const adapter = new JSDOMParse5Adapter(contextNode._ownerDocument || contextNode);
|
options.treeAdapter = adapter;
|
|
if (isFragment) {
|
const fragment = this.parser.parseFragment(contextNode, html, options);
|
|
if (contextNode._templateContents) {
|
contextNode._templateContents.appendChild(fragment);
|
} else {
|
contextNode.appendChild(fragment);
|
}
|
} else {
|
this.parser.parse(html, options);
|
}
|
|
return contextNode;
|
}
|
|
_parseWithSax(html, isFragment, contextNode) {
|
const SaxParser = this.parser.parser;
|
const parser = new SaxParser(/* strict = */true, { xmlns: true, strictEntities: true });
|
parser.noscript = false;
|
parser.looseCase = "toString";
|
const openStack = [contextNode];
|
parser.ontext = text => {
|
setChildForSax(openStack[openStack.length - 1], {
|
type: "text",
|
data: text
|
});
|
};
|
parser.oncdata = cdata => {
|
setChildForSax(openStack[openStack.length - 1], {
|
type: "cdata",
|
data: cdata
|
});
|
};
|
parser.onopentag = arg => {
|
const attrs = Object.keys(arg.attributes).map(key => {
|
const rawAttribute = arg.attributes[key];
|
|
let { prefix } = rawAttribute;
|
let localName = rawAttribute.local;
|
if (prefix === "xmlns" && localName === "") {
|
// intended weirdness in node-sax, see https://github.com/isaacs/sax-js/issues/165
|
localName = prefix;
|
prefix = null;
|
}
|
|
if (prefix === "") {
|
prefix = null;
|
}
|
|
const namespace = rawAttribute.uri === "" ? null : rawAttribute.uri;
|
|
return { name: rawAttribute.name, value: rawAttribute.value, prefix, localName, namespace };
|
});
|
const tag = {
|
type: "tag",
|
name: arg.local,
|
prefix: arg.prefix,
|
namespace: arg.uri,
|
attributes: attrs
|
};
|
|
if (arg.local === "script" && arg.uri === HTML_NS) {
|
openStack.push(tag);
|
} else {
|
const elem = setChildForSax(openStack[openStack.length - 1], tag);
|
openStack.push(elem);
|
}
|
};
|
parser.onclosetag = () => {
|
const elem = openStack.pop();
|
if (elem.constructor.name === "Object") { // we have an empty script tag
|
setChildForSax(openStack[openStack.length - 1], elem);
|
}
|
};
|
parser.onscript = scriptText => {
|
const tag = openStack.pop();
|
tag.children = [{ type: "text", data: scriptText }];
|
const elem = setChildForSax(openStack[openStack.length - 1], tag);
|
openStack.push(elem);
|
};
|
parser.oncomment = comment => {
|
setChildForSax(openStack[openStack.length - 1], {
|
type: "comment",
|
data: comment
|
});
|
};
|
parser.onprocessinginstruction = pi => {
|
setChildForSax(openStack[openStack.length - 1], {
|
type: "directive",
|
name: "?" + pi.name,
|
data: "?" + pi.name + " " + pi.body + "?"
|
});
|
};
|
parser.ondoctype = dt => {
|
setChildForSax(openStack[openStack.length - 1], {
|
type: "directive",
|
name: "!doctype",
|
data: "!doctype " + dt
|
});
|
|
const entityMatcher = /<!ENTITY ([^ ]+) "([^"]+)">/g;
|
let result;
|
while ((result = entityMatcher.exec(dt))) {
|
const [, name, value] = result;
|
if (!(name in parser.ENTITIES)) {
|
parser.ENTITIES[name] = value;
|
}
|
}
|
};
|
|
parser.onerror = err => {
|
throw err;
|
};
|
parser.write(html).close();
|
}
|
};
|
|
/**
 * Creates the DOM node described by `node` and attaches it under `parentImpl`.
 *
 * @param {object} parentImpl - Parent implementation object; its `_ownerDocument` (or the parent itself when it
 *   has none) is used to create the new node.
 * @param {object} node - Descriptor with a `type` ("tag", "script", "style", "root", "text", "cdata", "comment"
 *   or "directive") plus, depending on the type, `name`, `prefix`, `namespace`, `data`, `attributes`, `children`.
 * @returns {object|null} The created node (or the parent's `_templateContents` for a "root" descriptor inside an
 *   HTML <template>), or `null` when the descriptor does not produce a node.
 */
function setChildForSax(parentImpl, node) {
  const ownerDocument = (parentImpl && parentImpl._ownerDocument) || parentImpl;

  let created;
  let reusedTemplateContents = false;

  switch (node.type) {
    case "tag":
    case "script":
    case "style": {
      created = ownerDocument._createElementWithCorrectElementInterface(node.name, node.namespace);
      created._prefix = node.prefix || null;
      created._namespaceURI = node.namespace || null;
      break;
    }

    case "root": {
      // Inside an HTML <template>, the virtual root is skipped and its children are routed into the parent's
      // _templateContents.
      const parentIsHtmlTemplate = parentImpl.tagName === "TEMPLATE" && parentImpl._namespaceURI === HTML_NS;
      if (parentIsHtmlTemplate) {
        created = parentImpl._templateContents;
        reusedTemplateContents = true;
      }
      break;
    }

    case "text": {
      // The text is used as-is; entity decoding is expected to have happened in the parser already.
      created = ownerDocument.createTextNode(node.data);
      break;
    }

    case "cdata": {
      created = ownerDocument.createCDATASection(node.data);
      break;
    }

    case "comment": {
      created = ownerDocument.createComment(node.data);
      break;
    }

    case "directive": {
      if (node.name[0] === "?" && node.name.toLowerCase() !== "?xml") {
        // `data` has the shape "?target body?": drop the target, the separator after it and the closing "?".
        const target = node.name.substring(1);
        const body = node.data.slice(node.name.length + 1, -1);
        created = ownerDocument.createProcessingInstruction(target, body);
      } else if (node.name.toLowerCase() === "!doctype") {
        created = parseDocType(ownerDocument, "<" + node.data + ">");
      }
      break;
    }
  }

  // Unknown types, XML declarations and "root" outside a template yield nothing.
  if (!created) {
    return null;
  }

  if (node.attributes) {
    node.attributes.forEach(attr => {
      attributes.setAttributeValue(created, attr.localName, attr.value, attr.prefix, attr.namespace);
    });
  }

  if (node.children) {
    for (const childDescriptor of node.children) {
      setChildForSax(created, childDescriptor);
    }
  }

  // The template contents fragment already belongs to its template, so it is never re-attached.
  if (reusedTemplateContents) {
    return created;
  }

  if (parentImpl._templateContents) {
    // The parent is a <template> (e.g. innerHTML being set on it): children live in its contents.
    parentImpl._templateContents.appendChild(created);
  } else {
    parentImpl.appendChild(created);
  }

  return created;
}
|
|
// Exactly `<!doctype html>` (single space, any letter case).
const HTML5_DOCTYPE = /<!doctype html>/i;

// Name followed by PUBLIC, a public identifier and a system identifier. Each literal may be wrapped in double or
// single quotes and may be empty. Groups: 1 = name, 2/3 = public ID (double/single), 4/5 = system ID.
const PUBLIC_DOCTYPE = /<!doctype\s+([^\s]+)\s+public\s+(?:"([^"]*)"|'([^']*)')\s+(?:"([^"]*)"|'([^']*)')/i;

// Name followed by SYSTEM and a system identifier. Groups: 1 = name, 2/3 = system ID (double/single quoted).
const SYSTEM_DOCTYPE = /<!doctype\s+([^\s]+)\s+system\s+(?:"([^"]*)"|'([^']*)')/i;

// Just the name, stopping at whitespace, at the closing ">" or at the "[" that opens an internal subset.
const DOCTYPE_NAME = /<!doctype\s+([^\s>\[]+)/i;

/**
 * Builds a DocumentType node from the text of a doctype declaration.
 *
 * @param {object} doc - Owner document for the new node.
 * @param {string} html - Full declaration text, e.g. `<!doctype root SYSTEM "root.dtd">`.
 * @returns {object} The node produced by `createDocumentTypeInternal()`. Its name is the declared name; the
 *   public and system IDs are empty strings when not declared. If no name can be extracted at all, the name
 *   falls back to "html".
 */
function parseDocType(doc, html) {
  if (HTML5_DOCTYPE.test(html)) {
    return createDocumentTypeInternal(doc, "html", "", "");
  }

  // Of the two alternative capture groups for a literal, exactly one is defined; pick that one.
  const literal = (doubleQuoted, singleQuoted) => (doubleQuoted !== undefined ? doubleQuoted : singleQuoted);

  const publicPieces = PUBLIC_DOCTYPE.exec(html);
  if (publicPieces) {
    const [, name, publicDq, publicSq, systemDq, systemSq] = publicPieces;
    return createDocumentTypeInternal(doc, name, literal(publicDq, publicSq), literal(systemDq, systemSq));
  }

  const systemPieces = SYSTEM_DOCTYPE.exec(html);
  if (systemPieces) {
    const [, name, systemDq, systemSq] = systemPieces;
    return createDocumentTypeInternal(doc, name, "", literal(systemDq, systemSq));
  }

  // No external ID (or one we could not read): keep the declared name instead of pretending it was "html".
  const namePieces = DOCTYPE_NAME.exec(html);
  if (namePieces) {
    return createDocumentTypeInternal(doc, namePieces[1], "", "");
  }

  // Shouldn't get here (the parser shouldn't let us know about invalid doctypes), but our logic likely isn't
  // real-world perfect, so let's fall back.
  return createDocumentTypeInternal(doc, "html", "", "");
}
|
|
/**
 * Creates a DocumentType implementation object through `DocumentType.createImpl()`, with no constructor arguments
 * and the given values as private data.
 *
 * @param {object} ownerDocument - Document the doctype belongs to.
 * @param {string} name - Doctype name.
 * @param {string} publicId - Public identifier.
 * @param {string} systemId - System identifier.
 * @returns {object} Whatever `DocumentType.createImpl()` returns.
 */
function createDocumentTypeInternal(ownerDocument, name, publicId, systemId) {
  const constructorArgs = [];
  const privateData = { ownerDocument, name, publicId, systemId };

  return DocumentType.createImpl(constructorArgs, privateData);
}
|