544 lines
18 KiB
JavaScript
544 lines
18 KiB
JavaScript
import { Debugger } from '../common/debugger.js';
|
|
import * as DomUtil from '../common/dom_util.js';
|
|
import { Engine } from '../common/engine.js';
|
|
import { NamedSymbol } from '../semantic_tree/semantic_attr.js';
|
|
import { SemanticRole, SemanticType } from '../semantic_tree/semantic_meaning.js';
|
|
import { SemanticHeuristics } from '../semantic_tree/semantic_heuristic_factory.js';
|
|
import { SemanticSkeleton } from '../semantic_tree/semantic_skeleton.js';
|
|
import * as SemanticUtil from '../semantic_tree/semantic_util.js';
|
|
import { MMLTAGS } from '../semantic_tree/semantic_util.js';
|
|
import * as EnrichAttr from './enrich_attr.js';
|
|
import { getCase } from './enrich_case.js';
|
|
/**
 * Module-level switches consulted by the enrichment routines in this file.
 */
const SETTINGS = {
  // Not read anywhere in this file.
  collapsed: true,
  // cloneContentNode: true -> synthesise an invisible <mo>, false -> use
  // EnrichAttr.addMrow() for content nodes that have no MathML of their own.
  implicit: true,
  // formattedOutput: true -> wrap debug dumps in a titled, fenced block.
  wiki: true,
};

/**
 * Ids of semantic nodes whose attributes walkTree has already written in its
 * general case. Cleared at the start of every enrich() call.
 */
const IDS = new Map();
|
|
/**
 * Enriches a MathML element in place using the given semantic tree.
 *
 * Resets the per-run id registry, walks the semantic tree from its root and,
 * if the engine's `structure` flag is set, stores the skeleton structure as an
 * attribute on `mml`. Finally hands the debugger a lazily evaluated report of
 * the original MathML, the semantic tree and the enriched MathML.
 *
 * @param mml The MathML element to enrich (mutated).
 * @param semantic The semantic tree; its `root` is the walk's starting point.
 * @returns The same `mml` element that was passed in.
 */
export function enrich(mml, semantic) {
  IDS.clear();
  // Snapshot taken before any mutation so the debug report can show it.
  const untouched = DomUtil.cloneNode(mml);
  walkTree(semantic.root);

  if (Engine.getInstance().structure) {
    const skeleton = SemanticSkeleton.fromStructure(mml, semantic);
    mml.setAttribute(EnrichAttr.Attribute.STRUCTURE, skeleton.toString());
  }

  const report = () => [
    formattedOutput(untouched, 'Original MathML', SETTINGS.wiki),
    formattedOutput(semantic, 'Semantic Tree', SETTINGS.wiki),
    formattedOutput(mml, 'Semantically enriched MathML', SETTINGS.wiki),
  ];
  Debugger.getInstance().generateOutput(report);
  return mml;
}
|
|
/**
 * Recursively maps a semantic node onto MathML, writing semantic attributes
 * onto the elements it visits or creates.
 *
 * Cases, in order (the labels match the debugger output):
 *  - a special case handler from getCase() produces the MathML;
 *  - Case 0.1: exactly one MathML element and no semantic children;
 *  - Case 0.2: exactly one MathML element and a single EMPTY child;
 *  - Case 1:   no MathML tree recorded for the node, a layer is introduced;
 *  - Case 2:   a MathML tree exists; either the parent of the first attached
 *              child (2.1) or the inner node of the tree (2.2) is used.
 *
 * @param semantic The semantic node to process.
 * @returns The MathML node for `semantic`, after ascendNewNode().
 */
export function walkTree(semantic) {
  const trace = (message) => Debugger.getInstance().output(message);
  // Shared exit: log the end marker, then climb through wrapper elements.
  const finish = (node) => {
    trace('WALKING END: ' + semantic.toString());
    return ascendNewNode(node);
  };

  trace('WALKING START: ' + semantic.toString());
  const specialCase = getCase(semantic);
  if (specialCase) {
    return finish(specialCase.getMathml());
  }

  if (semantic.mathml.length === 1) {
    trace('Walktree Case 0');
    if (!semantic.childNodes.length) {
      trace('Walktree Case 0.1');
      const leaf = semantic.mathml[0];
      EnrichAttr.setAttributes(leaf, semantic);
      return finish(leaf);
    }
    const onlyChild = semantic.childNodes[0];
    if (semantic.childNodes.length === 1 &&
        onlyChild.type === SemanticType.EMPTY) {
      trace('Walktree Case 0.2');
      const wrapper = semantic.mathml[0];
      EnrichAttr.setAttributes(wrapper, semantic);
      wrapper.appendChild(walkTree(onlyChild));
      return finish(wrapper);
    }
    // Children without any MathML get a synthesised invisible operator so
    // that the general case below has an element to work with.
    semantic.childNodes.forEach((child) => {
      if (!child.mathml.length) {
        child.mathml = [createInvisibleOperator(child)];
      }
    });
  }

  const contentMml = semantic.contentNodes.map(cloneContentNode);
  setOperatorAttribute(semantic, contentMml);
  const childMml = semantic.childNodes.map(walkTree);
  const childrenList = SemanticSkeleton.combineContentChildren(
    semantic.type, semantic.role, contentMml, childMml);

  let newNode = semantic.mathmlTree;
  if (newNode === null) {
    trace('Walktree Case 1');
    newNode = introduceNewLayer(childrenList, semantic);
  } else {
    const attached = attachedElement(childrenList);
    trace('Walktree Case 2');
    if (attached) {
      trace('Walktree Case 2.1');
      newNode = parentNode(attached);
    } else {
      trace('Walktree Case 2.2');
      newNode = getInnerNode(newNode);
    }
  }

  newNode = rewriteMfenced(newNode);
  mergeChildren(newNode, childrenList, semantic);
  // Attributes are written at most once per semantic id within one run.
  if (!IDS.has(semantic.id)) {
    IDS.set(semantic.id, true);
    EnrichAttr.setAttributes(newNode, semantic);
  }
  return finish(newNode);
}
|
|
/**
 * Finds or creates the MathML element that should carry a semantic node which
 * has no MathML tree of its own.
 *
 * The least common ancestor of `children` is reused as-is only when it is a
 * VALID lca, has an empty tag, and is either attached or the semantic node has
 * no parent. Otherwise a fresh mrow is created and wired in (Case 1.1.x).
 * The result is recorded in `semantic.mathmlTree` if that is still unset.
 *
 * @param children MathML nodes of the semantic node's content and children.
 * @param semantic The semantic node being enriched.
 * @returns The element representing the new layer.
 */
export function introduceNewLayer(children, semantic) {
  const trace = (message) => Debugger.getInstance().output(message);
  const lca = mathmlLca(children);
  const kind = lca.type;
  let layer = lca.node;

  const reusable = kind === lcaType.VALID &&
    SemanticUtil.hasEmptyTag(layer) &&
    (layer.parentNode || !semantic.parent);

  if (!reusable) {
    trace('Walktree Case 1.1');
    layer = EnrichAttr.addMrow();
    if (kind === lcaType.PRUNED) {
      trace('Walktree Case 1.1.0');
      layer = introduceLayerAboveLca(layer, lca.node, children);
    } else if (children[0]) {
      trace('Walktree Case 1.1.1');
      const anchor = attachedElement(children);
      if (anchor) {
        // Swap the mrow in for the first attached child, then move the
        // contiguous run of affected siblings underneath it.
        const moved = childrenSubset(parentNode(anchor), children);
        DomUtil.replaceNode(anchor, layer);
        moved.forEach((item) => {
          layer.appendChild(item);
        });
      } else {
        // Nothing is attached: the first child itself becomes the layer.
        moveSemanticAttributes(layer, children[0]);
        layer = children[0];
      }
    }
  }

  if (!semantic.mathmlTree) {
    semantic.mathmlTree = layer;
  }
  return layer;
}
|
|
/**
 * Inserts `mrow` as a new layer around the (descended) lca node and moves all
 * `children` underneath it.
 *
 * If the descended node is the math element itself (Case 1.1.0.0), the math
 * element stays outermost: its semantic attributes and child nodes are moved
 * into `mrow`, and the two nodes swap roles for the remaining steps.
 *
 * @param mrow The freshly created wrapper element.
 * @param lca The least common ancestor node found for `children`.
 * @param children MathML children list; the entry equal to `lca` (if any) is
 *     replaced in place by the node that ends up inside the new layer.
 * @returns The outer node of the new layer.
 */
function introduceLayerAboveLca(mrow, lca, children) {
  let outer = mrow;
  let inner = descendNode(lca);
  if (SemanticUtil.hasMathTag(inner)) {
    Debugger.getInstance().output('Walktree Case 1.1.0.0');
    moveSemanticAttributes(inner, outer);
    DomUtil.toArray(inner.childNodes).forEach((x) => {
      outer.appendChild(x);
    });
    // Swap roles: the math element remains the outer node.
    const swapped = outer;
    outer = inner;
    inner = swapped;
  }
  const index = children.indexOf(lca);
  // Only substitute when the lca really is one of the children; writing to
  // index -1 would merely attach a stray "-1" property to the array.
  if (index !== -1) {
    children[index] = inner;
  }
  DomUtil.replaceNode(inner, outer);
  outer.appendChild(inner);
  children.forEach((x) => {
    outer.appendChild(x);
  });
  return outer;
}
|
|
/**
 * Transfers every attribute listed in EnrichAttr.EnrichAttributes that is
 * present on `oldNode` over to `newNode`, removing it from `oldNode`.
 *
 * @param oldNode Element to take the attributes from.
 * @param newNode Element to receive the attributes.
 */
function moveSemanticAttributes(oldNode, newNode) {
  for (const name of EnrichAttr.EnrichAttributes) {
    if (!oldNode.hasAttribute(name)) {
      continue;
    }
    const value = oldNode.getAttribute(name);
    newNode.setAttribute(name, value);
    oldNode.removeAttribute(name);
  }
}
|
|
/**
 * Returns the contiguous run of `node`'s child nodes spanning from the first
 * to the last position at which any of `newChildren` occurs.
 *
 * @param node Parent whose child nodes are inspected.
 * @param newChildren Nodes to look for among the child nodes.
 * @returns The slice of child nodes covering all matches; empty if none match.
 */
function childrenSubset(node, newChildren) {
  const siblings = DomUtil.toArray(node.childNodes);
  const span = newChildren.reduce(
    (range, child) => {
      const at = siblings.indexOf(child);
      if (at === -1) {
        return range;
      }
      return {
        first: Math.min(range.first, at),
        last: Math.max(range.last, at),
      };
    },
    // Sentinels that yield an empty slice when nothing matches.
    { first: +Infinity, last: -Infinity });
  return siblings.slice(span.first, span.last + 1);
}
|
|
/**
 * Computes the list of existing child nodes to merge against for an implicit
 * (juxtaposition) semantic node.
 *
 * Walks `node`'s child nodes: elements that already carry the semantic type
 * attribute are kept; other elements are kept if exactly one relevant node is
 * found below them, dropped if none is found, and expanded into the found
 * nodes if there are several. Every expanded element after the first is
 * marked with the 'AuxiliaryImplicit' attribute, which insertNewChild reads.
 * Finally, trailing semantic children that are in `children` but were not
 * collected are appended.
 *
 * @param node The MathML parent node.
 * @param children The new MathML children list.
 * @param semantic The semantic node; its childNodes' mathmlTree are consulted.
 * @returns The collated list of old children.
 */
function collateChildNodes(node, children, semantic) {
  const oldChildren = [];
  let pending = DomUtil.toArray(node.childNodes);
  let notFirst = false;
  while (pending.length) {
    const child = pending.shift();
    // Text/comment nodes have no attribute API; skip them, as
    // collectChildNodes does, instead of failing on hasAttribute.
    if (child.nodeType !== DomUtil.NodeType.ELEMENT_NODE) {
      continue;
    }
    if (child.hasAttribute(EnrichAttr.Attribute.TYPE)) {
      oldChildren.push(child);
      continue;
    }
    const collect = collectChildNodes(child, children);
    if (collect.length === 0) {
      continue;
    }
    if (collect.length === 1) {
      oldChildren.push(child);
      continue;
    }
    if (notFirst) {
      child.setAttribute('AuxiliaryImplicit', true);
    } else {
      notFirst = true;
    }
    // Expand in place: the found nodes are examined before the rest.
    pending = collect.concat(pending);
  }

  const rear = [];
  const semChildren = semantic.childNodes.map((x) => x.mathmlTree);
  // Scan from the back until a node that was already collected is reached.
  while (semChildren.length) {
    const schild = semChildren.pop();
    if (!schild) {
      continue;
    }
    if (oldChildren.indexOf(schild) !== -1) {
      break;
    }
    if (children.indexOf(schild) !== -1) {
      rear.unshift(schild);
    }
  }
  return oldChildren.concat(rear);
}
|
|
/**
 * Collects, in document order, the topmost element descendants of `node` that
 * either carry the semantic type attribute or are members of `children`.
 * The search does not continue below a collected element.
 *
 * @param node Root of the search (not itself a candidate).
 * @param children MathML nodes that count as matches.
 * @returns The matching descendant elements.
 */
function collectChildNodes(node, children) {
  const found = [];
  const visit = (parent) => {
    for (const child of DomUtil.toArray(parent.childNodes)) {
      if (child.nodeType !== DomUtil.NodeType.ELEMENT_NODE) {
        continue;
      }
      const matches = child.hasAttribute(EnrichAttr.Attribute.TYPE) ||
        children.indexOf(child) !== -1;
      if (matches) {
        found.push(child);
      } else {
        visit(child);
      }
    }
  };
  visit(node);
  return found;
}
|
|
/**
 * Merges the new MathML children into `node`, reconciling them with the child
 * nodes that are already there. `newChildren` is consumed (emptied) by the
 * merge.
 *
 * For implicit nodes with the combine_juxtaposition heuristic enabled, the
 * existing children are determined by collateChildNodes; otherwise they are
 * simply the child nodes of `node`.
 *
 * @param node The MathML parent to merge into.
 * @param newChildren The new children, in their intended order (mutated).
 * @param semantic The semantic node being enriched.
 */
function mergeChildren(node, newChildren, semantic) {
  if (newChildren.length === 0) {
    return;
  }
  if (newChildren.length === 1 && newChildren[0] === node) {
    return;
  }

  const collate = semantic.role === SemanticRole.IMPLICIT &&
    SemanticHeuristics.flags.combine_juxtaposition;
  const oldChildren = collate
    ? collateChildNodes(node, newChildren, semantic)
    : DomUtil.toArray(node.childNodes);

  if (oldChildren.length === 0) {
    newChildren.forEach((fresh) => {
      node.appendChild(fresh);
    });
    return;
  }

  let target = node;
  let cursor = 0;
  while (newChildren.length) {
    const incoming = newChildren[0];
    const existing = oldChildren[cursor];

    // Already in place, or an added function application that replaces an
    // existing one: both lists advance.
    if (existing === incoming || functionApplication(existing, incoming)) {
      newChildren.shift();
      cursor += 1;
      continue;
    }
    // Existing node that is not part of the new list: step over it.
    if (existing && newChildren.indexOf(existing) === -1) {
      cursor += 1;
      continue;
    }
    // New child already sits somewhere below the target.
    if (isDescendant(incoming, target)) {
      newChildren.shift();
      continue;
    }
    if (existing) {
      insertNewChild(target, existing, incoming);
      newChildren.shift();
      continue;
    }
    // Old children are exhausted from here on.
    if (incoming.parentNode) {
      target = parentNode(incoming);
      newChildren.shift();
      continue;
    }
    const following = newChildren[1];
    if (following && following.parentNode) {
      target = parentNode(following);
      target.insertBefore(incoming, following);
      newChildren.shift();
      newChildren.shift();
      continue;
    }
    target.insertBefore(incoming, null);
    newChildren.shift();
  }
}
|
|
/**
 * Inserts `newChild` in front of `oldChild`, or in front of the highest
 * ancestor of `oldChild` for which `oldChild`'s branch is still the first
 * child. The climb stops below `node`, and also at an element carrying the
 * 'AuxiliaryImplicit' attribute; that attribute is removed from the element
 * the new child is finally inserted before.
 *
 * @param node The MathML parent that bounds the climb.
 * @param oldChild The existing child to insert in front of.
 * @param newChild The node to insert.
 */
function insertNewChild(node, oldChild, newChild) {
  let anchor = oldChild;
  let container = parentNode(anchor);
  const canClimb = () =>
    container &&
    container.firstChild === anchor &&
    !anchor.hasAttribute('AuxiliaryImplicit') &&
    container !== node;
  while (canClimb()) {
    anchor = container;
    container = parentNode(anchor);
  }
  if (!container) {
    return;
  }
  container.insertBefore(newChild, anchor);
  anchor.removeAttribute('AuxiliaryImplicit');
}
|
|
/**
 * Tests whether `node` is reached when following parent links upwards from
 * `child`. `child` itself is not considered; the end of the parent chain is
 * compared against `node` as well before giving up.
 *
 * @param child The node to start from; a falsy value yields false.
 * @param node The prospective ancestor.
 * @returns True if `node` is found on the way up.
 */
function isDescendant(child, node) {
  if (!child) {
    return false;
  }
  let ancestor = parentNode(child);
  while (ancestor !== node) {
    if (!ancestor) {
      return false;
    }
    ancestor = parentNode(ancestor);
  }
  return true;
}
|
|
/**
 * Handles the case where an existing function-application node meets a newly
 * added one: if both nodes' text content equals
 * NamedSymbol.functionApplication and the new node is flagged as added, the
 * old node's attributes that the new node lacks are copied over and the old
 * node is replaced by the new one.
 *
 * @param oldNode The node currently in the tree (may be undefined).
 * @param newNode The candidate replacement (may be undefined).
 * @returns True if the replacement took place, false otherwise.
 */
function functionApplication(oldNode, newNode) {
  const appl = NamedSymbol.functionApplication;
  if (!oldNode ||
      !newNode ||
      !oldNode.textContent ||
      !newNode.textContent ||
      oldNode.textContent !== appl ||
      newNode.textContent !== appl ||
      newNode.getAttribute(EnrichAttr.Attribute.ADDED) !== 'true') {
    return false;
  }
  // Attributes already present on the new node take precedence.
  let position = 0;
  let attribute = oldNode.attributes[position];
  while (attribute) {
    if (!newNode.hasAttribute(attribute.nodeName)) {
      newNode.setAttribute(attribute.nodeName, attribute.nodeValue);
    }
    position += 1;
    attribute = oldNode.attributes[position];
  }
  DomUtil.replaceNode(oldNode, newNode);
  return true;
}
|
|
/**
 * Classification of a least-common-ancestor lookup, as produced by
 * mathmlLca and consumed by introduceNewLayer:
 *  - VALID:   the lca node is reported as usable;
 *  - INVALID: no usable lca was found;
 *  - PRUNED:  the left path was cut short at a node from the children list.
 * Frozen because the values are compared by identity and never reassigned.
 */
const lcaType = Object.freeze({
  VALID: 'valid',
  INVALID: 'invalid',
  PRUNED: 'pruned',
});
|
|
/**
 * Determines the least common ancestor of the attached nodes in `children`.
 *
 * The first and last attached nodes are located; if they coincide, that node
 * is a VALID lca. Otherwise the left node's path to the root is pruned at the
 * first node that is itself in `children`, and the right node is followed
 * upwards until it meets that path.
 *
 * @param children The MathML children list.
 * @returns An object `{type, node}`: `type` is an lcaType value and `node`
 *     the lca element (null when INVALID because nothing was found).
 */
function mathmlLca(children) {
  const invalid = () => ({ type: lcaType.INVALID, node: null });

  const leftMost = attachedElement(children);
  if (!leftMost) {
    return invalid();
  }
  const rightMost = attachedElement(children.slice().reverse());
  if (leftMost === rightMost) {
    return { type: lcaType.VALID, node: leftMost };
  }

  const leftPath = pathToRoot(leftMost);
  const prunedLeft = prunePath(leftPath, children);
  // Climb from the right until a node on the (pruned) left path is hit.
  const rightPath = pathToRoot(
    rightMost, (step) => prunedLeft.indexOf(step) !== -1);
  const lca = rightPath[0];
  const meeting = prunedLeft.indexOf(lca);
  if (meeting === -1) {
    return invalid();
  }

  let type;
  if (prunedLeft.length !== leftPath.length) {
    type = lcaType.PRUNED;
  } else if (validLca(prunedLeft[meeting + 1], rightPath[1])) {
    type = lcaType.VALID;
  } else {
    type = lcaType.INVALID;
  }
  return { type, node: lca };
}
|
|
/**
 * Cuts a path off directly after its first element that is contained in
 * `children` (or that is falsy). If there is no such element the whole path
 * is returned.
 *
 * @param path The path to prune (not modified).
 * @param children Nodes at which the path ends.
 * @returns A new array holding the leading part of `path`.
 */
function prunePath(path, children) {
  const stop = path.findIndex(
    (step) => !step || children.indexOf(step) !== -1);
  const last = stop === -1 ? path.length : stop;
  return path.slice(0, last + 1);
}
|
|
/**
 * Returns the first node in the list that has a parent node.
 *
 * @param nodes The nodes to inspect, in order.
 * @returns The first node with a truthy `parentNode`, or null if none has one.
 */
function attachedElement(nodes) {
  for (const candidate of nodes) {
    if (candidate.parentNode) {
      return candidate;
    }
  }
  return null;
}
|
|
/**
 * Builds the chain of ancestors of a node, ordered from the topmost node
 * reached down to `node` itself. Climbing stops at a node for which the
 * optional test holds, at a node with the math tag, or at a node without a
 * parent; the node where climbing stops is included.
 *
 * @param node The node to start from.
 * @param opt_test Optional predicate that ends the climb when it returns a
 *     truthy value. Defaults to a predicate that is always false.
 * @returns The path, topmost node first.
 */
function pathToRoot(node, opt_test) {
  const shouldStop = opt_test || ((_x) => false);
  const ascent = [node];
  let current = node;
  while (!(shouldStop(current) || SemanticUtil.hasMathTag(current)) &&
         current.parentNode) {
    current = parentNode(current);
    ascent.push(current);
  }
  return ascent.reverse();
}
|
|
/**
 * Checks that `left` has no previous sibling and `right` has no next sibling.
 *
 * @param left The node on the left path (may be undefined).
 * @param right The node on the right path (may be undefined).
 * @returns True only if both nodes exist and sit at the respective edges.
 */
function validLca(left, right) {
  if (!left || !right) {
    return false;
  }
  return !left.previousSibling && !right.nextSibling;
}
|
|
/**
 * Climbs from a node to the outermost enclosing element for which the node is
 * still a unit child (see unitChild). The climb never goes past an element
 * with the math tag.
 *
 * @param newNode The node to start from.
 * @returns The topmost node reached.
 */
export function ascendNewNode(newNode) {
  let current = newNode;
  for (;;) {
    if (SemanticUtil.hasMathTag(current) || !unitChild(current)) {
      return current;
    }
    current = parentNode(current);
  }
}
|
|
/**
 * Descends through wrapper elements: as long as a node has exactly one
 * element child that is not an ignored tag, and that child has an empty tag
 * and carries no semantic type attribute, the descent continues into it.
 *
 * @param node The node to start from.
 * @returns The innermost node reached; `node` itself if no descent applies.
 */
function descendNode(node) {
  const children = DomUtil.toArray(node.childNodes);
  // An array is always truthy, so test for emptiness explicitly.
  if (!children || children.length === 0) {
    return node;
  }
  const remainder = children.filter(
    (child) =>
      child.nodeType === DomUtil.NodeType.ELEMENT_NODE &&
      !SemanticUtil.hasIgnoreTag(child));
  if (remainder.length === 1 &&
      SemanticUtil.hasEmptyTag(remainder[0]) &&
      !remainder[0].hasAttribute(EnrichAttr.Attribute.TYPE)) {
    return descendNode(remainder[0]);
  }
  return node;
}
|
|
/**
 * Tests whether a node is effectively the only child of its parent: the
 * parent must exist and have an empty tag, and every other child node of the
 * parent must be ignorable.
 *
 * @param node The node to test.
 * @returns True if `node` is the sole non-ignorable child of such a parent.
 */
function unitChild(node) {
  const container = parentNode(node);
  if (!container) {
    return false;
  }
  if (!SemanticUtil.hasEmptyTag(container)) {
    return false;
  }
  const siblings = DomUtil.toArray(container.childNodes);
  return siblings.every((sibling) => sibling === node || isIgnorable(sibling));
}
|
|
/**
 * Decides whether a node can be disregarded when looking for the meaningful
 * content of an element.
 *
 * Ignorable are: missing nodes, non-element nodes, elements with an ignored
 * tag, and elements all of whose child nodes are ignorable. Never ignorable
 * are elements that have children without having an empty tag, elements with
 * a display tag, elements carrying the semantic type attribute, and orphaned
 * glyphs.
 *
 * @param node The node to test (may be null or undefined).
 * @returns True if the node is ignorable.
 */
function isIgnorable(node) {
  // The null guard has to come before the first property access.
  if (!node) {
    return true;
  }
  if (node.nodeType !== DomUtil.NodeType.ELEMENT_NODE) {
    return true;
  }
  if (SemanticUtil.hasIgnoreTag(node)) {
    return true;
  }
  const children = DomUtil.toArray(node.childNodes);
  if ((!SemanticUtil.hasEmptyTag(node) && children.length) ||
      SemanticUtil.hasDisplayTag(node) ||
      node.hasAttribute(EnrichAttr.Attribute.TYPE) ||
      SemanticUtil.isOrphanedGlyph(node)) {
    return false;
  }
  return children.every((child) => isIgnorable(child));
}
|
|
/**
 * Reads the parent of a DOM node.
 *
 * @param element The node whose parent is wanted.
 * @returns The value of the node's `parentNode` property.
 */
function parentNode(element) {
  const { parentNode: parent } = element;
  return parent;
}
|
|
/**
 * Stores a collapsed structure on a node: the structure is wrapped in a
 * SemanticSkeleton and its string form is written to the collapsed attribute.
 *
 * @param node The element to annotate.
 * @param collapsed The collapsed structure passed to the skeleton constructor.
 */
export function addCollapsedAttribute(node, collapsed) {
  const serialized = new SemanticSkeleton(collapsed).toString();
  node.setAttribute(EnrichAttr.Attribute.COLLAPSED, serialized);
}
|
|
/**
 * Produces the MathML node for a semantic content node. Content that already
 * has MathML is processed by walkTree. Otherwise a stand-in element is
 * created (an invisible operator if SETTINGS.implicit is set, else an mrow
 * from EnrichAttr.addMrow) and recorded as the content's only MathML node.
 *
 * @param content The semantic content node.
 * @returns The MathML node for the content.
 */
export function cloneContentNode(content) {
  if (content.mathml.length) {
    return walkTree(content);
  }
  let standIn;
  if (SETTINGS.implicit) {
    standIn = createInvisibleOperator(content);
  } else {
    standIn = EnrichAttr.addMrow();
  }
  content.mathml = [standIn];
  return standIn;
}
|
|
/**
 * Replaces an mfenced element by an mrow. All attributes except `open`,
 * `close` and `separators` are copied to the mrow, the child nodes are moved
 * into it, and the mrow takes the mfenced element's place. Any other element
 * is returned unchanged.
 *
 * @param mml The element to rewrite.
 * @returns The new mrow, or `mml` itself if it is not an mfenced element.
 */
export function rewriteMfenced(mml) {
  if (DomUtil.tagName(mml) !== MMLTAGS.MFENCED) {
    return mml;
  }
  const fenceOnly = ['open', 'close', 'separators'];
  const row = EnrichAttr.addMrow();

  let position = 0;
  let attribute = mml.attributes[position];
  while (attribute) {
    if (fenceOnly.indexOf(attribute.name) === -1) {
      row.setAttribute(attribute.name, attribute.value);
    }
    position += 1;
    attribute = mml.attributes[position];
  }

  // Snapshot the child list first: appending moves nodes out of `mml`.
  for (const child of DomUtil.toArray(mml.childNodes)) {
    row.appendChild(child);
  }
  DomUtil.replaceNode(mml, row);
  return row;
}
|
|
/**
 * Creates an `mo` element for a semantic operator that has no MathML
 * counterpart. The element contains the operator's text content, receives the
 * operator's semantic attributes and is flagged as added.
 *
 * @param operator The semantic node of the operator.
 * @returns The new `mo` element.
 */
function createInvisibleOperator(operator) {
  const element = DomUtil.createElement('mo');
  element.appendChild(DomUtil.createTextNode(operator.textContent));
  EnrichAttr.setAttributes(element, operator);
  // Set after the semantic attributes so the flag is always 'true'.
  element.setAttribute(EnrichAttr.Attribute.ADDED, 'true');
  return element;
}
|
|
/**
 * Writes the operator attribute on the inner node of every content element.
 * The value is the semantic node's type, followed by a comma and its text
 * content when the text content is non-empty.
 *
 * @param semantic The semantic node the content belongs to.
 * @param content The MathML elements of the node's content.
 */
export function setOperatorAttribute(semantic, content) {
  let operator = semantic.type;
  if (semantic.textContent) {
    operator = operator + (',' + semantic.textContent);
  } else {
    operator = operator + '';
  }
  content.forEach((element) => {
    const inner = getInnerNode(element);
    inner.setAttribute(EnrichAttr.Attribute.OPERATOR, operator);
  });
}
|
|
/**
 * Finds the single meaningful node inside an element by skipping ignorable
 * children and looking through empty-tag wrappers (unless the wrapper is
 * marked as punctuation). If exactly one such node is found it is returned;
 * in all other cases the element itself is returned.
 *
 * @param node The element to look into.
 * @returns The unique inner node, or `node` if there is none or several.
 */
export function getInnerNode(node) {
  const children = DomUtil.toArray(node.childNodes);
  // An array is always truthy, so test for emptiness explicitly.
  if (!children || children.length === 0) {
    return node;
  }
  const remainder = children.filter((child) => !isIgnorable(child));
  const result = [];
  for (let i = 0, remain; (remain = remainder[i]); i++) {
    const isWrapper = SemanticUtil.hasEmptyTag(remain) &&
      remain.getAttribute(EnrichAttr.Attribute.TYPE) !==
        SemanticType.PUNCTUATION;
    if (!isWrapper) {
      result.push(remain);
      continue;
    }
    // A wrapper only counts if something distinct is found inside it.
    const nextInner = getInnerNode(remain);
    if (nextInner && nextInner !== remain) {
      result.push(nextInner);
    }
  }
  if (result.length === 1) {
    return result[0];
  }
  return node;
}
|
|
/**
 * Renders an element for debug output: its string form is passed through
 * DomUtil.formatXml and then EnrichAttr.removeAttributePrefix.
 *
 * @param element The object to render; only its toString() is used.
 * @param name Title used in wiki mode.
 * @param wiki If true, the output is prefixed with the title and wrapped in a
 *     fenced html block. Defaults to false.
 * @returns The formatted string.
 */
function formattedOutput(element, name, wiki = false) {
  const pretty = DomUtil.formatXml(element.toString());
  const output = EnrichAttr.removeAttributePrefix(pretty);
  if (!wiki) {
    return output;
  }
  return name + ':\n```html\n' + output + '\n```\n';
}
|
|
/**
 * Walks all children of a punctuated semantic node and assembles the id
 * structure describing the node in collapsed form.
 *
 * @param semantic The punctuated semantic node.
 * @param opt_children Optional array that receives the MathML node of every
 *     child. When it is given, no parent attribute is written on the
 *     children's inner nodes.
 * @returns `[semantic.id, ['c', ...contentIds], ...childIds]`.
 */
export function collapsePunctuated(semantic, opt_children) {
  const collecting = !!opt_children;
  const sink = opt_children || [];
  const owner = semantic.parent;
  const contentIds = ['c'].concat(semantic.contentNodes.map((x) => x.id));
  const structure = [semantic.id, contentIds];

  for (const child of semantic.childNodes) {
    // Stop at the first missing entry.
    if (!child) {
      break;
    }
    const mmlChild = walkTree(child);
    sink.push(mmlChild);
    const innerNode = getInnerNode(mmlChild);
    if (owner && !collecting) {
      innerNode.setAttribute(EnrichAttr.Attribute.PARENT, owner.id.toString());
    }
    structure.push(child.id);
  }
  return structure;
}
|