Files
2025-07-16 16:30:56 +00:00

544 lines
18 KiB
JavaScript

import { Debugger } from '../common/debugger.js';
import * as DomUtil from '../common/dom_util.js';
import { Engine } from '../common/engine.js';
import { NamedSymbol } from '../semantic_tree/semantic_attr.js';
import { SemanticRole, SemanticType } from '../semantic_tree/semantic_meaning.js';
import { SemanticHeuristics } from '../semantic_tree/semantic_heuristic_factory.js';
import { SemanticSkeleton } from '../semantic_tree/semantic_skeleton.js';
import * as SemanticUtil from '../semantic_tree/semantic_util.js';
import { MMLTAGS } from '../semantic_tree/semantic_util.js';
import * as EnrichAttr from './enrich_attr.js';
import { getCase } from './enrich_case.js';
// Module-level switches read by the functions in this file.
const SETTINGS = {
// Not referenced anywhere in the visible part of this file.
collapsed: true,
// When truthy, cloneContentNode builds an invisible operator element for a
// content node without MathML; otherwise it uses EnrichAttr.addMrow().
implicit: true,
// Passed by enrich as the `wiki` flag of formattedOutput, which then wraps the
// debug output in a titled, fenced block.
wiki: true
};
// Records the ids of semantic nodes whose attributes walkTree has already set
// in its general case, so they are written only once per enrich call.
// Cleared at the start of enrich.
const IDS = new Map();
/**
 * Entry point of the enrichment: walks the semantic tree starting at its root
 * and, if the engine requests it, stores the structure skeleton on `mml`.
 *
 * @param {Element} mml The MathML element to enrich. It is returned as-is;
 *     presumably the walk mutates it through the MathML references held by
 *     the semantic nodes (TODO confirm).
 * @param {object} semantic The semantic tree; its `root` is walked.
 * @returns {Element} The `mml` argument.
 */
export function enrich(mml, semantic) {
  // Ids recorded by a previous run must not suppress attribute setting now.
  IDS.clear();
  // Snapshot taken before the walk; only used for the debug output below.
  const untouchedMml = DomUtil.cloneNode(mml);
  walkTree(semantic.root);
  const engine = Engine.getInstance();
  if (engine.structure) {
    const skeleton = SemanticSkeleton.fromStructure(mml, semantic);
    mml.setAttribute(EnrichAttr.Attribute.STRUCTURE, skeleton.toString());
  }
  const debugReport = () => {
    const before = formattedOutput(untouchedMml, 'Original MathML', SETTINGS.wiki);
    const tree = formattedOutput(semantic, 'Semantic Tree', SETTINGS.wiki);
    const after = formattedOutput(mml, 'Semantically enriched MathML', SETTINGS.wiki);
    return [before, tree, after];
  };
  Debugger.getInstance().generateOutput(debugReport);
  return mml;
}
/**
 * Recursively determines the MathML element that represents a semantic node
 * and annotates it with the node's attributes.
 *
 * Handled in order: a special case supplied by getCase; a node backed by
 * exactly one MathML element that is a leaf or has a single EMPTY child; and
 * the general case, where content nodes and children are walked, combined and
 * merged under one element.
 *
 * @param {object} semantic The semantic node to walk.
 * @returns {Element} The result of ascendNewNode for the element chosen or
 *     created for this node.
 */
export function walkTree(semantic) {
Debugger.getInstance().output('WALKING START: ' + semantic.toString());
// A special case, if one applies, produces the MathML for this node itself.
const specialCase = getCase(semantic);
let newNode;
if (specialCase) {
newNode = specialCase.getMathml();
Debugger.getInstance().output('WALKING END: ' + semantic.toString());
return ascendNewNode(newNode);
}
if (semantic.mathml.length === 1) {
Debugger.getInstance().output('Walktree Case 0');
// Case 0.1: leaf node; annotate its single MathML element directly.
if (!semantic.childNodes.length) {
Debugger.getInstance().output('Walktree Case 0.1');
newNode = semantic.mathml[0];
EnrichAttr.setAttributes(newNode, semantic);
Debugger.getInstance().output('WALKING END: ' + semantic.toString());
return ascendNewNode(newNode);
}
const fchild = semantic.childNodes[0];
// Case 0.2: the only child is of type EMPTY; annotate the element and append
// the walked child to it.
if (semantic.childNodes.length === 1 &&
fchild.type === SemanticType.EMPTY) {
Debugger.getInstance().output('Walktree Case 0.2');
newNode = semantic.mathml[0];
EnrichAttr.setAttributes(newNode, semantic);
newNode.appendChild(walkTree(fchild));
Debugger.getInstance().output('WALKING END: ' + semantic.toString());
return ascendNewNode(newNode);
}
// Children without any MathML get a freshly created invisible operator so the
// recursive walk below has an element to work with.
semantic.childNodes.forEach((child) => {
if (!child.mathml.length) {
child.mathml = [createInvisibleOperator(child)];
}
});
}
// General case: content nodes are processed first, then the children.
const newContent = semantic.contentNodes.map(cloneContentNode);
setOperatorAttribute(semantic, newContent);
const newChildren = semantic.childNodes.map(walkTree);
const childrenList = SemanticSkeleton.combineContentChildren(semantic.type, semantic.role, newContent, newChildren);
newNode = semantic.mathmlTree;
if (newNode === null) {
// Case 1: no MathML tree is recorded for this node; find or create a layer.
Debugger.getInstance().output('Walktree Case 1');
newNode = introduceNewLayer(childrenList, semantic);
}
else {
const attached = attachedElement(childrenList);
Debugger.getInstance().output('Walktree Case 2');
if (attached) {
// Case 2.1: use the parent of the first child that is attached to a parent.
Debugger.getInstance().output('Walktree Case 2.1');
newNode = parentNode(attached);
}
else {
// Case 2.2: no child is attached; use the inner node of the recorded tree.
Debugger.getInstance().output('Walktree Case 2.2');
newNode = getInnerNode(newNode);
}
}
newNode = rewriteMfenced(newNode);
// Note: mergeChildren consumes childrenList (it shifts elements off it).
mergeChildren(newNode, childrenList, semantic);
// Attributes are written at most once per semantic id within one enrich run.
if (!IDS.has(semantic.id)) {
IDS.set(semantic.id, true);
EnrichAttr.setAttributes(newNode, semantic);
}
Debugger.getInstance().output('WALKING END: ' + semantic.toString());
return ascendNewNode(newNode);
}
/**
 * Finds or creates the MathML element under which `children` are grouped.
 *
 * The least common ancestor reported by mathmlLca is reused only if it is
 * valid, has an empty tag, and is either attached or belongs to a semantic
 * node without a parent. Otherwise a new element from EnrichAttr.addMrow() is
 * introduced (or, if no child is attached, the first child itself is used).
 *
 * @param {Array<Element>} children The MathML children to be grouped.
 * @param {object} semantic The semantic node the layer is created for; its
 *     `mathmlTree` is set to the result if it was not set before.
 * @returns {Element} The element that now holds the layer.
 */
export function introduceNewLayer(children, semantic) {
  const { node: lcaNode, type: lcaKind } = mathmlLca(children);
  let layer = lcaNode;
  // De Morgan form of the original test; operands are evaluated in the same
  // order and with the same short-circuiting.
  const reusable = lcaKind === lcaType.VALID &&
    SemanticUtil.hasEmptyTag(layer) &&
    (layer.parentNode || !semantic.parent);
  if (!reusable) {
    Debugger.getInstance().output('Walktree Case 1.1');
    layer = EnrichAttr.addMrow();
    if (lcaKind === lcaType.PRUNED) {
      Debugger.getInstance().output('Walktree Case 1.1.0');
      layer = introduceLayerAboveLca(layer, lcaNode, children);
    } else if (children[0]) {
      Debugger.getInstance().output('Walktree Case 1.1.1');
      const anchor = attachedElement(children);
      if (anchor) {
        // Collect the affected siblings before the anchor is replaced.
        const moved = childrenSubset(parentNode(anchor), children);
        DomUtil.replaceNode(anchor, layer);
        for (const item of moved) {
          layer.appendChild(item);
        }
      } else {
        // Nothing is attached: the first child itself becomes the layer.
        moveSemanticAttributes(layer, children[0]);
        layer = children[0];
      }
    }
  }
  if (!semantic.mathmlTree) {
    semantic.mathmlTree = layer;
  }
  return layer;
}
/**
 * Inserts `mrow` as a layer around the inner node found below `lca`.
 *
 * If that inner node has the math tag, its semantic attributes and child
 * nodes are moved to `mrow` and the two swap roles, so the math element ends
 * up as the outer layer.
 *
 * @param {Element} mrow The new layer element.
 * @param {Element} lca The least common ancestor of the children.
 * @param {Array<Element>} children The children; the entry holding `lca` is
 *     overwritten in place with the inner node.
 * @returns {Element} The element acting as the outer layer.
 */
function introduceLayerAboveLca(mrow, lca, children) {
  let inner = descendNode(lca);
  if (SemanticUtil.hasMathTag(inner)) {
    Debugger.getInstance().output('Walktree Case 1.1.0.0');
    moveSemanticAttributes(inner, mrow);
    for (const grandChild of DomUtil.toArray(inner.childNodes)) {
      mrow.appendChild(grandChild);
    }
    // Swap roles: the math element becomes the outer layer.
    [mrow, inner] = [inner, mrow];
  }
  children[children.indexOf(lca)] = inner;
  DomUtil.replaceNode(inner, mrow);
  mrow.appendChild(inner);
  children.forEach((child) => {
    mrow.appendChild(child);
  });
  return mrow;
}
/**
 * Moves every attribute listed in EnrichAttr.EnrichAttributes that is present
 * on `oldNode` over to `newNode`, removing it from `oldNode`.
 *
 * @param {Element} oldNode The element the attributes are taken from.
 * @param {Element} newNode The element the attributes are written to.
 */
function moveSemanticAttributes(oldNode, newNode) {
  for (const name of EnrichAttr.EnrichAttributes) {
    if (!oldNode.hasAttribute(name)) {
      continue;
    }
    const value = oldNode.getAttribute(name);
    newNode.setAttribute(name, value);
    oldNode.removeAttribute(name);
  }
}
/**
 * Returns the contiguous run of child nodes of `node` that spans from the
 * first to the last child that also occurs in `newChildren`.
 *
 * @param {Element} node The parent element.
 * @param {Array<Element>} newChildren The nodes to look for.
 * @returns {Array<Node>} The spanned child nodes; empty if none of
 *     `newChildren` is a child of `node`.
 */
function childrenSubset(node, newChildren) {
  const siblings = DomUtil.toArray(node.childNodes);
  const positions = newChildren
    .map((child) => siblings.indexOf(child))
    .filter((position) => position !== -1);
  // With no hits the bounds stay at +Infinity / -Infinity and the slice is empty.
  const [first, last] = positions.reduce(
    ([low, high], position) => [Math.min(low, position), Math.max(high, position)],
    [+Infinity, -Infinity]
  );
  return siblings.slice(first, last + 1);
}
/**
 * Computes the list of "old" children of `node` that mergeChildren compares
 * against, looking through intermediate elements that carry no semantic type
 * attribute.
 *
 * @param {Element} node The element whose children are collated.
 * @param {Array<Element>} children The new children that are to be merged.
 * @param {object} semantic The semantic node; the `mathmlTree` of its child
 *     nodes is used to pick up trailing children.
 * @returns {Array<Element>} The collated children followed by the trailing ones.
 */
function collateChildNodes(node, children, semantic) {
const oldChildren = [];
// Worklist, processed front to back; expanded elements are put at its front.
let newChildren = DomUtil.toArray(node.childNodes);
let notFirst = false;
while (newChildren.length) {
const child = newChildren.shift();
// NOTE(review): unlike collectChildNodes, there is no nodeType check before
// hasAttribute here; presumably `node` has only element children - confirm.
if (child.hasAttribute(EnrichAttr.Attribute.TYPE)) {
oldChildren.push(child);
continue;
}
// Untyped child: look for typed or listed nodes below it.
const collect = collectChildNodes(child, children);
if (collect.length === 0) {
continue;
}
if (collect.length === 1) {
oldChildren.push(child);
continue;
}
// Several relevant nodes below this child: every such child except the first
// one encountered is marked with 'AuxiliaryImplicit' (read and removed again
// by insertNewChild), then its relevant descendants are processed next.
if (notFirst) {
child.setAttribute('AuxiliaryImplicit', true);
}
else {
notFirst = true;
}
newChildren = collect.concat(newChildren);
}
// Walk the semantic children from the back and collect those MathML trees
// that are among `children`, stopping at the first one already collated.
const rear = [];
const semChildren = semantic.childNodes.map(function (x) {
return x.mathmlTree;
});
while (semChildren.length) {
const schild = semChildren.pop();
if (!schild) {
continue;
}
if (oldChildren.indexOf(schild) !== -1) {
break;
}
if (children.indexOf(schild) !== -1) {
rear.unshift(schild);
}
}
return oldChildren.concat(rear);
}
/**
 * Collects, in document order, the topmost element descendants of `node` that
 * either carry the semantic type attribute or are contained in `children`.
 * The search does not descend below a collected element; non-element nodes
 * are skipped.
 *
 * @param {Element} node The element whose descendants are searched.
 * @param {Array<Element>} children The nodes that count as hits.
 * @returns {Array<Element>} The collected elements.
 */
function collectChildNodes(node, children) {
  const found = [];
  // Pre-order traversal; yields the same order as the original worklist.
  const visit = (parent) => {
    for (const child of DomUtil.toArray(parent.childNodes)) {
      if (child.nodeType !== DomUtil.NodeType.ELEMENT_NODE) {
        continue;
      }
      const isHit = child.hasAttribute(EnrichAttr.Attribute.TYPE) ||
        children.indexOf(child) !== -1;
      if (isHit) {
        found.push(child);
      } else {
        visit(child);
      }
    }
  };
  visit(node);
  return found;
}
/**
 * Merges `newChildren` into the children of `node`, keeping existing children
 * in place and inserting the ones that are missing.
 *
 * Note that `newChildren` is consumed: elements are shifted off the array that
 * the caller passed in.
 *
 * @param {Element} node The element receiving the children.
 * @param {Array<Element>} newChildren The children that should end up below
 *     `node`, in order.
 * @param {object} semantic The semantic node `node` belongs to.
 */
function mergeChildren(node, newChildren, semantic) {
if (!newChildren.length)
return;
// The node is its own single child: nothing to merge.
if (newChildren.length === 1 && node === newChildren[0])
return;
// For implicit nodes with the combine_juxtaposition heuristic active, compare
// against the collated children instead of the direct child nodes.
const oldChildren = semantic.role === SemanticRole.IMPLICIT &&
SemanticHeuristics.flags.combine_juxtaposition
? collateChildNodes(node, newChildren, semantic)
: DomUtil.toArray(node.childNodes);
if (!oldChildren.length) {
newChildren.forEach(function (x) {
node.appendChild(x);
});
return;
}
let oldCounter = 0;
while (newChildren.length) {
const newChild = newChildren[0];
// Old and new child coincide (or are matching function applications, in
// which case functionApplication replaces the old one): advance both.
if (oldChildren[oldCounter] === newChild ||
functionApplication(oldChildren[oldCounter], newChild)) {
newChildren.shift();
oldCounter++;
continue;
}
// The current old child is not among the remaining new children: skip it.
if (oldChildren[oldCounter] &&
newChildren.indexOf(oldChildren[oldCounter]) === -1) {
oldCounter++;
continue;
}
// The new child already sits somewhere below node: nothing to insert.
if (isDescendant(newChild, node)) {
newChildren.shift();
continue;
}
const oldChild = oldChildren[oldCounter];
if (!oldChild) {
// Old children are exhausted.
if (newChild.parentNode) {
// The new child is attached elsewhere; continue merging below its parent.
node = parentNode(newChild);
newChildren.shift();
continue;
}
const nextChild = newChildren[1];
if (nextChild && nextChild.parentNode) {
// The following new child is attached: insert the detached one right before
// it and consume both.
node = parentNode(nextChild);
node.insertBefore(newChild, nextChild);
newChildren.shift();
newChildren.shift();
continue;
}
// Neither is attached: append the new child at the end of node.
node.insertBefore(newChild, null);
newChildren.shift();
continue;
}
insertNewChild(node, oldChild, newChild);
newChildren.shift();
}
}
/**
 * Inserts `newChild` before `oldChild`, or before the highest ancestor of
 * `oldChild` that is reached by climbing through first-child positions.
 * Climbing stops at `node`, at an element that is not the first child of its
 * parent, or at an element marked with 'AuxiliaryImplicit'. The marker is
 * removed from the element the new child is inserted in front of.
 *
 * @param {Element} node The element below which the insertion takes place.
 * @param {Element} oldChild The existing child used as reference point.
 * @param {Element} newChild The child to insert.
 */
function insertNewChild(node, oldChild, newChild) {
  let anchor = oldChild;
  let container = parentNode(anchor);
  const canClimb = () => container &&
    container.firstChild === anchor &&
    !anchor.hasAttribute('AuxiliaryImplicit') &&
    container !== node;
  while (canClimb()) {
    anchor = container;
    container = parentNode(anchor);
  }
  if (!container) {
    // Ran off the top of the tree: nothing to insert into.
    return;
  }
  container.insertBefore(newChild, anchor);
  anchor.removeAttribute('AuxiliaryImplicit');
}
/**
 * Tests whether `node` is a proper ancestor of `child` by following parent
 * links upwards from `child`.
 *
 * @param {Node} child The potential descendant; a falsy value yields false.
 * @param {Node} node The potential ancestor.
 * @returns {boolean} True if `node` is reached while walking up.
 */
function isDescendant(child, node) {
  if (!child) {
    return false;
  }
  let ancestor = child;
  for (;;) {
    ancestor = parentNode(ancestor);
    // Compare before the end-of-chain check, exactly as the original loop did.
    if (ancestor === node) {
      return true;
    }
    if (!ancestor) {
      return false;
    }
  }
}
/**
 * Checks whether both nodes contain exactly the function application symbol
 * and `newNode` is marked as added. If so, attributes of `oldNode` that
 * `newNode` lacks are copied over and `oldNode` is replaced by `newNode`.
 *
 * @param {Element} oldNode The node currently in the tree.
 * @param {Element} newNode The newly created node.
 * @returns {boolean} True if the replacement took place.
 */
function functionApplication(oldNode, newNode) {
  const appl = NamedSymbol.functionApplication;
  if (!oldNode || !newNode) {
    return false;
  }
  if (!oldNode.textContent || !newNode.textContent) {
    return false;
  }
  if (oldNode.textContent !== appl || newNode.textContent !== appl) {
    return false;
  }
  if (newNode.getAttribute(EnrichAttr.Attribute.ADDED) !== 'true') {
    return false;
  }
  // Attributes already present on the new node win over the old ones.
  let position = 0;
  let attr = oldNode.attributes[position];
  while (attr) {
    if (!newNode.hasAttribute(attr.nodeName)) {
      newNode.setAttribute(attr.nodeName, attr.nodeValue);
    }
    position++;
    attr = oldNode.attributes[position];
  }
  DomUtil.replaceNode(oldNode, newNode);
  return true;
}
/**
 * Classification of the least common ancestor computed by mathmlLca.
 */
const lcaType = {
  VALID: 'valid',
  INVALID: 'invalid',
  PRUNED: 'pruned'
};
/**
 * Computes the least common ancestor of the first and the last attached
 * element in `children`, together with a classification of the result.
 *
 * @param {Array<Element>} children The MathML children.
 * @returns {{type: string, node: ?Element}} `type` is one of the lcaType
 *     values; `node` is the ancestor, or null if the result is invalid
 *     because no child is attached or the paths do not meet.
 */
function mathmlLca(children) {
  const first = attachedElement(children);
  if (!first) {
    return { type: lcaType.INVALID, node: null };
  }
  const last = attachedElement(children.slice().reverse());
  if (first === last) {
    return { type: lcaType.VALID, node: first };
  }
  const fullLeftPath = pathToRoot(first);
  const leftPath = prunePath(fullLeftPath, children);
  // The right path stops as soon as it reaches a node on the left path.
  const rightPath = pathToRoot(last, (candidate) => leftPath.indexOf(candidate) !== -1);
  const lca = rightPath[0];
  const meetingPoint = leftPath.indexOf(lca);
  if (meetingPoint === -1) {
    return { type: lcaType.INVALID, node: null };
  }
  let type;
  if (leftPath.length !== fullLeftPath.length) {
    type = lcaType.PRUNED;
  } else if (validLca(leftPath[meetingPoint + 1], rightPath[1])) {
    type = lcaType.VALID;
  } else {
    type = lcaType.INVALID;
  }
  return { type, node: lca };
}
/**
 * Cuts `path` off after its first element that is contained in `children`
 * (that element is kept). A falsy entry ends the search in the same way. If
 * neither occurs the whole path is returned as a copy.
 *
 * @param {Array<Element>} path The path to prune.
 * @param {Array<Element>} children The elements that end the path.
 * @returns {Array<Element>} The pruned copy of the path.
 */
function prunePath(path, children) {
  const stop = path.findIndex((step) => !step || children.indexOf(step) !== -1);
  const end = stop === -1 ? path.length : stop;
  return path.slice(0, end + 1);
}
/**
 * Returns the first node in the list that has a parent node.
 *
 * @param {Array<Node>} nodes The nodes to inspect, in order.
 * @returns {?Node} The first attached node, or null if there is none.
 */
function attachedElement(nodes) {
  for (const candidate of nodes) {
    if (candidate.parentNode) {
      return candidate;
    }
  }
  return null;
}
/**
 * Builds the path from `node` upwards, topmost element first. Climbing stops
 * at a node for which the test holds, at a node with the math tag, or at a
 * node without a parent.
 *
 * @param {Element} node The node the path starts from (last path element).
 * @param {function(Element): boolean=} opt_test Optional stop predicate;
 *     defaults to a predicate that never holds.
 * @returns {Array<Element>} The path, ending in `node`.
 */
function pathToRoot(node, opt_test) {
  const shouldStop = opt_test || (() => false);
  const path = [];
  let current = node;
  for (;;) {
    path.unshift(current);
    if (shouldStop(current) ||
        SemanticUtil.hasMathTag(current) ||
        !current.parentNode) {
      return path;
    }
    current = parentNode(current);
  }
}
/**
 * Checks that both nodes exist, that `left` has no previous sibling and that
 * `right` has no next sibling.
 *
 * @param {?Node} left The node expected to be the leftmost child.
 * @param {?Node} right The node expected to be the rightmost child.
 * @returns {boolean} True if all of the conditions hold.
 */
function validLca(left, right) {
  if (!left || !right) {
    return false;
  }
  return !left.previousSibling && !right.nextSibling;
}
/**
 * Climbs from `newNode` to its parent for as long as the current node does
 * not have the math tag and is a unit child of its parent (see unitChild).
 *
 * @param {Element} newNode The node to start from.
 * @returns {Element} The topmost node reached.
 */
export function ascendNewNode(newNode) {
  if (SemanticUtil.hasMathTag(newNode) || !unitChild(newNode)) {
    return newNode;
  }
  return ascendNewNode(parentNode(newNode));
}
/**
 * Descends from `node` while the current node has exactly one element child
 * without an ignore tag, and that child has an empty tag and no semantic
 * type attribute.
 *
 * @param {Element} node The node to start from.
 * @returns {Element} The innermost node reached.
 */
function descendNode(node) {
  let current = node;
  for (;;) {
    const children = DomUtil.toArray(current.childNodes);
    if (!children) {
      return current;
    }
    const candidates = children.filter((child) =>
      child.nodeType === DomUtil.NodeType.ELEMENT_NODE &&
      !SemanticUtil.hasIgnoreTag(child));
    const canDescend = candidates.length === 1 &&
      SemanticUtil.hasEmptyTag(candidates[0]) &&
      !candidates[0].hasAttribute(EnrichAttr.Attribute.TYPE);
    if (!canDescend) {
      return current;
    }
    current = candidates[0];
  }
}
/**
 * Tests whether `node` is the only relevant child of its parent: the parent
 * must exist and have an empty tag, and every sibling must be ignorable.
 *
 * @param {Element} node The node to test.
 * @returns {boolean} True if `node` is a unit child.
 */
function unitChild(node) {
  const parent = parentNode(node);
  if (parent && SemanticUtil.hasEmptyTag(parent)) {
    const siblings = DomUtil.toArray(parent.childNodes);
    // A single sibling that is neither `node` nor ignorable disqualifies.
    return !siblings.some((sibling) => sibling !== node && !isIgnorable(sibling));
  }
  return false;
}
/**
 * Decides whether a node can be ignored when looking for meaningful content.
 *
 * Missing nodes, non-element nodes and elements with an ignore tag are
 * ignorable. An element is not ignorable if it has child nodes without having
 * an empty tag, has a display tag, carries the semantic type attribute, or is
 * an orphaned glyph. Any other element is ignorable exactly if all of its
 * child nodes are.
 *
 * @param {?Node} node The node to test; may be null or undefined.
 * @returns {boolean} True if the node can be ignored.
 */
function isIgnorable(node) {
  // The missing-node check has to come before any property access; testing
  // nodeType first made this guard unreachable and threw on null/undefined.
  if (!node || node.nodeType !== DomUtil.NodeType.ELEMENT_NODE) {
    return true;
  }
  if (SemanticUtil.hasIgnoreTag(node)) {
    return true;
  }
  const children = DomUtil.toArray(node.childNodes);
  if ((!SemanticUtil.hasEmptyTag(node) && children.length) ||
      SemanticUtil.hasDisplayTag(node) ||
      node.hasAttribute(EnrichAttr.Attribute.TYPE) ||
      SemanticUtil.isOrphanedGlyph(node)) {
    return false;
  }
  // The child list computed above is reused instead of being built again.
  return children.every(isIgnorable);
}
/**
 * Returns the value of the `parentNode` property of a node.
 *
 * @param {Node} element The node whose parent is requested.
 * @returns {?Node} The value of `element.parentNode`.
 */
function parentNode(element) {
  const { parentNode: parent } = element;
  return parent;
}
/**
 * Stores the string form of a SemanticSkeleton built from `collapsed` in the
 * collapsed attribute of `node`.
 *
 * @param {Element} node The element to annotate.
 * @param {*} collapsed The structure handed to the SemanticSkeleton
 *     constructor.
 */
export function addCollapsedAttribute(node, collapsed) {
  const value = new SemanticSkeleton(collapsed).toString();
  node.setAttribute(EnrichAttr.Attribute.COLLAPSED, value);
}
/**
 * Produces the MathML element for a content node. A content node that has
 * MathML is walked like any other node. Otherwise a new element is created,
 * an invisible operator if SETTINGS.implicit is set and the result of
 * EnrichAttr.addMrow() if not, and it is recorded as the node's only MathML.
 *
 * @param {object} content The semantic content node.
 * @returns {Element} The walked or newly created element.
 */
export function cloneContentNode(content) {
  if (content.mathml.length) {
    return walkTree(content);
  }
  let clone;
  if (SETTINGS.implicit) {
    clone = createInvisibleOperator(content);
  } else {
    clone = EnrichAttr.addMrow();
  }
  content.mathml = [clone];
  return clone;
}
/**
 * Replaces an mfenced element by an element from EnrichAttr.addMrow() that
 * takes over its child nodes and all attributes except `open`, `close` and
 * `separators`. Any other element is returned unchanged.
 *
 * @param {Element} mml The element to rewrite.
 * @returns {Element} The replacement element, or `mml` itself if it is not
 *     an mfenced element.
 */
export function rewriteMfenced(mml) {
  if (DomUtil.tagName(mml) !== MMLTAGS.MFENCED) {
    return mml;
  }
  const fenceAttributes = ['open', 'close', 'separators'];
  const replacement = EnrichAttr.addMrow();
  let position = 0;
  let attr = mml.attributes[position];
  while (attr) {
    if (fenceAttributes.indexOf(attr.name) === -1) {
      replacement.setAttribute(attr.name, attr.value);
    }
    position++;
    attr = mml.attributes[position];
  }
  // Iterate over a snapshot: appending moves nodes out of mml.childNodes.
  for (const child of DomUtil.toArray(mml.childNodes)) {
    replacement.appendChild(child);
  }
  DomUtil.replaceNode(mml, replacement);
  return replacement;
}
/**
 * Creates an `mo` element for a semantic operator that has no MathML. The
 * element contains the operator's text content, carries the operator's
 * semantic attributes and is marked as added.
 *
 * @param {object} operator The semantic operator node.
 * @returns {Element} The new `mo` element.
 */
function createInvisibleOperator(operator) {
  const mo = DomUtil.createElement('mo');
  mo.appendChild(DomUtil.createTextNode(operator.textContent));
  EnrichAttr.setAttributes(mo, operator);
  mo.setAttribute(EnrichAttr.Attribute.ADDED, 'true');
  return mo;
}
/**
 * Sets the operator attribute on the inner node of every content element.
 * The value is the semantic type, followed by a comma and the text content
 * if the semantic node has any.
 *
 * @param {object} semantic The semantic node the content belongs to.
 * @param {Array<Element>} content The MathML elements of the content nodes.
 */
export function setOperatorAttribute(semantic, content) {
  const suffix = semantic.textContent ? ',' + semantic.textContent : '';
  const operator = semantic.type + suffix;
  for (const element of content) {
    getInnerNode(element).setAttribute(EnrichAttr.Attribute.OPERATOR, operator);
  }
}
/**
 * Looks through wrapper elements for the single meaningful node inside
 * `node`. Non-ignorable children that have an empty tag and are not typed as
 * punctuation are looked through recursively; all other non-ignorable
 * children count as they are. If exactly one node remains it is returned,
 * otherwise `node` itself.
 *
 * @param {Element} node The element to look into.
 * @returns {Element} The unique inner node, or `node`.
 */
export function getInnerNode(node) {
  const children = DomUtil.toArray(node.childNodes);
  if (!children) {
    return node;
  }
  const significant = children.filter((child) => !isIgnorable(child));
  const inner = [];
  for (const candidate of significant) {
    const isWrapper = SemanticUtil.hasEmptyTag(candidate) &&
      candidate.getAttribute(EnrichAttr.Attribute.TYPE) !==
        SemanticType.PUNCTUATION;
    if (!isWrapper) {
      inner.push(candidate);
      continue;
    }
    // A wrapper that resolves to itself contributes nothing.
    const deeper = getInnerNode(candidate);
    if (deeper && deeper !== candidate) {
      inner.push(deeper);
    }
  }
  return inner.length === 1 ? inner[0] : node;
}
/**
 * Formats the string form of `element` for debug output, passing it through
 * DomUtil.formatXml and EnrichAttr.removeAttributePrefix.
 *
 * @param {object} element The object to print; its `toString()` is used.
 * @param {string} name The title used in wiki mode.
 * @param {boolean=} wiki If true, the output is prefixed with the title and
 *     wrapped in a fenced html block.
 * @returns {string} The formatted output.
 */
function formattedOutput(element, name, wiki = false) {
  const xml = DomUtil.formatXml(element.toString());
  const output = EnrichAttr.removeAttributePrefix(xml);
  if (!wiki) {
    return output;
  }
  return name + ':\n```html\n' + output + '\n```\n';
}
/**
 * Walks all children of a punctuated semantic node and builds the id
 * structure describing the collapsed node.
 *
 * @param {object} semantic The punctuated semantic node.
 * @param {Array<Element>=} opt_children Optional array that receives the
 *     walked MathML children. If it is given, the parent attribute is not set
 *     on the children's inner nodes.
 * @returns {Array} The semantic node's id, then the content ids prefixed
 *     with 'c' as a nested array, then the ids of the children.
 */
export function collapsePunctuated(semantic, opt_children) {
  const optional = !!opt_children;
  const children = opt_children || [];
  const parent = semantic.parent;
  const contentIds = ['c'].concat(semantic.contentNodes.map((content) => content.id));
  const childIds = [semantic.id, contentIds];
  for (const child of semantic.childNodes) {
    if (!child) {
      // The original index loop ended at the first falsy entry.
      break;
    }
    const mmlChild = walkTree(child);
    children.push(mmlChild);
    const innerNode = getInnerNode(mmlChild);
    if (parent && !optional) {
      innerNode.setAttribute(EnrichAttr.Attribute.PARENT, parent.id.toString());
    }
    childIds.push(child.id);
  }
  return childIds;
}