534 lines
22 KiB
JavaScript
534 lines
22 KiB
JavaScript
'use strict';
|
|
|
|
/**
 * Probes whether regular expressions in the runtime support the unicode features this module relies on.
 * Should the runtime not accept the flag `u` or unicode code point escapes,
 * character classes without unicode handling will be used.
 *
 * The probe builds a `u`-flagged expression for U+1D306 and checks
 * that executing it on that character yields a match whose first entry has length 2.
 *
 * @param {typeof RegExp} [RegExpImpl=RegExp]
 * For testing: the RegExp class. Any value that is not a function is replaced by the global `RegExp`.
 * @returns {boolean}
 * `true` if the probe matched as expected, `false` if it did not match or anything was thrown.
 * @see https://node.green/#ES2015-syntax-RegExp--y--and--u--flags
 */
function detectUnicodeSupport(RegExpImpl) {
	var Impl = typeof RegExpImpl === 'function' ? RegExpImpl : RegExp;
	var supported = false;
	try {
		// eslint-disable-next-line es5/no-unicode-regex,es5/no-unicode-code-point-escape
		var found = new Impl('\u{1d306}', 'u').exec('𝌆');
		supported = Boolean(found) && found[0].length === 2;
	} catch (error) {
		// Deliberately ignored: a failing probe means the feature is not supported.
	}
	return supported;
}
|
|
var UNICODE_SUPPORT = detectUnicodeSupport();
|
|
|
|
/**
 * Extracts the content of a character class from the source of a RegExp:
 * everything between the leading `[` and the last `]`.
 * Anything following that last `]` (e.g. a trailing quantifier) is dropped as well.
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @returns {string}
 * The part of the source between the leading `[` and the last `]`.
 * @throws {Error}
 * If the source of `regexp` does not start with `[`.
 */
function chars(regexp) {
	var source = regexp.source;
	if (source[0] === '[') {
		return source.slice(1, source.lastIndexOf(']'));
	}
	throw new Error(regexp + ' can not be used with chars');
}
|
|
|
|
/**
 * Creates a new character class regular expression
 * by removing the first occurrence of `search` from the source of `regexp`.
 *
 * The flags of `regexp` are not carried over:
 * the result only has the `u` flag, and only if `UNICODE_SUPPORT` is truthy.
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @param {string} search
 * The character(s) to remove, must be a non-empty string that is part of the source.
 * `-` can only be removed when it is the first character inside the class.
 * @returns {RegExp}
 * @throws {Error}
 * If the source does not start with `[`, if `search` is empty or not a string,
 * if `search` is not part of the source, or if `-` should be removed from another position than the first.
 */
function chars_without(regexp, search) {
	var source = regexp.source;
	if (source[0] !== '[') {
		throw new Error('/' + source + '/ can not be used with chars_without');
	}
	if (!search || typeof search !== 'string') {
		throw new Error(JSON.stringify(search) + ' is not a valid search');
	}
	var position = source.indexOf(search);
	if (position === -1) {
		throw new Error('"' + search + '" is not in /' + source + '/');
	}
	// Index 1 is the first character after the opening `[`.
	// NOTE(review): presumably a `-` anywhere else could be part of a range, so removing it is refused.
	if (search === '-' && position !== 1) {
		throw new Error('"' + search + '" is not at the first position of /' + source + '/');
	}
	// A string pattern only replaces the first occurrence.
	return new RegExp(source.replace(search, ''), UNICODE_SUPPORT ? 'u' : '');
}
|
|
|
|
/**
 * Combines strings and regular expressions into a single regular expression,
 * using `RegExp.source` for every part that is not a string.
 *
 * The flags of the parts are ignored: the result always has the flag `m`,
 * and additionally the flag `u` if `UNICODE_SUPPORT` is truthy.
 *
 * A bare `'|'` part is rejected when the function is called without a `this` value,
 * since an alternation that is not wrapped in a group would leak into neighbouring parts;
 * `regg` calls this function with a `this` value and is therefore allowed to pass `'|'`.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error}
 * If one of the parts is the string `'|'` and `this` is `undefined`.
 */
function reg(args) {
	var calledWithoutThis = this === undefined;
	var sources = [];
	for (var index = 0; index < arguments.length; index++) {
		var piece = arguments[index];
		if (typeof piece !== 'string') {
			sources.push(piece.source);
			continue;
		}
		if (calledWithoutThis && piece === '|') {
			throw new Error('use regg instead of reg to wrap expressions with `|`!');
		}
		sources.push(piece);
	}
	return new RegExp(sources.join(''), UNICODE_SUPPORT ? 'mu' : 'm');
}
|
|
|
|
/**
 * Like `reg`, but wraps the expression in `(?:` and `)` to create a non-capturing group.
 * `reg` is invoked with `regg` as its `this` value, which allows `'|'` to be passed as a part.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error}
 * If called without any argument.
 */
function regg(args) {
	if (arguments.length > 0) {
		var parts = Array.prototype.slice.call(arguments);
		parts.unshift('(?:');
		parts.push(')');
		return reg.apply(regg, parts);
	}
	throw new Error('no parameters provided');
}
|
|
|
|
// /**
|
|
// * Append ^ to the beginning of the expression.
|
|
// * @param {...(RegExp | string)[]} args
|
|
// * @returns {RegExp}
|
|
// */
|
|
// function reg_start(args) {
|
|
// if (arguments.length === 0) {
|
|
// throw new Error('no parameters provided');
|
|
// }
|
|
// return reg.apply(reg_start, ['^'].concat(Array.prototype.slice.call(arguments)));
|
|
// }
|
|
|
|
// https://www.w3.org/TR/xml/#document
|
|
// `[1] document ::= prolog element Misc*`
|
|
// https://www.w3.org/TR/xml11/#NT-document
|
|
// `[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )`
|
|
|
|
/**
|
|
* A character usually appearing in wrongly converted strings.
|
|
*
|
|
* @type {string}
|
|
* @see https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
|
|
* @see https://nodejs.dev/en/api/v18/buffer/#buffers-and-character-encodings
|
|
* @see https://www.unicode.org/faq/utf_bom.html#BOM
|
|
* @readonly
|
|
*/
|
|
var UNICODE_REPLACEMENT_CHARACTER = '\uFFFD';
|
|
// https://www.w3.org/TR/xml/#NT-Char
|
|
// any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
|
// `[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
|
|
// https://www.w3.org/TR/xml11/#NT-Char
|
|
// `[2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
|
|
// https://www.w3.org/TR/xml11/#NT-RestrictedChar
|
|
// `[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]`
|
|
// https://www.w3.org/TR/xml11/#charsets
|
|
// Character class limited to the Basic Multilingual Plane; `-` is listed first and kept out of the ranges.
var Char = /[-\x09\x0A\x0D\x20-\x2C\x2E-\uD7FF\uE000-\uFFFD]/;
// With unicode support the class is extended by the code points U+10000 to U+10FFFF.
// eslint-disable-next-line es5/no-unicode-code-point-escape
Char = UNICODE_SUPPORT ? reg('[', chars(Char), '\\u{10000}-\\u{10FFFF}', ']') : Char;
|
|
|
|
var _SChar = /[\x20\x09\x0D\x0A]/;
|
|
var SChar_s = chars(_SChar);
|
|
// https://www.w3.org/TR/xml11/#NT-S
|
|
// `[3] S ::= (#x20 | #x9 | #xD | #xA)+`
|
|
var S = reg(_SChar, '+');
|
|
// optional whitespace described as `S?` in the grammar,
|
|
// simplified to 0-n occurrences of the character class
|
|
// instead of 0-1 occurrences of a non-capturing group around S
|
|
var S_OPT = reg(_SChar, '*');
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-NameStartChar
|
|
// `[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]`
|
|
// Character class for the Basic Multilingual Plane part of production [4].
// U+037E (Greek question mark) is excluded, as required by `[#x370-#x37D] | [#x37F-#x1FFF]`.
// `:` is the first character of the class, it is removed again by `chars_without` for the NCName variants.
var NameStartChar =
	/[:_a-zA-Z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
if (UNICODE_SUPPORT) {
	// With unicode support the class is extended by the code points above U+FFFF.
	// NOTE(review): production [4] ends at #xEFFFF, while this range goes up to U+10FFFF - confirm this is intended.
	// eslint-disable-next-line es5/no-unicode-code-point-escape
	NameStartChar = reg('[', chars(NameStartChar), '\\u{10000}-\\u{10FFFF}', ']');
}
// The content of the character class, to be embedded into other character classes.
var NameStartChar_s = chars(NameStartChar);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-NameChar
|
|
// `[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]`
|
|
var NameChar = reg('[', NameStartChar_s, chars(/[-.0-9\xB7]/), chars(/[\u0300-\u036F\u203F-\u2040]/), ']');
|
|
// https://www.w3.org/TR/xml11/#NT-Name
|
|
// `[5] Name ::= NameStartChar (NameChar)*`
|
|
var Name = reg(NameStartChar, NameChar, '*');
|
|
/*
|
|
https://www.w3.org/TR/xml11/#NT-Names
|
|
`[6] Names ::= Name (#x20 Name)*`
|
|
*/
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-Nmtoken
|
|
// `[7] Nmtoken ::= (NameChar)+`
|
|
var Nmtoken = reg(NameChar, '+');
|
|
/*
|
|
https://www.w3.org/TR/xml11/#NT-Nmtokens
|
|
`[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*`
|
|
var Nmtokens = reg(Nmtoken, regg(/\x20/, Nmtoken), '*');
|
|
*/
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-EntityRef
|
|
// `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
|
|
var EntityRef = reg('&', Name, ';');
|
|
// https://www.w3.org/TR/xml11/#NT-CharRef
|
|
// `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
|
|
var CharRef = regg(/&#[0-9]+;|&#x[0-9a-fA-F]+;/);
|
|
|
|
/*
|
|
https://www.w3.org/TR/xml11/#NT-Reference
|
|
- `[67] Reference ::= EntityRef | CharRef`
|
|
- `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
|
|
- `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
|
|
*/
|
|
var Reference = regg(EntityRef, '|', CharRef);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-PEReference
|
|
// `[69] PEReference ::= '%' Name ';'`
|
|
// [VC: Entity Declared] [WFC: No Recursion] [WFC: In DTD]
|
|
var PEReference = reg('%', Name, ';');
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-EntityValue
|
|
// `[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"`
|
|
var EntityValue = regg(
|
|
reg('"', regg(/[^%&"]/, '|', PEReference, '|', Reference), '*', '"'),
|
|
'|',
|
|
reg("'", regg(/[^%&']/, '|', PEReference, '|', Reference), '*', "'")
|
|
);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-AttValue
|
|
// `[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"`
|
|
var AttValue = regg('"', regg(/[^<&"]/, '|', Reference), '*', '"', '|', "'", regg(/[^<&']/, '|', Reference), '*', "'");
|
|
|
|
// https://www.w3.org/TR/xml-names/#ns-decl
|
|
// https://www.w3.org/TR/xml-names/#ns-qualnames
|
|
// NameStartChar without ":"
|
|
var NCNameStartChar = chars_without(NameStartChar, ':');
|
|
// https://www.w3.org/TR/xml-names/#orphans
|
|
// `[5] NCNameChar ::= NameChar - ':'`
|
|
// An XML NameChar, minus the ":"
|
|
var NCNameChar = chars_without(NameChar, ':');
|
|
// https://www.w3.org/TR/xml-names/#NT-NCName
|
|
// `[4] NCName ::= Name - (Char* ':' Char*)`
|
|
// An XML Name, minus the ":"
|
|
var NCName = reg(NCNameStartChar, NCNameChar, '*');
|
|
|
|
/**
|
|
https://www.w3.org/TR/xml-names/#ns-qualnames
|
|
|
|
```
|
|
[7] QName ::= PrefixedName | UnprefixedName
|
|
=== (NCName ':' NCName) | NCName
|
|
=== NCName (':' NCName)?
|
|
[8] PrefixedName ::= Prefix ':' LocalPart
|
|
=== NCName ':' NCName
|
|
[9] UnprefixedName ::= LocalPart
|
|
=== NCName
|
|
[10] Prefix ::= NCName
|
|
[11] LocalPart ::= NCName
|
|
```
|
|
*/
|
|
var QName = reg(NCName, regg(':', NCName), '?');
|
|
var QName_exact = reg('^', QName, '$');
|
|
var QName_group = reg('(', QName, ')');
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-SystemLiteral
|
|
// `[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")`
|
|
var SystemLiteral = regg(/"[^"]*"|'[^']*'/);
|
|
|
|
/*
|
|
https://www.w3.org/TR/xml11/#NT-PI
|
|
```
|
|
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
|
|
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
|
```
|
|
target /xml/i is not excluded!
|
|
*/
|
|
var PI = reg(/^<\?/, '(', Name, ')', regg(S, '(', Char, '*?)'), '?', /\?>/);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-PubidChar
|
|
// `[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]`
|
|
var PubidChar = /[\x20\x0D\x0Aa-zA-Z0-9-'()+,./:=?;!*#@$_%]/;
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-PubidLiteral
|
|
// `[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"`
|
|
var PubidLiteral = regg('"', PubidChar, '*"', '|', "'", chars_without(PubidChar, "'"), "*'");
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-CharData
|
|
// `[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)`
|
|
|
|
var COMMENT_START = '<!--';
|
|
var COMMENT_END = '-->';
|
|
// https://www.w3.org/TR/xml11/#NT-Comment
|
|
// `[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'`
|
|
var Comment = reg(COMMENT_START, regg(chars_without(Char, '-'), '|', reg('-', chars_without(Char, '-'))), '*', COMMENT_END);
|
|
|
|
var PCDATA = '#PCDATA';
|
|
// https://www.w3.org/TR/xml11/#NT-Mixed
|
|
// `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'`
|
|
// https://www.w3.org/TR/xml-names/#NT-Mixed
|
|
// `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'`
|
|
// [VC: Proper Group/PE Nesting] [VC: No Duplicate Types]
|
|
var Mixed = regg(
|
|
reg(/\(/, S_OPT, PCDATA, regg(S_OPT, /\|/, S_OPT, QName), '*', S_OPT, /\)\*/),
|
|
'|',
|
|
reg(/\(/, S_OPT, PCDATA, S_OPT, /\)/)
|
|
);
|
|
|
|
var _children_quantity = /[?*+]?/;
|
|
/*
|
|
`[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'` [VC: Proper Group/PE Nesting]
|
|
`[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'` [VC: Proper Group/PE Nesting]
|
|
simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
|
|
var _choice_or_seq = reg('[', NameChar_s, SChar_s, chars(_children_quantity), '()|,]*');
|
|
```
|
|
[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
|
|
=== (Name | '(' S? cp ( S? '|' S? cp )+ S? ')' | '(' S? cp ( S? ',' S? cp )* S? ')') ('?' | '*' | '+')?
|
|
!== (Name | [_choice_or_seq]*) ('?' | '*' | '+')?
|
|
```
|
|
simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
|
|
var cp = reg(regg(Name, '|', _choice_or_seq), _children_quantity);
|
|
*/
|
|
/*
|
|
Inefficient regular expression (High)
|
|
This part of the regular expression may cause exponential backtracking on strings starting with '(|' and containing many repetitions of '|'.
|
|
https://github.com/xmldom/xmldom/security/code-scanning/91
|
|
var choice = regg(/\(/, S_OPT, cp, regg(S_OPT, /\|/, S_OPT, cp), '+', S_OPT, /\)/);
|
|
*/
|
|
/*
|
|
Inefficient regular expression (High)
|
|
This part of the regular expression may cause exponential backtracking on strings starting with '(,' and containing many repetitions of ','.
|
|
https://github.com/xmldom/xmldom/security/code-scanning/92
|
|
var seq = regg(/\(/, S_OPT, cp, regg(S_OPT, /,/, S_OPT, cp), '*', S_OPT, /\)/);
|
|
*/
|
|
|
|
// `[47] children ::= (choice | seq) ('?' | '*' | '+')?`
|
|
// simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
|
|
var children = reg(/\([^>]+\)/, _children_quantity /*regg(choice, '|', seq), _children_quantity*/);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-contentspec
|
|
// `[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children`
|
|
var contentspec = regg('EMPTY', '|', 'ANY', '|', Mixed, '|', children);
|
|
|
|
var ELEMENTDECL_START = '<!ELEMENT';
|
|
// https://www.w3.org/TR/xml11/#NT-elementdecl
|
|
// `[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'`
|
|
// https://www.w3.org/TR/xml-names/#NT-elementdecl
|
|
// `[17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'`
|
|
// because of https://www.w3.org/TR/xml11/#NT-PEReference
|
|
// since xmldom is not supporting replacements of PEReferences in the DTD
|
|
// this also supports PEReference in the possible places
|
|
var elementdecl = reg(ELEMENTDECL_START, S, regg(QName, '|', PEReference), S, regg(contentspec, '|', PEReference), S_OPT, '>');
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-NotationType
|
|
// `[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'`
|
|
// [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] [VC: No Duplicate Tokens]
|
|
var NotationType = reg('NOTATION', S, /\(/, S_OPT, Name, regg(S_OPT, /\|/, S_OPT, Name), '*', S_OPT, /\)/);
|
|
// https://www.w3.org/TR/xml11/#NT-Enumeration
|
|
// `[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'`
|
|
// [VC: Enumeration] [VC: No Duplicate Tokens]
|
|
var Enumeration = reg(/\(/, S_OPT, Nmtoken, regg(S_OPT, /\|/, S_OPT, Nmtoken), '*', S_OPT, /\)/);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-EnumeratedType
|
|
// `[57] EnumeratedType ::= NotationType | Enumeration`
|
|
var EnumeratedType = regg(NotationType, '|', Enumeration);
|
|
|
|
/*
|
|
```
|
|
[55] StringType ::= 'CDATA'
|
|
[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
|
|
| 'IDREF' [VC: IDREF]
|
|
| 'IDREFS' [VC: IDREF]
|
|
| 'ENTITY' [VC: Entity Name]
|
|
| 'ENTITIES' [VC: Entity Name]
|
|
| 'NMTOKEN' [VC: Name Token]
|
|
| 'NMTOKENS' [VC: Name Token]
|
|
[54] AttType ::= StringType | TokenizedType | EnumeratedType
|
|
```*/
|
|
var AttType = regg(/CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/, '|', EnumeratedType);
|
|
|
|
// `[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)`
|
|
// [WFC: No < in Attribute Values] [WFC: No External Entity References]
|
|
// [VC: Fixed Attribute Default] [VC: Required Attribute] [VC: Attribute Default Value Syntactically Correct]
|
|
var DefaultDecl = regg(/#REQUIRED|#IMPLIED/, '|', regg(regg('#FIXED', S), '?', AttValue));
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-AttDef
|
|
// [53] AttDef ::= S Name S AttType S DefaultDecl
|
|
// https://www.w3.org/TR/xml-names/#NT-AttDef
|
|
// [1] NSAttName ::= PrefixedAttName | DefaultAttName
|
|
// [2] PrefixedAttName ::= 'xmlns:' NCName [NSC: Reserved Prefixes and Namespace Names]
|
|
// [3] DefaultAttName ::= 'xmlns'
|
|
// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
|
|
// === S Name S AttType S DefaultDecl
|
|
// xmldom is not distinguishing between QName and NSAttName on this level
|
|
// to support XML without namespaces in DTD we can not restrict it to QName
|
|
var AttDef = regg(S, Name, S, AttType, S, DefaultDecl);
|
|
|
|
var ATTLIST_DECL_START = '<!ATTLIST';
|
|
// https://www.w3.org/TR/xml11/#NT-AttlistDecl
|
|
// `[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'`
|
|
// https://www.w3.org/TR/xml-names/#NT-AttlistDecl
|
|
// `[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'`
|
|
// to support XML without namespaces in DTD we can not restrict it to QName
|
|
var AttlistDecl = reg(ATTLIST_DECL_START, S, Name, AttDef, '*', S_OPT, '>');
|
|
|
|
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#about:legacy-compat
|
|
var ABOUT_LEGACY_COMPAT = 'about:legacy-compat';
|
|
var ABOUT_LEGACY_COMPAT_SystemLiteral = regg('"' + ABOUT_LEGACY_COMPAT + '"', '|', "'" + ABOUT_LEGACY_COMPAT + "'");
|
|
var SYSTEM = 'SYSTEM';
|
|
var PUBLIC = 'PUBLIC';
|
|
// https://www.w3.org/TR/xml11/#NT-ExternalID
|
|
// `[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral`
|
|
var ExternalID = regg(regg(SYSTEM, S, SystemLiteral), '|', regg(PUBLIC, S, PubidLiteral, S, SystemLiteral));
|
|
var ExternalID_match = reg(
|
|
'^',
|
|
regg(
|
|
regg(SYSTEM, S, '(?<SystemLiteralOnly>', SystemLiteral, ')'),
|
|
'|',
|
|
regg(PUBLIC, S, '(?<PubidLiteral>', PubidLiteral, ')', S, '(?<SystemLiteral>', SystemLiteral, ')')
|
|
)
|
|
);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-NDataDecl
|
|
// `[76] NDataDecl ::= S 'NDATA' S Name` [VC: Notation Declared]
|
|
var NDataDecl = regg(S, 'NDATA', S, Name);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-EntityDef
|
|
// `[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)`
|
|
var EntityDef = regg(EntityValue, '|', regg(ExternalID, NDataDecl, '?'));
|
|
|
|
var ENTITY_DECL_START = '<!ENTITY';
|
|
// https://www.w3.org/TR/xml11/#NT-GEDecl
|
|
// `[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'`
|
|
var GEDecl = reg(ENTITY_DECL_START, S, Name, S, EntityDef, S_OPT, '>');
|
|
// https://www.w3.org/TR/xml11/#NT-PEDef
|
|
// `[74] PEDef ::= EntityValue | ExternalID`
|
|
var PEDef = regg(EntityValue, '|', ExternalID);
|
|
// https://www.w3.org/TR/xml11/#NT-PEDecl
|
|
// `[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'`
|
|
var PEDecl = reg(ENTITY_DECL_START, S, '%', S, Name, S, PEDef, S_OPT, '>');
|
|
// https://www.w3.org/TR/xml11/#NT-EntityDecl
|
|
// `[70] EntityDecl ::= GEDecl | PEDecl`
|
|
var EntityDecl = regg(GEDecl, '|', PEDecl);
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-PublicID
|
|
// `[83] PublicID ::= 'PUBLIC' S PubidLiteral`
|
|
var PublicID = reg(PUBLIC, S, PubidLiteral);
|
|
// https://www.w3.org/TR/xml11/#NT-NotationDecl
|
|
// `[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'` [VC: Unique Notation Name]
|
|
var NotationDecl = reg('<!NOTATION', S, Name, S, regg(ExternalID, '|', PublicID), S_OPT, '>');
|
|
|
|
// https://www.w3.org/TR/xml11/#NT-Eq
|
|
// `[25] Eq ::= S? '=' S?`
|
|
var Eq = reg(S_OPT, '=', S_OPT);
|
|
// https://www.w3.org/TR/xml/#NT-VersionNum
|
|
// `[26] VersionNum ::= '1.' [0-9]+`
|
|
// https://www.w3.org/TR/xml11/#NT-VersionNum
|
|
// `[26] VersionNum ::= '1.1'`
|
|
var VersionNum = /1[.]\d+/;
|
|
// https://www.w3.org/TR/xml11/#NT-VersionInfo
|
|
// `[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')`
|
|
var VersionInfo = reg(S, 'version', Eq, regg("'", VersionNum, "'", '|', '"', VersionNum, '"'));
|
|
// https://www.w3.org/TR/xml11/#NT-EncName
|
|
// `[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`
|
|
var EncName = /[A-Za-z][-A-Za-z0-9._]*/;
|
|
// https://www.w3.org/TR/xml11/#NT-EncDecl
|
|
// `[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )`
|
|
var EncodingDecl = regg(S, 'encoding', Eq, regg('"', EncName, '"', '|', "'", EncName, "'"));
|
|
// https://www.w3.org/TR/xml11/#NT-SDDecl
|
|
// `[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))`
|
|
var SDDecl = regg(S, 'standalone', Eq, regg("'", regg('yes', '|', 'no'), "'", '|', '"', regg('yes', '|', 'no'), '"'));
|
|
// https://www.w3.org/TR/xml11/#NT-XMLDecl
|
|
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
|
var XMLDecl = reg(/^<\?xml/, VersionInfo, EncodingDecl, '?', SDDecl, '?', S_OPT, /\?>/);
|
|
|
|
/*
|
|
https://www.w3.org/TR/xml/#NT-markupdecl
|
|
https://www.w3.org/TR/xml11/#NT-markupdecl
|
|
`[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment`
|
|
var markupdecl = regg(elementdecl, '|', AttlistDecl, '|', EntityDecl, '|', NotationDecl, '|', PI_unsafe, '|', Comment);
|
|
*/
|
|
/*
|
|
https://www.w3.org/TR/xml-names/#NT-doctypedecl
|
|
`[28a] DeclSep ::= PEReference | S`
|
|
https://www.w3.org/TR/xml11/#NT-intSubset
|
|
```
|
|
[28b] intSubset ::= (markupdecl | DeclSep)*
|
|
=== (markupdecl | PEReference | S)*
|
|
```
|
|
[WFC: PE Between Declarations]
|
|
var intSubset = reg(regg(markupdecl, '|', PEReference, '|', S), '*');
|
|
*/
|
|
var DOCTYPE_DECL_START = '<!DOCTYPE';
|
|
/*
|
|
https://www.w3.org/TR/xml11/#NT-doctypedecl
|
|
`[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'`
|
|
https://www.w3.org/TR/xml-names/#NT-doctypedecl
|
|
`[16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'`
|
|
var doctypedecl = reg('<!DOCTYPE', S, Name, regg(S, ExternalID), '?', S_OPT, regg(/\[/, intSubset, /]/, S_OPT), '?', '>');
|
|
*/
|
|
|
|
var CDATA_START = '<![CDATA[';
|
|
var CDATA_END = ']]>';
|
|
var CDStart = /<!\[CDATA\[/;
|
|
var CDEnd = /\]\]>/;
|
|
var CData = reg(Char, '*?', CDEnd);
|
|
/*
|
|
https://www.w3.org/TR/xml/#dt-cdsection
|
|
`[18] CDSect ::= CDStart CData CDEnd`
|
|
`[19] CDStart ::= '<![CDATA['`
|
|
`[20] CData ::= (Char* - (Char* ']]>' Char*))`
|
|
`[21] CDEnd ::= ']]>'`
|
|
*/
|
|
var CDSect = reg(CDStart, CData);
|
|
|
|
// unit tested
|
|
exports.chars = chars;
|
|
exports.chars_without = chars_without;
|
|
exports.detectUnicodeSupport = detectUnicodeSupport;
|
|
exports.reg = reg;
|
|
exports.regg = regg;
|
|
exports.ABOUT_LEGACY_COMPAT = ABOUT_LEGACY_COMPAT;
|
|
exports.ABOUT_LEGACY_COMPAT_SystemLiteral = ABOUT_LEGACY_COMPAT_SystemLiteral;
|
|
exports.AttlistDecl = AttlistDecl;
|
|
exports.CDATA_START = CDATA_START;
|
|
exports.CDATA_END = CDATA_END;
|
|
exports.CDSect = CDSect;
|
|
exports.Char = Char;
|
|
exports.Comment = Comment;
|
|
exports.COMMENT_START = COMMENT_START;
|
|
exports.COMMENT_END = COMMENT_END;
|
|
exports.DOCTYPE_DECL_START = DOCTYPE_DECL_START;
|
|
exports.elementdecl = elementdecl;
|
|
exports.EntityDecl = EntityDecl;
|
|
exports.EntityValue = EntityValue;
|
|
exports.ExternalID = ExternalID;
|
|
exports.ExternalID_match = ExternalID_match;
|
|
exports.Name = Name;
|
|
exports.NotationDecl = NotationDecl;
|
|
exports.Reference = Reference;
|
|
exports.PEReference = PEReference;
|
|
exports.PI = PI;
|
|
exports.PUBLIC = PUBLIC;
|
|
exports.PubidLiteral = PubidLiteral;
|
|
exports.QName = QName;
|
|
exports.QName_exact = QName_exact;
|
|
exports.QName_group = QName_group;
|
|
exports.S = S;
|
|
exports.SChar_s = SChar_s;
|
|
exports.S_OPT = S_OPT;
|
|
exports.SYSTEM = SYSTEM;
|
|
exports.SystemLiteral = SystemLiteral;
|
|
exports.UNICODE_REPLACEMENT_CHARACTER = UNICODE_REPLACEMENT_CHARACTER;
|
|
exports.UNICODE_SUPPORT = UNICODE_SUPPORT;
|
|
exports.XMLDecl = XMLDecl;
|