1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-03-20 20:55:18 +02:00
2020-11-05 16:58:23 +00:00

907 lines
31 KiB
TypeScript

import decodeCodePoint from "entities/lib/decode_codepoint";
import entityMap from "entities/lib/maps/entities.json";
import legacyMap from "entities/lib/maps/legacy.json";
import xmlMap from "entities/lib/maps/xml.json";
/**
 * Every state of the tokenizer's state machine.
 *
 * Values start at 1 and follow declaration order. Where a trailing note shows
 * a single character, it is the character whose consumption enters that state.
 */
const enum State {
    // --- character data ---
    Text = 1,

    // --- tags ---
    BeforeTagName, // `<`
    InTagName,
    InSelfClosingTag,
    BeforeClosingTagName,
    InClosingTagName,
    AfterClosingTagName,

    // --- attributes ---
    BeforeAttributeName,
    InAttributeName,
    AfterAttributeName,
    BeforeAttributeValue,
    InAttributeValueDq, // double-quoted value, entered on `"`
    InAttributeValueSq, // single-quoted value, entered on `'`
    InAttributeValueNq, // unquoted value

    // --- declarations ---
    BeforeDeclaration, // `!`
    InDeclaration,

    // --- processing instructions ---
    InProcessingInstruction, // `?`

    // --- comments ---
    BeforeComment,
    InComment,
    AfterComment1,
    AfterComment2,

    // --- CDATA sections ---
    BeforeCdata1, // `[`
    BeforeCdata2, // `C`
    BeforeCdata3, // `D`
    BeforeCdata4, // `A`
    BeforeCdata5, // `T`
    BeforeCdata6, // `A`
    InCdata, // `[`
    AfterCdata1, // `]`
    AfterCdata2, // `]`

    // --- special tags (script / style) ---
    BeforeSpecial, // `S` in an opening tag name
    BeforeSpecialEnd, // `S` in a closing tag name
    // opening `script`
    BeforeScript1, // `C`
    BeforeScript2, // `R`
    BeforeScript3, // `I`
    BeforeScript4, // `P`
    BeforeScript5, // `T`
    // closing `script`
    AfterScript1, // `C`
    AfterScript2, // `R`
    AfterScript3, // `I`
    AfterScript4, // `P`
    AfterScript5, // `T`
    // opening `style`
    BeforeStyle1, // `T`
    BeforeStyle2, // `Y`
    BeforeStyle3, // `L`
    BeforeStyle4, // `E`
    // closing `style`
    AfterStyle1, // `T`
    AfterStyle2, // `Y`
    AfterStyle3, // `L`
    AfterStyle4, // `E`

    // --- entities ---
    BeforeEntity, // `&`
    BeforeNumericEntity, // `#`
    InNamedEntity,
    InNumericEntity,
    InHexEntity, // `X`
}
/**
 * Which special element, if any, the tokenizer is currently inside.
 * Values start at 1 and follow declaration order.
 */
const enum Special {
    /** Not inside a special element. */
    None = 1,
    /** Inside a `script` element. */
    Script,
    /** Inside a `style` element. */
    Style,
}
/**
 * Tells whether `c` is exactly one of the five characters the tokenizer
 * treats as whitespace: space, line feed, tab, form feed or carriage return.
 *
 * @param c The string to test; anything other than one of those single characters yields `false`.
 * @returns `true` only for the characters listed above.
 */
function whitespace(c: string): boolean {
    switch (c) {
        case " ":
        case "\n":
        case "\t":
        case "\f":
        case "\r":
            return true;
        default:
            return false;
    }
}
/**
 * Event sink the tokenizer reports to. Every member is required; the
 * tokenizer invokes them synchronously while it walks its buffer.
 */
interface Callbacks {
// A piece of an attribute value: either a raw slice of the input or a decoded entity.
onattribdata(value: string): void; //TODO implement the new event
// The current attribute is complete (with or without a value).
onattribend(): void;
// The name of an attribute, as written in the input.
onattribname(name: string): void;
// Contents of a CDATA section, without the surrounding `<![CDATA[` and `]]>`.
oncdata(data: string): void;
// The name found in a closing tag.
onclosetag(name: string): void;
// Contents of a comment, without the surrounding `<!--` and `-->`.
oncomment(data: string): void;
// Text of a declaration: everything between `<!` and the closing `>`.
ondeclaration(content: string): void;
// The input has ended and any trailing data has been handled.
onend(): void;
// A usage or internal error; `state` is supplied when an unknown tokenizer state is hit.
onerror(error: Error, state?: State): void;
// The `>` that ends an opening tag was reached.
onopentagend(): void;
// The name found in an opening tag.
onopentagname(name: string): void;
// Text of a processing instruction: everything between `<?` and the closing `>`.
onprocessinginstruction(instruction: string): void;
// The `/>` that ends a self-closing tag was reached.
onselfclosingtag(): void;
// A run of character data, or a decoded entity found in character data.
ontext(value: string): void;
}
/**
 * Builds a one-character transition handler.
 *
 * The handler moves the tokenizer to `SUCCESS` when the character equals
 * `upper` or its lower-case form. Otherwise it moves to `FAILURE` and steps
 * `_index` back by one so the same character is read again in the new state.
 *
 * @param upper The character to match (its lower-case form is accepted too).
 * @param SUCCESS State to enter on a match.
 * @param FAILURE State to enter on a mismatch.
 * @returns The handler, taking the tokenizer and the current character.
 */
function ifElseState(upper: string, SUCCESS: State, FAILURE: State) {
    const lower = upper.toLowerCase();
    // When the character has no distinct lower-case form a single comparison suffices.
    const isMatch =
        upper === lower
            ? (ch: string) => ch === lower
            : (ch: string) => ch === lower || ch === upper;

    return (t: Tokenizer, c: string) => {
        if (isMatch(c)) {
            t._state = SUCCESS;
            return;
        }
        t._state = FAILURE;
        t._index--;
    };
}
/**
 * Builds a handler for one letter of a special tag name (`script` / `style`).
 *
 * On a match (either case) the tokenizer advances to `NEXT_STATE`. On a
 * mismatch the name is an ordinary tag name after all: the tokenizer switches
 * to `State.InTagName` and steps `_index` back so the character is read again.
 *
 * @param upper The expected letter, upper-case.
 * @param NEXT_STATE State to enter when the letter matches.
 * @returns The handler, taking the tokenizer and the current character.
 */
function consumeSpecialNameChar(upper: string, NEXT_STATE: State) {
    const lower = upper.toLowerCase();
    return (t: Tokenizer, c: string) => {
        const matched = c === lower || c === upper;
        if (!matched) {
            t._state = State.InTagName;
            t._index--; // read this character again as part of a regular tag name
            return;
        }
        t._state = NEXT_STATE;
    };
}
// CDATA opener. `<![` has been read; the handlers below match the letters
// `C`, `D`, `A`, `T`, `A` one at a time (either case). Any mismatch turns the
// construct into an ordinary declaration and re-reads the offending character.
const stateBeforeCdata1 = ifElseState(
"C",
State.BeforeCdata2,
State.InDeclaration
);
const stateBeforeCdata2 = ifElseState(
"D",
State.BeforeCdata3,
State.InDeclaration
);
const stateBeforeCdata3 = ifElseState(
"A",
State.BeforeCdata4,
State.InDeclaration
);
const stateBeforeCdata4 = ifElseState(
"T",
State.BeforeCdata5,
State.InDeclaration
);
const stateBeforeCdata5 = ifElseState(
"A",
State.BeforeCdata6,
State.InDeclaration
);
// Opening `script` tag. `sc` has been read; match `r`, `i`, `p`, `t`.
// A mismatch falls back to a regular tag name (State.InTagName).
const stateBeforeScript1 = consumeSpecialNameChar("R", State.BeforeScript2);
const stateBeforeScript2 = consumeSpecialNameChar("I", State.BeforeScript3);
const stateBeforeScript3 = consumeSpecialNameChar("P", State.BeforeScript4);
const stateBeforeScript4 = consumeSpecialNameChar("T", State.BeforeScript5);
// Closing `script` tag. `</sc` has been read; match `r`, `i`, `p`, `t`.
// A mismatch means this is not the closing tag, so go back to text.
const stateAfterScript1 = ifElseState("R", State.AfterScript2, State.Text);
const stateAfterScript2 = ifElseState("I", State.AfterScript3, State.Text);
const stateAfterScript3 = ifElseState("P", State.AfterScript4, State.Text);
const stateAfterScript4 = ifElseState("T", State.AfterScript5, State.Text);
// Opening `style` tag. `st` has been read; match `y`, `l`, `e`.
const stateBeforeStyle1 = consumeSpecialNameChar("Y", State.BeforeStyle2);
const stateBeforeStyle2 = consumeSpecialNameChar("L", State.BeforeStyle3);
const stateBeforeStyle3 = consumeSpecialNameChar("E", State.BeforeStyle4);
// Closing `style` tag. `</st` has been read; match `y`, `l`, `e`.
const stateAfterStyle1 = ifElseState("Y", State.AfterStyle2, State.Text);
const stateAfterStyle2 = ifElseState("L", State.AfterStyle3, State.Text);
const stateAfterStyle3 = ifElseState("E", State.AfterStyle4, State.Text);
// Entities. After `&`, a `#` starts a numeric entity; anything else is
// re-read as the first character of a named entity.
const stateBeforeEntity = ifElseState(
"#",
State.BeforeNumericEntity,
State.InNamedEntity
);
// After `&#`, an `x`/`X` selects hexadecimal; anything else is re-read as
// the first character of a decimal entity.
const stateBeforeNumericEntity = ifElseState(
"X",
State.InHexEntity,
State.InNumericEntity
);
/**
 * Streaming tokenizer for HTML and XML markup.
 *
 * Input is appended to an internal buffer by `write()` / `end()` and consumed
 * one character at a time by `_parse()`, which dispatches on `_state`. Tokens
 * are reported through the `Callbacks` object given to the constructor.
 *
 * A handler that wants the current character to be processed again in a new
 * state decrements `_index`; `_parse()` increments it after every step, so
 * the net effect is "stay on this character".
 *
 * The module-level transition helpers write `_state` and `_index` directly,
 * which is why these members are not declared `private`.
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state = State.Text;
    /** The read buffer. */
    _buffer = "";
    /** The beginning of the section that is currently being read. */
    _sectionStart = 0;
    /** The index within the buffer that we are currently looking at. */
    _index = 0;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    _bufferOffset = 0;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    _baseState = State.Text;
    /** For special parsing behavior inside of script and style tags. */
    _special = Special.None;
    /** Indicates whether the tokenizer is running, i.e. has not been paused. */
    _running = true;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    _ended = false;
    /** Receiver of all token events. */
    _cbs: Callbacks;
    /**
     * XML mode: no special handling of `script` / `style`, entities are looked
     * up in the XML map only, and unterminated entities are not decoded.
     */
    _xmlMode: boolean;
    /** Whether `&...;` sequences in text and attribute values are decoded. */
    _decodeEntities: boolean;

    /**
     * @param options Optional flags; `null` or a missing flag means `false`.
     * @param cbs Object whose methods are called for every token.
     */
    constructor(
        options: { xmlMode?: boolean; decodeEntities?: boolean } | null,
        cbs: Callbacks
    ) {
        this._cbs = cbs;
        this._xmlMode = !!(options && options.xmlMode);
        this._decodeEntities = !!(options && options.decodeEntities);
    }

    /** Restores the initial parsing state; the callbacks and options are kept. */
    reset() {
        this._state = State.Text;
        this._buffer = "";
        this._sectionStart = 0;
        this._index = 0;
        this._bufferOffset = 0;
        this._baseState = State.Text;
        this._special = Special.None;
        this._running = true;
        this._ended = false;
    }

    /** Character data: flushes pending text when a tag (`<`) or an entity (`&`) begins. */
    _stateText(c: string) {
        if (c === "<") {
            if (this._index > this._sectionStart) {
                this._cbs.ontext(this._getSection());
            }
            this._state = State.BeforeTagName;
            this._sectionStart = this._index;
        } else if (
            this._decodeEntities &&
            this._special === Special.None &&
            c === "&"
        ) {
            if (this._index > this._sectionStart) {
                this._cbs.ontext(this._getSection());
            }
            this._baseState = State.Text;
            this._state = State.BeforeEntity;
            this._sectionStart = this._index;
        }
    }

    /** A `<` was read: decide which kind of construct follows. */
    _stateBeforeTagName(c: string) {
        if (c === "/") {
            this._state = State.BeforeClosingTagName;
        } else if (c === "<") {
            // The previous `<` was plain text; emit it and start over at this one.
            this._cbs.ontext(this._getSection());
            this._sectionStart = this._index;
        } else if (
            c === ">" ||
            this._special !== Special.None ||
            whitespace(c)
        ) {
            // Not a tag (or we are inside script/style, where only `</` matters).
            this._state = State.Text;
        } else if (c === "!") {
            this._state = State.BeforeDeclaration;
            this._sectionStart = this._index + 1;
        } else if (c === "?") {
            this._state = State.InProcessingInstruction;
            this._sectionStart = this._index + 1;
        } else {
            // An `s` may start `script` or `style`, which need special handling in HTML.
            this._state =
                !this._xmlMode && (c === "s" || c === "S")
                    ? State.BeforeSpecial
                    : State.InTagName;
            this._sectionStart = this._index;
        }
    }

    /** Inside an opening tag name; the name ends at `/`, `>` or whitespace. */
    _stateInTagName(c: string) {
        if (c === "/" || c === ">" || whitespace(c)) {
            this._emitToken("onopentagname");
            this._state = State.BeforeAttributeName;
            this._index--; // let BeforeAttributeName handle the terminator
        }
    }

    /** `</` was read: skip whitespace, then start reading the closing tag name. */
    _stateBeforeClosingTagName(c: string) {
        if (whitespace(c)) {
            // ignore
        } else if (c === ">") {
            this._state = State.Text;
        } else if (this._special !== Special.None) {
            // Inside script/style only the matching closing tag is recognised.
            if (c === "s" || c === "S") {
                this._state = State.BeforeSpecialEnd;
            } else {
                this._state = State.Text;
                this._index--;
            }
        } else {
            this._state = State.InClosingTagName;
            this._sectionStart = this._index;
        }
    }

    /** Inside a closing tag name; the name ends at `>` or whitespace. */
    _stateInClosingTagName(c: string) {
        if (c === ">" || whitespace(c)) {
            this._emitToken("onclosetag");
            this._state = State.AfterClosingTagName;
            this._index--;
        }
    }

    /** After a closing tag name: skip everything until `>`. */
    _stateAfterClosingTagName(c: string) {
        if (c === ">") {
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        }
    }

    /** Between attributes: `>` ends the tag, `/` may self-close it, anything else starts a name. */
    _stateBeforeAttributeName(c: string) {
        if (c === ">") {
            this._cbs.onopentagend();
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (c === "/") {
            this._state = State.InSelfClosingTag;
        } else if (!whitespace(c)) {
            this._state = State.InAttributeName;
            this._sectionStart = this._index;
        }
    }

    /** A `/` was read inside a tag: `>` completes a self-closing tag. */
    _stateInSelfClosingTag(c: string) {
        if (c === ">") {
            this._cbs.onselfclosingtag();
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (!whitespace(c)) {
            // The slash was stray; carry on with attribute parsing.
            this._state = State.BeforeAttributeName;
            this._index--;
        }
    }

    /** Inside an attribute name; the name ends at `=`, `/`, `>` or whitespace. */
    _stateInAttributeName(c: string) {
        if (c === "=" || c === "/" || c === ">" || whitespace(c)) {
            this._cbs.onattribname(this._getSection());
            this._sectionStart = -1;
            this._state = State.AfterAttributeName;
            this._index--;
        }
    }

    /** After an attribute name: `=` introduces a value, otherwise the attribute has none. */
    _stateAfterAttributeName(c: string) {
        if (c === "=") {
            this._state = State.BeforeAttributeValue;
        } else if (c === "/" || c === ">") {
            this._cbs.onattribend();
            this._state = State.BeforeAttributeName;
            this._index--;
        } else if (!whitespace(c)) {
            // A new attribute name starts right away.
            this._cbs.onattribend();
            this._state = State.InAttributeName;
            this._sectionStart = this._index;
        }
    }

    /** After `=`: pick the quoting style of the value. */
    _stateBeforeAttributeValue(c: string) {
        if (c === '"') {
            this._state = State.InAttributeValueDq;
            this._sectionStart = this._index + 1;
        } else if (c === "'") {
            this._state = State.InAttributeValueSq;
            this._sectionStart = this._index + 1;
        } else if (!whitespace(c)) {
            this._state = State.InAttributeValueNq;
            this._sectionStart = this._index;
            this._index--; //reconsume token
        }
    }

    /**
     * Shared logic of the two quoted attribute-value states.
     *
     * @param c The current character.
     * @param quote The delimiter that closes the value.
     */
    _handleInAttributeValue(c: string, quote: string) {
        if (c === quote) {
            this._emitToken("onattribdata");
            this._cbs.onattribend();
            this._state = State.BeforeAttributeName;
        } else if (this._decodeEntities && c === "&") {
            // Flush what we have, then decode the entity and come back to this state.
            this._emitToken("onattribdata");
            this._baseState = this._state;
            this._state = State.BeforeEntity;
            this._sectionStart = this._index;
        }
    }

    /** Inside a double-quoted attribute value. */
    _stateInAttributeValueDoubleQuotes(c: string) {
        this._handleInAttributeValue(c, '"');
    }

    /** Inside a single-quoted attribute value. */
    _stateInAttributeValueSingleQuotes(c: string) {
        this._handleInAttributeValue(c, "'");
    }

    /** Inside an unquoted attribute value; it ends at whitespace or `>`. */
    _stateInAttributeValueNoQuotes(c: string) {
        if (whitespace(c) || c === ">") {
            this._emitToken("onattribdata");
            this._cbs.onattribend();
            this._state = State.BeforeAttributeName;
            this._index--;
        } else if (this._decodeEntities && c === "&") {
            this._emitToken("onattribdata");
            this._baseState = this._state;
            this._state = State.BeforeEntity;
            this._sectionStart = this._index;
        }
    }

    /** `<!` was read: `[` may start CDATA, `-` may start a comment, else a declaration. */
    _stateBeforeDeclaration(c: string) {
        this._state =
            c === "["
                ? State.BeforeCdata1
                : c === "-"
                ? State.BeforeComment
                : State.InDeclaration;
    }

    /** Inside a declaration; it ends at `>`. */
    _stateInDeclaration(c: string) {
        if (c === ">") {
            this._cbs.ondeclaration(this._getSection());
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        }
    }

    /** Inside a processing instruction; it ends at `>`. */
    _stateInProcessingInstruction(c: string) {
        if (c === ">") {
            this._cbs.onprocessinginstruction(this._getSection());
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        }
    }

    /** `<!-` was read: a second `-` starts a comment, anything else is a declaration. */
    _stateBeforeComment(c: string) {
        if (c === "-") {
            this._state = State.InComment;
            this._sectionStart = this._index + 1;
        } else {
            this._state = State.InDeclaration;
        }
    }

    /** Inside a comment, watching for the first `-` of the terminator. */
    _stateInComment(c: string) {
        if (c === "-") this._state = State.AfterComment1;
    }

    /** One `-` seen inside a comment. */
    _stateAfterComment1(c: string) {
        if (c === "-") {
            this._state = State.AfterComment2;
        } else {
            this._state = State.InComment;
        }
    }

    /** `--` seen inside a comment: `>` ends it. */
    _stateAfterComment2(c: string) {
        if (c === ">") {
            //remove 2 trailing chars
            this._cbs.oncomment(
                this._buffer.substring(this._sectionStart, this._index - 2)
            );
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (c !== "-") {
            this._state = State.InComment;
        }
        // else: stay in AFTER_COMMENT_2 (`--->`)
    }

    /** `<![CDATA` was read: a `[` opens the section, anything else is a declaration. */
    _stateBeforeCdata6(c: string) {
        if (c === "[") {
            this._state = State.InCdata;
            this._sectionStart = this._index + 1;
        } else {
            this._state = State.InDeclaration;
            this._index--;
        }
    }

    /** Inside a CDATA section, watching for the first `]` of the terminator. */
    _stateInCdata(c: string) {
        if (c === "]") this._state = State.AfterCdata1;
    }

    /** One `]` seen inside a CDATA section. */
    _stateAfterCdata1(c: string) {
        if (c === "]") this._state = State.AfterCdata2;
        else this._state = State.InCdata;
    }

    /** `]]` seen inside a CDATA section: `>` ends it. */
    _stateAfterCdata2(c: string) {
        if (c === ">") {
            //remove 2 trailing chars
            this._cbs.oncdata(
                this._buffer.substring(this._sectionStart, this._index - 2)
            );
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (c !== "]") {
            this._state = State.InCdata;
        }
        //else: stay in AFTER_CDATA_2 (`]]]>`)
    }

    /** An opening tag name starts with `s`: `c` may lead to `script`, `t` to `style`. */
    _stateBeforeSpecial(c: string) {
        if (c === "c" || c === "C") {
            this._state = State.BeforeScript1;
        } else if (c === "t" || c === "T") {
            this._state = State.BeforeStyle1;
        } else {
            this._state = State.InTagName;
            this._index--; //consume the token again
        }
    }

    /** `</s` was read inside script/style: check whether the matching closing tag follows. */
    _stateBeforeSpecialEnd(c: string) {
        if (this._special === Special.Script && (c === "c" || c === "C")) {
            this._state = State.AfterScript1;
        } else if (
            this._special === Special.Style &&
            (c === "t" || c === "T")
        ) {
            this._state = State.AfterStyle1;
        } else this._state = State.Text;
    }

    /** `script` was read in an opening tag: it is the special tag only if the name ends here. */
    _stateBeforeScript5(c: string) {
        if (c === "/" || c === ">" || whitespace(c)) {
            this._special = Special.Script;
        }
        this._state = State.InTagName;
        this._index--; //consume the token again
    }

    /** `</script` was read: it closes the element only if the name ends here. */
    _stateAfterScript5(c: string) {
        if (c === ">" || whitespace(c)) {
            this._special = Special.None;
            this._state = State.InClosingTagName;
            // "script" is 6 characters long, so the name began 6 characters back.
            this._sectionStart = this._index - 6;
            this._index--; //reconsume the token
        } else this._state = State.Text;
    }

    /** `style` was read in an opening tag: it is the special tag only if the name ends here. */
    _stateBeforeStyle4(c: string) {
        if (c === "/" || c === ">" || whitespace(c)) {
            this._special = Special.Style;
        }
        this._state = State.InTagName;
        this._index--; //consume the token again
    }

    /** `</style` was read: it closes the element only if the name ends here. */
    _stateAfterStyle4(c: string) {
        if (c === ">" || whitespace(c)) {
            this._special = Special.None;
            this._state = State.InClosingTagName;
            // "style" is 5 characters long, so the name began 5 characters back.
            this._sectionStart = this._index - 5;
            this._index--; //reconsume the token
        } else this._state = State.Text;
    }

    /**
     * Looks up the name between `&` and the current index in the full entity
     * map (the XML map in XML mode). On a hit the replacement is emitted and
     * the section restarts after the current character.
     */
    //for entities terminated with a semicolon
    _parseNamedEntityStrict() {
        //offset = 1
        if (this._sectionStart + 1 < this._index) {
            const entity = this._buffer.substring(
                this._sectionStart + 1,
                this._index
            );
            const map: Record<string, string> = this._xmlMode
                ? xmlMap
                : entityMap;
            // Own-property check so that names such as "constructor" never match.
            const replacement = Object.prototype.hasOwnProperty.call(
                map,
                entity
            )
                ? map[entity]
                : undefined;
            if (replacement !== undefined) {
                this._emitPartial(replacement);
                this._sectionStart = this._index + 1;
            }
        }
    }

    /**
     * Tries to match the longest legacy entity (2 to 6 characters) at the
     * start of the name following `&`. On a hit the replacement is emitted and
     * the section restarts right after the matched name.
     */
    //parses legacy entities (without trailing semicolon)
    _parseLegacyEntity() {
        const start = this._sectionStart + 1;
        const table: Record<string, string> = legacyMap;
        // The max length of legacy entities is 6, the min length is 2.
        for (
            let length = Math.min(this._index - start, 6);
            length >= 2;
            length--
        ) {
            const entity = this._buffer.substring(start, start + length);
            const replacement = Object.prototype.hasOwnProperty.call(
                table,
                entity
            )
                ? table[entity]
                : undefined;
            if (replacement !== undefined) {
                this._emitPartial(replacement);
                this._sectionStart += length + 1;
                return;
            }
        }
    }

    /** Inside a named entity; it ends at `;` or at the first non-alphanumeric character. */
    _stateInNamedEntity(c: string) {
        if (c === ";") {
            this._parseNamedEntityStrict();
            // Still unresolved: fall back to the legacy table (HTML only).
            if (this._sectionStart + 1 < this._index && !this._xmlMode) {
                this._parseLegacyEntity();
            }
            this._state = this._baseState;
        } else if (
            (c < "a" || c > "z") &&
            (c < "A" || c > "Z") &&
            (c < "0" || c > "9")
        ) {
            if (this._xmlMode || this._sectionStart + 1 === this._index) {
                // ignore
            } else if (this._baseState !== State.Text) {
                // Inside an attribute value a name followed by `=` is left alone.
                if (c !== "=") {
                    this._parseNamedEntityStrict();
                }
            } else {
                this._parseLegacyEntity();
            }
            this._state = this._baseState;
            this._index--;
        }
    }

    /**
     * Decodes the digits of a numeric entity and returns to the base state.
     *
     * When there are no digits nothing is emitted and `_sectionStart` is left
     * on the `&`, so the raw characters stay part of the surrounding text.
     *
     * @param offset Length of the entity prefix: 2 for `&#`, 3 for `&#x`.
     * @param base Radix of the digits (10 or 16).
     * @param strict `true` when the entity was terminated by `;` (the current
     *     character); the next section then starts after the semicolon.
     *     Otherwise it starts at the current character.
     */
    _decodeNumericEntity(offset: number, base: number, strict = false) {
        const digitsStart = this._sectionStart + offset;
        if (digitsStart !== this._index) {
            //parse entity
            const entity = this._buffer.substring(digitsStart, this._index);
            const parsed = parseInt(entity, base);
            this._emitPartial(decodeCodePoint(parsed));
            this._sectionStart = strict ? this._index + 1 : this._index;
        }
        this._state = this._baseState;
    }

    /** Inside a decimal entity; it ends at `;` or at the first non-digit. */
    _stateInNumericEntity(c: string) {
        if (c === ";") {
            this._decodeNumericEntity(2, 10, true);
        } else if (c < "0" || c > "9") {
            if (!this._xmlMode) {
                this._decodeNumericEntity(2, 10, false);
            } else {
                // XML requires the semicolon; leave the characters as they are.
                this._state = this._baseState;
            }
            this._index--;
        }
    }

    /** Inside a hexadecimal entity; it ends at `;` or at the first non-hex character. */
    _stateInHexEntity(c: string) {
        if (c === ";") {
            this._decodeNumericEntity(3, 16, true);
        } else if (
            (c < "a" || c > "f") &&
            (c < "A" || c > "F") &&
            (c < "0" || c > "9")
        ) {
            if (!this._xmlMode) {
                this._decodeNumericEntity(3, 16, false);
            } else {
                this._state = this._baseState;
            }
            this._index--;
        }
    }

    /**
     * Drops the part of the buffer that is no longer needed and rebases
     * `_index` / `_sectionStart`, adding the removed length to `_bufferOffset`.
     * In the text state any pending text is emitted first.
     */
    _cleanup() {
        if (this._sectionStart < 0) {
            // No section is open: the whole buffer can go.
            this._buffer = "";
            this._bufferOffset += this._index;
            this._index = 0;
        } else if (this._running) {
            if (this._state === State.Text) {
                if (this._sectionStart !== this._index) {
                    this._cbs.ontext(this._buffer.slice(this._sectionStart));
                }
                this._buffer = "";
                this._bufferOffset += this._index;
                this._index = 0;
            } else if (this._sectionStart === this._index) {
                //the section just started
                this._buffer = "";
                this._bufferOffset += this._index;
                this._index = 0;
            } else {
                //remove everything unnecessary
                this._buffer = this._buffer.slice(this._sectionStart);
                this._index -= this._sectionStart;
                this._bufferOffset += this._sectionStart;
            }
            this._sectionStart = 0;
        }
    }

    /**
     * Appends `chunk` to the buffer and parses as far as possible.
     * Calling this after `end()` is reported through `onerror` and the chunk
     * is discarded.
     */
    //TODO make events conditional
    write(chunk: string) {
        if (this._ended) {
            this._cbs.onerror(Error(".write() after done!"));
            return;
        }
        this._buffer += chunk;
        this._parse();
    }

    // Iterates through the buffer, calling the function corresponding to the current state.
    // States that are more likely to be hit are higher up, as a performance improvement.
    _parse() {
        while (this._index < this._buffer.length && this._running) {
            const c = this._buffer.charAt(this._index);
            if (this._state === State.Text) {
                this._stateText(c);
            } else if (this._state === State.InAttributeValueDq) {
                this._stateInAttributeValueDoubleQuotes(c);
            } else if (this._state === State.InAttributeName) {
                this._stateInAttributeName(c);
            } else if (this._state === State.InComment) {
                this._stateInComment(c);
            } else if (this._state === State.BeforeAttributeName) {
                this._stateBeforeAttributeName(c);
            } else if (this._state === State.InTagName) {
                this._stateInTagName(c);
            } else if (this._state === State.InClosingTagName) {
                this._stateInClosingTagName(c);
            } else if (this._state === State.BeforeTagName) {
                this._stateBeforeTagName(c);
            } else if (this._state === State.AfterAttributeName) {
                this._stateAfterAttributeName(c);
            } else if (this._state === State.InAttributeValueSq) {
                this._stateInAttributeValueSingleQuotes(c);
            } else if (this._state === State.BeforeAttributeValue) {
                this._stateBeforeAttributeValue(c);
            } else if (this._state === State.BeforeClosingTagName) {
                this._stateBeforeClosingTagName(c);
            } else if (this._state === State.AfterClosingTagName) {
                this._stateAfterClosingTagName(c);
            } else if (this._state === State.BeforeSpecial) {
                this._stateBeforeSpecial(c);
            } else if (this._state === State.AfterComment1) {
                this._stateAfterComment1(c);
            } else if (this._state === State.InAttributeValueNq) {
                this._stateInAttributeValueNoQuotes(c);
            } else if (this._state === State.InSelfClosingTag) {
                this._stateInSelfClosingTag(c);
            } else if (this._state === State.InDeclaration) {
                this._stateInDeclaration(c);
            } else if (this._state === State.BeforeDeclaration) {
                this._stateBeforeDeclaration(c);
            } else if (this._state === State.AfterComment2) {
                this._stateAfterComment2(c);
            } else if (this._state === State.BeforeComment) {
                this._stateBeforeComment(c);
            } else if (this._state === State.BeforeSpecialEnd) {
                this._stateBeforeSpecialEnd(c);
            } else if (this._state === State.AfterScript1) {
                stateAfterScript1(this, c);
            } else if (this._state === State.AfterScript2) {
                stateAfterScript2(this, c);
            } else if (this._state === State.AfterScript3) {
                stateAfterScript3(this, c);
            } else if (this._state === State.BeforeScript1) {
                stateBeforeScript1(this, c);
            } else if (this._state === State.BeforeScript2) {
                stateBeforeScript2(this, c);
            } else if (this._state === State.BeforeScript3) {
                stateBeforeScript3(this, c);
            } else if (this._state === State.BeforeScript4) {
                stateBeforeScript4(this, c);
            } else if (this._state === State.BeforeScript5) {
                this._stateBeforeScript5(c);
            } else if (this._state === State.AfterScript4) {
                stateAfterScript4(this, c);
            } else if (this._state === State.AfterScript5) {
                this._stateAfterScript5(c);
            } else if (this._state === State.BeforeStyle1) {
                stateBeforeStyle1(this, c);
            } else if (this._state === State.InCdata) {
                this._stateInCdata(c);
            } else if (this._state === State.BeforeStyle2) {
                stateBeforeStyle2(this, c);
            } else if (this._state === State.BeforeStyle3) {
                stateBeforeStyle3(this, c);
            } else if (this._state === State.BeforeStyle4) {
                this._stateBeforeStyle4(c);
            } else if (this._state === State.AfterStyle1) {
                stateAfterStyle1(this, c);
            } else if (this._state === State.AfterStyle2) {
                stateAfterStyle2(this, c);
            } else if (this._state === State.AfterStyle3) {
                stateAfterStyle3(this, c);
            } else if (this._state === State.AfterStyle4) {
                this._stateAfterStyle4(c);
            } else if (this._state === State.InProcessingInstruction) {
                this._stateInProcessingInstruction(c);
            } else if (this._state === State.InNamedEntity) {
                this._stateInNamedEntity(c);
            } else if (this._state === State.BeforeCdata1) {
                stateBeforeCdata1(this, c);
            } else if (this._state === State.BeforeEntity) {
                stateBeforeEntity(this, c);
            } else if (this._state === State.BeforeCdata2) {
                stateBeforeCdata2(this, c);
            } else if (this._state === State.BeforeCdata3) {
                stateBeforeCdata3(this, c);
            } else if (this._state === State.AfterCdata1) {
                this._stateAfterCdata1(c);
            } else if (this._state === State.AfterCdata2) {
                this._stateAfterCdata2(c);
            } else if (this._state === State.BeforeCdata4) {
                stateBeforeCdata4(this, c);
            } else if (this._state === State.BeforeCdata5) {
                stateBeforeCdata5(this, c);
            } else if (this._state === State.BeforeCdata6) {
                this._stateBeforeCdata6(c);
            } else if (this._state === State.InHexEntity) {
                this._stateInHexEntity(c);
            } else if (this._state === State.InNumericEntity) {
                this._stateInNumericEntity(c);
            } else if (this._state === State.BeforeNumericEntity) {
                stateBeforeNumericEntity(this, c);
            } else {
                this._cbs.onerror(Error("unknown _state"), this._state);
            }
            this._index++;
        }
        this._cleanup();
    }

    /** Stops parsing after the current character; buffered input is kept. */
    pause() {
        this._running = false;
    }

    /** Continues after `pause()`; finishes the stream if `end()` was called meanwhile. */
    resume() {
        this._running = true;
        if (this._index < this._buffer.length) {
            this._parse();
        }
        if (this._ended) {
            this._finish();
        }
    }

    /**
     * Marks the end of the input, optionally writing a last `chunk` first.
     * Unless the tokenizer is paused, trailing data is flushed and `onend`
     * fires. A repeated call is reported through `onerror` and otherwise
     * ignored.
     */
    end(chunk?: string) {
        if (this._ended) {
            this._cbs.onerror(Error(".end() after done!"));
            return;
        }
        if (chunk) this.write(chunk);
        this._ended = true;
        if (this._running) this._finish();
    }

    /** Flushes whatever is left in the current section and signals the end. */
    _finish() {
        //if there is remaining data, emit it in a reasonable way
        if (this._sectionStart < this._index) {
            this._handleTrailingData();
        }
        this._cbs.onend();
    }

    /**
     * Emits the unfinished section at end of input according to the state the
     * tokenizer stopped in. Unfinished tags and attributes are dropped.
     */
    _handleTrailingData() {
        const data = this._buffer.slice(this._sectionStart);
        if (
            this._state === State.InCdata ||
            this._state === State.AfterCdata1 ||
            this._state === State.AfterCdata2
        ) {
            this._cbs.oncdata(data);
        } else if (
            this._state === State.InComment ||
            this._state === State.AfterComment1 ||
            this._state === State.AfterComment2
        ) {
            this._cbs.oncomment(data);
        } else if (this._state === State.InNamedEntity && !this._xmlMode) {
            this._parseLegacyEntity();
            // Whatever the entity did not cover is handled as the base state's data.
            if (this._sectionStart < this._index) {
                this._state = this._baseState;
                this._handleTrailingData();
            }
        } else if (this._state === State.InNumericEntity && !this._xmlMode) {
            this._decodeNumericEntity(2, 10);
            if (this._sectionStart < this._index) {
                this._state = this._baseState;
                this._handleTrailingData();
            }
        } else if (this._state === State.InHexEntity && !this._xmlMode) {
            this._decodeNumericEntity(3, 16);
            if (this._sectionStart < this._index) {
                this._state = this._baseState;
                this._handleTrailingData();
            }
        } else if (
            this._state !== State.InTagName &&
            this._state !== State.BeforeAttributeName &&
            this._state !== State.BeforeAttributeValue &&
            this._state !== State.AfterAttributeName &&
            this._state !== State.InAttributeName &&
            this._state !== State.InAttributeValueSq &&
            this._state !== State.InAttributeValueDq &&
            this._state !== State.InAttributeValueNq &&
            this._state !== State.InClosingTagName
        ) {
            this._cbs.ontext(data);
        }
        //else, ignore remaining data
        //TODO add a way to remove current tag
    }

    /** @returns The position of the current character counted from the start of all input written so far. */
    getAbsoluteIndex(): number {
        return this._bufferOffset + this._index;
    }

    /** @returns The buffer contents from the section start up to (not including) the current index. */
    _getSection(): string {
        return this._buffer.substring(this._sectionStart, this._index);
    }

    /** Passes the current section to the named callback and closes the section (`_sectionStart = -1`). */
    _emitToken(name: "onopentagname" | "onclosetag" | "onattribdata") {
        this._cbs[name](this._getSection());
        this._sectionStart = -1;
    }

    /** Emits a decoded entity as attribute data or as text, depending on where the entity occurred. */
    _emitPartial(value: string) {
        if (this._baseState !== State.Text) {
            this._cbs.onattribdata(value); //TODO implement the new event
        } else {
            this._cbs.ontext(value);
        }
    }
}