You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-07-16 00:14:34 +02:00
All: Use Lerna to manage monorepo
This commit is contained in:
38
packages/fork-htmlparser2/src/CollectingHandler.ts
Normal file
38
packages/fork-htmlparser2/src/CollectingHandler.ts
Normal file
@ -0,0 +1,38 @@
|
||||
import MultiplexHandler from "./MultiplexHandler";
|
||||
import { Handler } from "./Parser";
|
||||
|
||||
/**
 * Handler that records every event it receives in `events` while also
 * forwarding it to the wrapped callbacks, so the recorded sequence can be
 * replayed later through `restart()`.
 */
export class CollectingHandler extends MultiplexHandler {
    /** The wrapped callbacks that events are forwarded to. */
    _cbs: Partial<Handler>;
    /** Recorded events, each stored as `[eventName, ...eventArguments]`. */
    events: [keyof Handler, ...unknown[]][];

    /**
     * @param cbs Callbacks to forward events to; defaults to none.
     */
    constructor(cbs: Partial<Handler> = {}) {
        super((eventName, ...eventArgs) => {
            // Record first, then forward to the matching callback (if any).
            this.events.push([eventName, ...eventArgs]);
            // @ts-ignore
            if (this._cbs[eventName]) this._cbs[eventName](...eventArgs);
        });

        this.events = [];
        this._cbs = cbs;
    }

    /**
     * Drops all recorded events and forwards the reset to the wrapped
     * callbacks. Overrides the multiplexed version, so the reset itself is
     * not recorded.
     */
    onreset() {
        this.events = [];
        if (this._cbs.onreset) {
            this._cbs.onreset();
        }
    }

    /**
     * Resets the wrapped callbacks, then replays every recorded event to
     * them in the original order. The recorded events are kept.
     */
    restart() {
        if (this._cbs.onreset) {
            this._cbs.onreset();
        }

        for (const [eventName, ...eventArgs] of this.events) {
            // @ts-ignore
            if (this._cbs[eventName]) this._cbs[eventName](...eventArgs);
        }
    }
}
|
25
packages/fork-htmlparser2/src/FeedHandler.spec.ts
Normal file
25
packages/fork-htmlparser2/src/FeedHandler.spec.ts
Normal file
@ -0,0 +1,25 @@
|
||||
//Runs tests for feeds
|
||||
|
||||
import * as helper from "./__fixtures__/test-helper";
|
||||
import { FeedHandler, parseFeed } from "./FeedHandler";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
// Directory that holds the fixture documents used by these tests.
const documents = path.join(__dirname, "__fixtures__", "Documents");

// Fixture-driven suite: for each test description the referenced file is read
// and handed, together with a FeedHandler, to `helper.writeToParser`. The
// handler's callback reports the error (if any) and the extracted feed.
helper.createSuite("Feeds", (test, cb) => {
    const fixturePath = path.join(documents, test.file);
    const xml = fs.readFileSync(fixturePath, "utf8");
    const handler: FeedHandler = new FeedHandler(err => cb(err, handler.feed));

    helper.writeToParser(handler, { xmlMode: true }, xml);
});

describe("parseFeed", () => {
    test("(rssFeed)", async () => {
        const rssPath = path.join(documents, "RSS_Example.xml");
        const rssSource = await fs.promises.readFile(rssPath, "utf8");
        const parsed = parseFeed(rssSource);

        expect(parsed).toMatchSnapshot();
    });
});
|
205
packages/fork-htmlparser2/src/FeedHandler.ts
Normal file
205
packages/fork-htmlparser2/src/FeedHandler.ts
Normal file
@ -0,0 +1,205 @@
|
||||
import DomHandler, { DomHandlerOptions, Node, Element } from "domhandler";
|
||||
import * as DomUtils from "domutils";
|
||||
import { Parser, ParserOptions } from "./Parser";
|
||||
|
||||
/**
 * One entry of a parsed feed. Every field is optional and is only set when
 * the corresponding element was found with non-empty content.
 */
interface FeedItem {
    /** Text of the entry's `id` (Atom) or `guid` (RSS/RDF) element. */
    id?: string;
    /** Text of the entry's `title` element. */
    title?: string;
    /** `href` attribute of the `link` element (Atom) or its text (RSS/RDF). */
    link?: string;
    /** Text of `summary`, falling back to `content` (Atom), or of `description` (RSS/RDF). */
    description?: string;
    /** Date built from the `updated` (Atom) or `pubDate` (RSS/RDF) text. */
    pubDate?: Date;
}
|
||||
|
||||
/**
 * Feed-level metadata plus the list of entries. Every field is optional and
 * is only set when the corresponding data was found.
 */
interface Feed {
    /** `"atom"` for Atom feeds, otherwise the first three characters of the root element name. */
    type?: string;
    /** Text of the feed's `id` element (Atom); the empty string for RSS/RDF feeds. */
    id?: string;
    /** Text of the feed's `title` element. */
    title?: string;
    /** `href` attribute of the `link` element (Atom) or its text (RSS/RDF). */
    link?: string;
    /** Text of `subtitle` (Atom) or `description` (RSS/RDF). */
    description?: string;
    /** Date built from the `updated` (Atom) or `lastBuildDate` (RSS/RDF) text. */
    updated?: Date;
    /** Text of the first `email` (Atom) or `managingEditor` (RSS/RDF) element. */
    author?: string;
    /** The feed's entries, in document order. */
    items?: FeedItem[];
}
|
||||
|
||||
//TODO: Consume data as it is coming in
|
||||
/**
 * DOM handler that, once parsing has ended, extracts feed metadata from the
 * collected DOM and stores it in `feed`. Atom (`<feed>`), RSS (`<rss>`) and
 * RDF (`<rdf:RDF>`) root elements are recognised.
 */
export class FeedHandler extends DomHandler {
    /** The extracted feed; assigned by `onend`. */
    feed?: Feed;

    /**
     * @param callback Invoked once the feed has been processed, with `null`
     *     on success or an `Error` when no feed root was found. An options
     *     object may be passed in this position instead.
     * @param options Options forwarded to `DomHandler`.
     */
    constructor(
        callback?: ((error: Error | null) => void) | DomHandlerOptions,
        options?: DomHandlerOptions
    ) {
        if (typeof callback === "object" && callback !== null) {
            // Options were passed in the callback position. They must be
            // copied BEFORE `callback` is cleared; doing it the other way
            // round silently discards them.
            options = callback;
            callback = undefined;
        }
        super(callback, options);
    }

    /**
     * Builds `feed` from the parsed DOM and reports the outcome through
     * `handleCallback`.
     */
    onend() {
        const feed: Feed = {};
        const feedRoot = getOneElement(isValidFeed, this.dom);

        if (feedRoot) {
            if (feedRoot.name === "feed") {
                // Atom feed
                const childs = feedRoot.children;
                feed.type = "atom";
                addConditionally(feed, "id", "id", childs);
                addConditionally(feed, "title", "title", childs);
                const href = getAttribute(
                    "href",
                    getOneElement("link", childs)
                );
                if (href) {
                    feed.link = href;
                }
                addConditionally(feed, "description", "subtitle", childs);

                const updated = fetch("updated", childs);
                if (updated) {
                    feed.updated = new Date(updated);
                }

                addConditionally(feed, "author", "email", childs, true);
                feed.items = getElements("entry", childs).map(item => {
                    const entry: FeedItem = {};
                    const { children } = item;

                    addConditionally(entry, "id", "id", children);
                    addConditionally(entry, "title", "title", children);

                    const href = getAttribute(
                        "href",
                        getOneElement("link", children)
                    );
                    if (href) {
                        entry.link = href;
                    }

                    // Prefer the summary, fall back to the full content.
                    const description =
                        fetch("summary", children) ||
                        fetch("content", children);
                    if (description) {
                        entry.description = description;
                    }

                    const pubDate = fetch("updated", children);
                    if (pubDate) {
                        entry.pubDate = new Date(pubDate);
                    }

                    return entry;
                });
            } else {
                // RSS / RDF feed. A document without a <channel> element
                // yields no channel metadata instead of throwing.
                const channel = getOneElement("channel", feedRoot.children);
                const childs = channel ? channel.children : [];
                // "rss" or "rdf"
                feed.type = feedRoot.name.slice(0, 3);
                feed.id = "";

                addConditionally(feed, "title", "title", childs);
                addConditionally(feed, "link", "link", childs);
                addConditionally(feed, "description", "description", childs);

                const updated = fetch("lastBuildDate", childs);
                if (updated) {
                    feed.updated = new Date(updated);
                }

                addConditionally(
                    feed,
                    "author",
                    "managingEditor",
                    childs,
                    true
                );

                // Items are looked up from the root element, not the channel.
                feed.items = getElements("item", feedRoot.children).map(
                    (item: Element) => {
                        const entry: FeedItem = {};
                        const { children } = item;
                        addConditionally(entry, "id", "guid", children);
                        addConditionally(entry, "title", "title", children);
                        addConditionally(entry, "link", "link", children);
                        addConditionally(
                            entry,
                            "description",
                            "description",
                            children
                        );
                        const pubDate = fetch("pubDate", children);
                        if (pubDate) entry.pubDate = new Date(pubDate);
                        return entry;
                    }
                );
            }
        }

        this.feed = feed;

        this.handleCallback(
            feedRoot ? null : Error("couldn't find root of feed")
        );
    }
}
|
||||
|
||||
/**
 * Looks up all elements with the tag name `what` inside `where`, with the
 * recursive search flag enabled.
 */
function getElements(what: string, where: Node | Node[]) {
    const recurse = true;
    return DomUtils.getElementsByTagName(what, where, recurse);
}
|
||||
/**
 * Returns the first element inside `where` whose tag name equals `what` (or
 * satisfies it, when `what` is a predicate); `undefined` when none matches.
 * The search is recursive and limited to a single result.
 */
function getOneElement(
    what: string | ((name: string) => boolean),
    where: Node | Node[]
) {
    const matches = DomUtils.getElementsByTagName(what, where, true, 1);
    return matches[0];
}
|
||||
/**
 * Returns the trimmed text of the first element named `what` inside `where`.
 *
 * @param recurse Passed to the element lookup as its recursion flag.
 */
function fetch(what: string, where: Node | Node[], recurse = false): string {
    const firstMatch = DomUtils.getElementsByTagName(what, where, recurse, 1);
    const text = DomUtils.getText(firstMatch);
    return text.trim();
}
|
||||
|
||||
/**
 * Reads the attribute `name` from `elem`.
 *
 * @returns `null` when no element is given, otherwise whatever the element's
 *     attribute map holds under `name`.
 */
function getAttribute(name: string, elem: Element | null): string | null {
    return elem ? elem.attribs[name] : null;
}
|
||||
|
||||
/**
 * Copies the text of the first `what` element inside `where` to
 * `obj[prop]`, but only when that text is non-empty.
 *
 * @param recurse Forwarded to `fetch`.
 */
function addConditionally<T>(
    obj: T,
    prop: keyof T,
    what: string,
    where: Node | Node[],
    recurse = false
) {
    const text = fetch(what, where, recurse);
    if (!text) return;
    // @ts-ignore
    obj[prop] = text;
}
|
||||
|
||||
/** Tells whether `value` is the tag name of a supported feed root element. */
function isValidFeed(value: string) {
    switch (value) {
        case "rss":
        case "feed":
        case "rdf:RDF":
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
/** Options used by `parseFeed` when the caller supplies none. */
const defaultOptions = { xmlMode: true };

/**
 * Parses a complete feed document in one call.
 *
 * @param feed The feed that should be parsed, as a string.
 * @param options Optional settings; the same object is given to both the
 *     feed handler and the parser. When supplying your own, you probably
 *     want to set `xmlMode` to `true`.
 * @returns The handler's `feed` property once the input has been processed.
 */
export function parseFeed(
    feed: string,
    options: ParserOptions & DomHandlerOptions = defaultOptions
): Feed | undefined {
    const feedHandler = new FeedHandler(options);
    const parser = new Parser(feedHandler, options);
    parser.end(feed);
    return feedHandler.feed;
}
|
58
packages/fork-htmlparser2/src/MultiplexHandler.ts
Normal file
58
packages/fork-htmlparser2/src/MultiplexHandler.ts
Normal file
@ -0,0 +1,58 @@
|
||||
import { Handler } from "./Parser";
|
||||
|
||||
/**
 * Funnels every handler event into a single function, which receives the
 * event name followed by the event's arguments.
 */
export default class MultiplexHandler implements Handler {
    /** The function all events are multiplexed to. */
    _func: (event: keyof Handler, ...args: unknown[]) => void;

    /**
     * @param func The function to multiplex all events to.
     */
    constructor(func: (event: keyof Handler, ...args: unknown[]) => void) {
        this._func = func;
    }

    // Lifecycle events

    onparserinit(parser: {}) {
        this._func("onparserinit", parser);
    }

    onreset() {
        this._func("onreset");
    }

    onend() {
        this._func("onend");
    }

    onerror(error: Error) {
        this._func("onerror", error);
    }

    // Tag and attribute events

    onopentagname(name: string) {
        this._func("onopentagname", name);
    }

    onattribute(name: string, value: string) {
        this._func("onattribute", name, value);
    }

    onopentag(name: string, attribs: { [key: string]: string }) {
        this._func("onopentag", name, attribs);
    }

    onclosetag(name: string) {
        this._func("onclosetag", name);
    }

    // Content events

    ontext(text: string) {
        this._func("ontext", text);
    }

    oncomment(comment: string) {
        this._func("oncomment", comment);
    }

    oncommentend() {
        this._func("oncommentend");
    }

    oncdatastart() {
        this._func("oncdatastart");
    }

    oncdataend() {
        this._func("oncdataend");
    }

    onprocessinginstruction(name: string, value: string) {
        this._func("onprocessinginstruction", name, value);
    }
}
|
98
packages/fork-htmlparser2/src/Parser.spec.ts
Normal file
98
packages/fork-htmlparser2/src/Parser.spec.ts
Normal file
@ -0,0 +1,98 @@
|
||||
import { Parser, Tokenizer } from ".";
|
||||
|
||||
describe("API", () => {
    test("should work without callbacks", () => {
        const parser = new Parser(null, {
            xmlMode: true,
            lowerCaseAttributeNames: true
        });

        parser.end(
            "<a foo><bar></a><!-- --><![CDATA[]]]><?foo?><!bar><boo/>boohay"
        );
        parser.write("foo");

        // Both write() and end() after the parser has ended are expected to
        // be reported through onerror.
        parser.end();
        let errorSeen = false;
        parser._cbs.onerror = () => (errorSeen = true);
        parser.write("foo");
        expect(errorSeen).toBeTruthy();
        errorSeen = false;
        parser.end();
        expect(errorSeen).toBeTruthy();

        parser.reset();

        // Remove a callback while a tag is only partially written.
        parser._cbs.onopentag = () => {};
        parser.write("<a foo");
        delete parser._cbs.onopentag;
        parser.write(">");

        // pause/resume: text written while paused is only delivered on resume.
        let textSeen = false;
        parser._cbs.ontext = text => {
            expect(text).toBe("foo");
            textSeen = true;
        };
        parser.pause();
        parser.write("foo");
        expect(textSeen).toBeFalsy();
        parser.resume();
        expect(textSeen).toBeTruthy();

        // Pausing and resuming with nothing pending delivers nothing.
        textSeen = false;
        parser.pause();
        expect(textSeen).toBeFalsy();
        parser.resume();
        expect(textSeen).toBeFalsy();

        // end() while paused is deferred until resume as well.
        parser.pause();
        parser.end("foo");
        expect(textSeen).toBeFalsy();
        parser.resume();
        expect(textSeen).toBeTruthy();
    });

    test("should update the position", () => {
        const parser = new Parser(null);

        parser.write("foo");

        expect(parser.startIndex).toBe(0);
        expect(parser.endIndex).toBe(2);

        parser.write("<bar>");

        expect(parser.startIndex).toBe(3);
        expect(parser.endIndex).toBe(7);
    });

    test("should update the position when a single tag is spread across multiple chunks", () => {
        const parser = new Parser(null);

        parser.write("<div ");
        parser.write("foo=bar>");

        expect(parser.startIndex).toBe(0);
        expect(parser.endIndex).toBe(12);
    });

    test("should parse <__proto__>", () => {
        const parser = new Parser(null);

        // Should not throw (see #387)
        parser.write("<__proto__>");
    });

    test("should support custom tokenizer", () => {
        class CustomTokenizer extends Tokenizer {}

        const handler = {
            onparserinit(parser: Parser) {
                expect(parser._tokenizer).toBeInstanceOf(CustomTokenizer);
            }
        };
        const parser = new Parser(handler, { Tokenizer: CustomTokenizer });
        parser.done();
    });
});
|
473
packages/fork-htmlparser2/src/Parser.ts
Normal file
473
packages/fork-htmlparser2/src/Parser.ts
Normal file
@ -0,0 +1,473 @@
|
||||
import Tokenizer from "./Tokenizer";
|
||||
import { EventEmitter } from "events";
|
||||
|
||||
/** Open form-control tags that are closed implicitly when another form control opens. */
const formTags = new Set([
    "input", "option", "optgroup", "select", "button", "datalist", "textarea"
]);

/** Shared by every tag whose opening implicitly closes an open `<p>`. */
const pTag = new Set(["p"]);

/**
 * Maps a tag name to the set of tags that get closed implicitly (while they
 * are on top of the open-tag stack) when that tag is opened in HTML mode.
 */
const openImpliesClose = {
    // Table structure
    tr: new Set(["tr", "th", "td"]),
    th: new Set(["th"]),
    td: new Set(["thead", "th", "td"]),
    tbody: new Set(["thead", "tbody"]),
    tfoot: new Set(["thead", "tbody"]),

    // Document structure
    body: new Set(["head", "link", "script"]),

    // Lists
    li: new Set(["li"]),
    dd: new Set(["dt", "dd"]),
    dt: new Set(["dt", "dd"]),

    // Ruby annotations
    rt: new Set(["rt", "rp"]),
    rp: new Set(["rt", "rp"]),

    // Form controls
    select: formTags,
    input: formTags,
    output: formTags,
    button: formTags,
    datalist: formTags,
    textarea: formTags,
    option: new Set(["option"]),
    optgroup: new Set(["optgroup", "option"]),

    // Paragraphs and headings
    p: pTag,
    h1: pTag,
    h2: pTag,
    h3: pTag,
    h4: pTag,
    h5: pTag,
    h6: pTag,

    // Other tags that close an open paragraph
    address: pTag,
    article: pTag,
    aside: pTag,
    blockquote: pTag,
    details: pTag,
    div: pTag,
    dl: pTag,
    fieldset: pTag,
    figcaption: pTag,
    figure: pTag,
    footer: pTag,
    form: pTag,
    header: pTag,
    hr: pTag,
    main: pTag,
    nav: pTag,
    ol: pTag,
    pre: pTag,
    section: pTag,
    table: pTag,
    ul: pTag
};
|
||||
|
||||
/** Tags that are never pushed onto the open-tag stack in HTML mode. */
const voidElements = new Set([
    "area", "base", "basefont", "br", "col", "command", "embed", "frame",
    "hr", "img", "input", "isindex", "keygen", "link", "meta", "param",
    "source", "track", "wbr"
]);

/** Tags that switch the parser into a foreign context. */
const foreignContextElements = new Set(["math", "svg"]);

/** Tags that switch the parser back out of a foreign context. */
const htmlIntegrationElements = new Set([
    "mi", "mo", "mn", "ms", "mtext", "annotation-xml", "foreignObject",
    "desc", "title"
]);
|
||||
|
||||
/** Options accepted by `Parser`. */
export interface ParserOptions {
    /**
     * Indicates whether special tags (<script> and <style>) should get special treatment
     * and if "empty" tags (eg. <br>) can have children. If `false`, the content of special tags
     * will be text only. For feeds and other XML content (documents that don't consist of HTML),
     * set this to `true`. Default: `false`.
     */
    xmlMode?: boolean;

    /**
     * If set to true, entities within the document will be decoded. Defaults to `false`.
     */
    decodeEntities?: boolean;

    /**
     * If set to true, all tags will be lowercased. If xmlMode is disabled, this defaults to `true`.
     */
    lowerCaseTags?: boolean;

    /**
     * If set to `true`, all attribute names will be lowercased. When the
     * option is not given, it is enabled unless xmlMode is set.
     */
    lowerCaseAttributeNames?: boolean;

    /**
     * If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled.
     * NOTE: If xmlMode is set to `true` then CDATA sections will always be recognized as text.
     */
    recognizeCDATA?: boolean;

    /**
     * If set to `true`, self-closing tags will trigger the onclosetag event even if xmlMode is not set to `true`.
     * NOTE: If xmlMode is set to `true` then self-closing tags will always be recognized.
     */
    recognizeSelfClosing?: boolean;

    /**
     * Allows the default tokenizer to be overwritten.
     */
    Tokenizer?: typeof Tokenizer;
}
|
||||
|
||||
/** The callbacks a `Parser` may invoke while processing a document. */
export interface Handler {
    /** Receives the parser when it is constructed and again after a reset. */
    onparserinit(parser: Parser): void;

    /**
     * Resets the handler back to starting state
     */
    onreset(): void;

    /**
     * Signals the handler that parsing is done
     */
    onend(): void;

    /** Reports an error raised while parsing. */
    onerror(error: Error): void;

    /** Reports a tag being closed. */
    onclosetag(name: string): void;

    /** Reports the name of an opening tag, before its attributes are known. */
    onopentagname(name: string): void;

    /** Reports a single attribute of the tag currently being opened. */
    onattribute(name: string, value: string): void;

    /** Reports a completed opening tag together with its collected attributes. */
    onopentag(name: string, attribs: { [s: string]: string }): void;

    /** Reports text content. */
    ontext(data: string): void;

    /** Reports the content of a comment. */
    oncomment(data: string): void;

    /** Signals the start of a CDATA section. */
    oncdatastart(): void;

    /** Signals the end of a CDATA section. */
    oncdataend(): void;

    /** Signals the end of a comment. */
    oncommentend(): void;

    /** Reports a declaration (name prefixed with `!`) or processing instruction (prefixed with `?`). */
    onprocessinginstruction(name: string, data: string): void;
}
|
||||
|
||||
/** Matches the first whitespace or `/`, i.e. the end of a declaration or instruction name. */
const reNameEnd = /\s|\//;

/**
 * Receives the events emitted by the tokenizer, tracks the stack of open
 * tags, and forwards higher-level events to the callbacks in `_cbs`.
 */
export class Parser extends EventEmitter {
    /** Name of the tag currently being opened. */
    _tagname = "";
    /** Name of the attribute currently being read. */
    _attribname = "";
    /** Value collected so far for the attribute currently being read. */
    _attribvalue = "";
    /** Attributes of the tag being opened; only collected while an `onopentag` callback exists. */
    _attribs: null | { [key: string]: string } = null;
    /** Names of the currently open tags, innermost last. */
    _stack: string[] = [];
    /** Foreign-context flags; the last entry decides whether self-closing tags are honoured. */
    _foreignContext: boolean[] = [];
    /** Start index of the most recently reported token. */
    startIndex = 0;
    /** End index of the most recently reported token; `null` before the first one. */
    endIndex: number | null = null;
    _cbs: Partial<Handler>;
    _options: ParserOptions;
    _lowerCaseTagNames: boolean;
    _lowerCaseAttributeNames: boolean;
    _tokenizer: Tokenizer;

    /**
     * @param cbs Callbacks to notify; `null` means none.
     * @param options Parser settings; see `ParserOptions`.
     */
    constructor(cbs: Partial<Handler> | null, options?: ParserOptions) {
        super();

        this._options = options || {};
        this._cbs = cbs || {};
        this._tagname = "";
        this._attribname = "";
        this._attribvalue = "";
        this._attribs = null;
        this._stack = [];
        this._foreignContext = [];
        this.startIndex = 0;
        this.endIndex = null;
        // Both lowercase settings default to "on" unless xmlMode is set.
        this._lowerCaseTagNames =
            "lowerCaseTags" in this._options
                ? !!this._options.lowerCaseTags
                : !this._options.xmlMode;
        this._lowerCaseAttributeNames =
            "lowerCaseAttributeNames" in this._options
                ? !!this._options.lowerCaseAttributeNames
                : !this._options.xmlMode;
        this._tokenizer = new (this._options.Tokenizer || Tokenizer)(
            this._options,
            this
        );
        if (this._cbs.onparserinit) this._cbs.onparserinit(this);
    }

    /**
     * Updates `startIndex` / `endIndex` for the token that was just read.
     *
     * @param initialOffset Offset subtracted from the tokenizer's section
     *     start when computing the very first start index.
     */
    _updatePosition(initialOffset: number) {
        if (this.endIndex === null) {
            if (this._tokenizer._sectionStart <= initialOffset) {
                this.startIndex = 0;
            } else {
                this.startIndex = this._tokenizer._sectionStart - initialOffset;
            }
        } else this.startIndex = this.endIndex + 1;
        this.endIndex = this._tokenizer.getAbsoluteIndex();
    }

    //Tokenizer event handlers

    /** Handles a run of text. */
    ontext(data: string) {
        this._updatePosition(1);
        // `_updatePosition` always leaves a number in `endIndex`.
        // @ts-ignore
        this.endIndex--;
        if (this._cbs.ontext) this._cbs.ontext(data);
    }

    /**
     * Handles the name of an opening tag: closes tags that the new tag
     * implicitly ends, pushes the tag onto the stack (unless it is a void
     * element in HTML mode) and starts collecting attributes.
     */
    onopentagname(name: string) {
        if (this._lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        this._tagname = name;
        if (
            !this._options.xmlMode &&
            Object.prototype.hasOwnProperty.call(openImpliesClose, name)
        ) {
            const impliedClose = (openImpliesClose as Record<
                string,
                Set<string>
            >)[name];
            // Keep closing the innermost open tag while it is one this tag
            // implicitly closes.
            let el;
            while (
                impliedClose.has((el = this._stack[this._stack.length - 1]))
            ) {
                this.onclosetag(el);
            }
        }
        if (this._options.xmlMode || !voidElements.has(name)) {
            this._stack.push(name);
            if (foreignContextElements.has(name)) {
                this._foreignContext.push(true);
            } else if (htmlIntegrationElements.has(name)) {
                this._foreignContext.push(false);
            }
        }
        if (this._cbs.onopentagname) this._cbs.onopentagname(name);
        if (this._cbs.onopentag) this._attribs = {};
    }

    /**
     * Handles the end of an opening tag: reports the tag with its attributes
     * and, in HTML mode, immediately reports void elements as closed.
     */
    onopentagend() {
        this._updatePosition(1);
        if (this._attribs) {
            if (this._cbs.onopentag) {
                this._cbs.onopentag(this._tagname, this._attribs);
            }
            this._attribs = null;
        }
        if (
            !this._options.xmlMode &&
            this._cbs.onclosetag &&
            voidElements.has(this._tagname)
        ) {
            this._cbs.onclosetag(this._tagname);
        }
        this._tagname = "";
    }

    /** Handles a closing tag. */
    onclosetag(name: string) {
        // When this is true, the onclosetag event is emitted for closing
        // tags (eg </div>) even if that tag was not previously open, as long
        // as no other tag is open at that point. This is needed because we
        // reconstruct the HTML based on fragments that don't necessarily
        // contain the opening tag. Without this patch the closing tag would
        // disappear from the output.
        const alwaysClose = true;

        this._updatePosition(1);
        if (this._lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        if (
            foreignContextElements.has(name) ||
            htmlIntegrationElements.has(name)
        ) {
            this._foreignContext.pop();
        }
        if (
            this._stack.length &&
            (this._options.xmlMode || !voidElements.has(name))
        ) {
            let pos = this._stack.lastIndexOf(name);
            if (pos !== -1) {
                if (this._cbs.onclosetag) {
                    // Close the tag and everything opened after it.
                    pos = this._stack.length - pos;
                    // @ts-ignore
                    while (pos--) this._cbs.onclosetag(this._stack.pop());
                } else this._stack.length = pos;
            } else if (name === "p" && !this._options.xmlMode) {
                // A stray </p> is treated as an empty paragraph.
                this.onopentagname(name);
                this._closeCurrentTag();
            }
            // NOTE: an unmatched closing tag is dropped while other tags are
            // still open. The branch that used to follow here additionally
            // required an empty stack and therefore could never run.
        } else if (!this._options.xmlMode && (name === "br" || name === "p")) {
            this.onopentagname(name);
            this._closeCurrentTag();
        } else if (!this._stack.length && alwaysClose && this._cbs.onclosetag) {
            this._cbs.onclosetag(name);
        }
    }

    /**
     * Handles the `/>` of a self-closing tag. The tag is closed right away in
     * xmlMode, with `recognizeSelfClosing`, or inside a foreign context;
     * otherwise it is treated like a regular opening tag.
     */
    onselfclosingtag() {
        if (
            this._options.xmlMode ||
            this._options.recognizeSelfClosing ||
            this._foreignContext[this._foreignContext.length - 1]
        ) {
            this._closeCurrentTag();
        } else {
            this.onopentagend();
        }
    }

    /** Finishes the tag currently being opened and closes it again at once. */
    _closeCurrentTag() {
        const name = this._tagname;
        this.onopentagend();
        //self-closing tags will be on the top of the stack
        //(cheaper check than in onclosetag)
        if (this._stack[this._stack.length - 1] === name) {
            if (this._cbs.onclosetag) {
                this._cbs.onclosetag(name);
            }
            this._stack.pop();
        }
    }

    /** Handles an attribute name. */
    onattribname(name: string) {
        if (this._lowerCaseAttributeNames) {
            name = name.toLowerCase();
        }
        this._attribname = name;
    }

    /** Appends a chunk to the value of the attribute currently being read. */
    onattribdata(value: string) {
        this._attribvalue += value;
    }

    /**
     * Handles the end of an attribute: reports it and stores it in the
     * current attribute map. The first occurrence of a name wins.
     */
    onattribend() {
        if (this._cbs.onattribute)
            this._cbs.onattribute(this._attribname, this._attribvalue);
        if (
            this._attribs &&
            !Object.prototype.hasOwnProperty.call(
                this._attribs,
                this._attribname
            )
        ) {
            this._attribs[this._attribname] = this._attribvalue;
        }
        this._attribname = "";
        this._attribvalue = "";
    }

    /**
     * Extracts the name (everything before the first whitespace or `/`) from
     * the content of a declaration or processing instruction.
     */
    _getInstructionName(value: string) {
        const idx = value.search(reNameEnd);
        let name = idx < 0 ? value : value.slice(0, idx);

        if (this._lowerCaseTagNames) {
            name = name.toLowerCase();
        }

        return name;
    }

    /** Handles a declaration; reported as a processing instruction prefixed with `!`. */
    ondeclaration(value: string) {
        if (this._cbs.onprocessinginstruction) {
            const name = this._getInstructionName(value);
            this._cbs.onprocessinginstruction(`!${name}`, `!${value}`);
        }
    }

    /** Handles a processing instruction; reported with a `?` prefix. */
    onprocessinginstruction(value: string) {
        if (this._cbs.onprocessinginstruction) {
            const name = this._getInstructionName(value);
            this._cbs.onprocessinginstruction(`?${name}`, `?${value}`);
        }
    }

    /** Handles a comment. */
    oncomment(value: string) {
        this._updatePosition(4);
        if (this._cbs.oncomment) this._cbs.oncomment(value);
        if (this._cbs.oncommentend) this._cbs.oncommentend();
    }

    /**
     * Handles a CDATA section: reported as text in xmlMode or with
     * `recognizeCDATA`, otherwise as a comment.
     */
    oncdata(value: string) {
        this._updatePosition(1);
        if (this._options.xmlMode || this._options.recognizeCDATA) {
            if (this._cbs.oncdatastart) this._cbs.oncdatastart();
            if (this._cbs.ontext) this._cbs.ontext(value);
            if (this._cbs.oncdataend) this._cbs.oncdataend();
        } else {
            this.oncomment(`[CDATA[${value}]]`);
        }
    }

    /** Forwards an error to the `onerror` callback. */
    onerror(err: Error) {
        if (this._cbs.onerror) this._cbs.onerror(err);
    }

    /** Handles the end of the input. */
    onend() {
        // Tags that are still open are deliberately NOT auto-closed here.
        // Since we deal with fragments that may contain the opening tag but
        // not the closing one, we don't want that closing tag to be
        // auto-added.
        if (this._cbs.onend) this._cbs.onend();
    }

    //Resets the parser to a blank state, ready to parse a new HTML document
    reset() {
        if (this._cbs.onreset) this._cbs.onreset();
        this._tokenizer.reset();
        this._tagname = "";
        this._attribname = "";
        // Also drop a half-read attribute value, the foreign-context flags
        // and the position info, so nothing leaks into the next document.
        this._attribvalue = "";
        this._attribs = null;
        this._stack = [];
        this._foreignContext = [];
        this.startIndex = 0;
        this.endIndex = null;
        if (this._cbs.onparserinit) this._cbs.onparserinit(this);
    }

    //Parses a complete HTML document and pushes it to the handler
    parseComplete(data: string) {
        this.reset();
        this.end(data);
    }

    /** Feeds a chunk of input to the tokenizer. */
    write(chunk: string) {
        this._tokenizer.write(chunk);
    }

    /** Ends the input, optionally feeding a final chunk first. */
    end(chunk?: string) {
        this._tokenizer.end(chunk);
    }

    /** Pauses the tokenizer. */
    pause() {
        this._tokenizer.pause();
    }

    /** Resumes the tokenizer. */
    resume() {
        this._tokenizer.resume();
    }

    // Aliases for backwards compatibility
    parseChunk = Parser.prototype.write;
    done = Parser.prototype.end;
}
|
906
packages/fork-htmlparser2/src/Tokenizer.ts
Normal file
906
packages/fork-htmlparser2/src/Tokenizer.ts
Normal file
@ -0,0 +1,906 @@
|
||||
import decodeCodePoint from "entities/lib/decode_codepoint";
|
||||
import entityMap from "entities/lib/maps/entities.json";
|
||||
import legacyMap from "entities/lib/maps/legacy.json";
|
||||
import xmlMap from "entities/lib/maps/xml.json";
|
||||
|
||||
/**
 * All the states the tokenizer can be in. Values are assigned implicitly,
 * starting at 1, so the order of the members matters.
 */
const enum State {
    Text = 1,
    /** after < */
    BeforeTagName,
    InTagName,
    InSelfClosingTag,
    BeforeClosingTagName,
    InClosingTagName,
    AfterClosingTagName,

    // ----- attributes -----
    BeforeAttributeName,
    InAttributeName,
    AfterAttributeName,
    BeforeAttributeValue,
    /** " */
    InAttributeValueDq,
    /** ' */
    InAttributeValueSq,
    InAttributeValueNq,

    // ----- declarations -----
    /** ! */
    BeforeDeclaration,
    InDeclaration,

    // ----- processing instructions -----
    /** ? */
    InProcessingInstruction,

    // ----- comments -----
    BeforeComment,
    InComment,
    AfterComment1,
    AfterComment2,

    // ----- cdata -----
    /** [ */
    BeforeCdata1,
    /** C */
    BeforeCdata2,
    /** D */
    BeforeCdata3,
    /** A */
    BeforeCdata4,
    /** T */
    BeforeCdata5,
    /** A */
    BeforeCdata6,
    /** [ */
    InCdata,
    /** ] */
    AfterCdata1,
    /** ] */
    AfterCdata2,

    // ----- special tags -----
    /** S */
    BeforeSpecial,
    /** S */
    BeforeSpecialEnd,

    /** C */
    BeforeScript1,
    /** R */
    BeforeScript2,
    /** I */
    BeforeScript3,
    /** P */
    BeforeScript4,
    /** T */
    BeforeScript5,
    /** C */
    AfterScript1,
    /** R */
    AfterScript2,
    /** I */
    AfterScript3,
    /** P */
    AfterScript4,
    /** T */
    AfterScript5,

    /** T */
    BeforeStyle1,
    /** Y */
    BeforeStyle2,
    /** L */
    BeforeStyle3,
    /** E */
    BeforeStyle4,
    /** T */
    AfterStyle1,
    /** Y */
    AfterStyle2,
    /** L */
    AfterStyle3,
    /** E */
    AfterStyle4,

    // ----- entities -----
    /** & */
    BeforeEntity,
    /** # */
    BeforeNumericEntity,
    InNamedEntity,
    InNumericEntity,
    /** X */
    InHexEntity
}
|
||||
|
||||
/** Which kind of special tag (if any) the tokenizer is dealing with. */
const enum Special {
    /** Not a special tag. */
    None = 1,
    /** A `script` tag. */
    Script,
    /** A `style` tag. */
    Style
}
|
||||
|
||||
/**
 * Tells whether `c` is exactly one of the characters the tokenizer treats as
 * whitespace: space, line feed, tab, form feed or carriage return.
 */
function whitespace(c: string): boolean {
    switch (c) {
        case " ":
        case "\n":
        case "\t":
        case "\f":
        case "\r":
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
/** The events a tokenizer reports to its owner. */
interface Callbacks {
    /** A chunk of the current attribute's value. */
    onattribdata(value: string): void; //TODO implement the new event
    /** The current attribute is complete. */
    onattribend(): void;
    /** The name of an attribute. */
    onattribname(name: string): void;
    /** The content of a CDATA section. */
    oncdata(data: string): void;
    /** A closing tag. */
    onclosetag(name: string): void;
    /** The content of a comment. */
    oncomment(data: string): void;
    /** The content of a declaration. */
    ondeclaration(content: string): void;
    /** The input has ended. */
    onend(): void;
    /** An error, optionally with the state it occurred in. */
    onerror(error: Error, state?: State): void;
    /** The end of an opening tag. */
    onopentagend(): void;
    /** The name of an opening tag. */
    onopentagname(name: string): void;
    /** The content of a processing instruction. */
    onprocessinginstruction(instruction: string): void;
    /** The end of a self-closing tag. */
    onselfclosingtag(): void;
    /** A run of text. */
    ontext(value: string): void;
}
|
||||
|
||||
/**
 * Builds a state handler that checks a single character.
 *
 * If the character equals `upper` or its lowercase form, the tokenizer moves
 * to `SUCCESS`. Otherwise it moves to `FAILURE` and `_index` is decremented,
 * so the same character is looked at again in the new state.
 */
function ifElseState(upper: string, SUCCESS: State, FAILURE: State) {
    const lower = upper.toLowerCase();

    return (t: Tokenizer, c: string) => {
        const accepted = c === lower || c === upper;
        if (accepted) {
            t._state = SUCCESS;
            return;
        }
        t._state = FAILURE;
        t._index--;
    };
}
|
||||
|
||||
/**
 * Builds a state handler for one character of a special tag name.
 *
 * If the character equals `upper` or its lowercase form, the tokenizer moves
 * to `NEXT_STATE`. Otherwise it falls back to `State.InTagName` and `_index`
 * is decremented, so the character is processed again as part of an ordinary
 * tag name.
 */
function consumeSpecialNameChar(upper: string, NEXT_STATE: State) {
    const lower = upper.toLowerCase();

    return (t: Tokenizer, c: string) => {
        const matches = c === lower || c === upper;
        if (matches) {
            t._state = NEXT_STATE;
            return;
        }
        t._state = State.InTagName;
        t._index--; //consume the token again
    };
}
|
||||
|
||||
// Single-character state handlers built from the two factories above. Each
// one advances to the next state when the expected character (in either
// case) is seen, and otherwise falls back as described on the factory.

// "CDATA" after "<![" -- anything else is treated as a declaration.
const stateBeforeCdata1 = ifElseState("C", State.BeforeCdata2, State.InDeclaration);
const stateBeforeCdata2 = ifElseState("D", State.BeforeCdata3, State.InDeclaration);
const stateBeforeCdata3 = ifElseState("A", State.BeforeCdata4, State.InDeclaration);
const stateBeforeCdata4 = ifElseState("T", State.BeforeCdata5, State.InDeclaration);
const stateBeforeCdata5 = ifElseState("A", State.BeforeCdata6, State.InDeclaration);

// Remaining letters of an opening "script" tag name.
const stateBeforeScript1 = consumeSpecialNameChar("R", State.BeforeScript2);
const stateBeforeScript2 = consumeSpecialNameChar("I", State.BeforeScript3);
const stateBeforeScript3 = consumeSpecialNameChar("P", State.BeforeScript4);
const stateBeforeScript4 = consumeSpecialNameChar("T", State.BeforeScript5);

// Remaining letters of a closing "script" tag name -- anything else is text.
const stateAfterScript1 = ifElseState("R", State.AfterScript2, State.Text);
const stateAfterScript2 = ifElseState("I", State.AfterScript3, State.Text);
const stateAfterScript3 = ifElseState("P", State.AfterScript4, State.Text);
const stateAfterScript4 = ifElseState("T", State.AfterScript5, State.Text);

// Remaining letters of an opening "style" tag name.
const stateBeforeStyle1 = consumeSpecialNameChar("Y", State.BeforeStyle2);
const stateBeforeStyle2 = consumeSpecialNameChar("L", State.BeforeStyle3);
const stateBeforeStyle3 = consumeSpecialNameChar("E", State.BeforeStyle4);

// Remaining letters of a closing "style" tag name -- anything else is text.
const stateAfterStyle1 = ifElseState("Y", State.AfterStyle2, State.Text);
const stateAfterStyle2 = ifElseState("L", State.AfterStyle3, State.Text);
const stateAfterStyle3 = ifElseState("E", State.AfterStyle4, State.Text);

// Entities: "#" selects a numeric entity, and "x"/"X" after it a hex entity.
const stateBeforeEntity = ifElseState("#", State.BeforeNumericEntity, State.InNamedEntity);
const stateBeforeNumericEntity = ifElseState("X", State.InHexEntity, State.InNumericEntity);
|
||||
|
||||
/**
 * Character-driven state machine that splits markup into tokens and reports
 * them through the supplied callbacks.
 *
 * Input is appended to an internal buffer by `write()` and consumed one
 * character at a time by `_parse()`, which dispatches to the handler of the
 * current state. Handlers that decrement `_index` do so in order to have the
 * same character processed again by the state they just selected (`_parse()`
 * increments `_index` after every handler call).
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state = State.Text;
    /** The read buffer. */
    _buffer = "";
    /** The beginning of the section that is currently being read. */
    _sectionStart = 0;
    /** The index within the buffer that we are currently looking at. */
    _index = 0;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    _bufferOffset = 0;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    _baseState = State.Text;
    /** For special parsing behavior inside of script and style tags. */
    _special = Special.None;
    /** Indicates whether the tokenizer has been paused. */
    _running = true;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    _ended = false;

    /** Callbacks that receive the tokens and lifecycle events. */
    _cbs: Callbacks;
    /** Whether `options.xmlMode` was truthy. */
    _xmlMode: boolean;
    /** Whether `options.decodeEntities` was truthy. */
    _decodeEntities: boolean;

    /**
     * @param options Tokenizer options; `null` leaves both flags disabled.
     * @param cbs Callbacks invoked for every token, for errors and at the end.
     */
    constructor(
        options: { xmlMode?: boolean; decodeEntities?: boolean } | null,
        cbs: Callbacks
    ) {
        this._cbs = cbs;
        this._xmlMode = !!(options && options.xmlMode);
        this._decodeEntities = !!(options && options.decodeEntities);
    }

    /** Restores every piece of parsing state to its initial value. */
    reset() {
        this._state = State.Text;
        this._buffer = "";
        this._sectionStart = 0;
        this._index = 0;
        this._bufferOffset = 0;
        this._baseState = State.Text;
        this._special = Special.None;
        this._running = true;
        this._ended = false;
    }

    /** Text state: flushes pending text when a tag (`<`) or, if enabled, an entity (`&`) starts. */
    _stateText(c: string) {
        if (c === "<") {
            if (this._index > this._sectionStart) {
                this._cbs.ontext(this._getSection());
            }
            this._state = State.BeforeTagName;
            this._sectionStart = this._index;
        } else if (
            this._decodeEntities &&
            this._special === Special.None &&
            c === "&"
        ) {
            if (this._index > this._sectionStart) {
                this._cbs.ontext(this._getSection());
            }
            this._baseState = State.Text;
            this._state = State.BeforeEntity;
            this._sectionStart = this._index;
        }
    }

    /** Decides, from the character following `<`, which kind of construct is starting. */
    _stateBeforeTagName(c: string) {
        if (c === "/") {
            this._state = State.BeforeClosingTagName;
        } else if (c === "<") {
            // The previous "<" was plain text; emit it and restart at this one.
            this._cbs.ontext(this._getSection());
            this._sectionStart = this._index;
        } else if (
            c === ">" ||
            this._special !== Special.None ||
            whitespace(c)
        ) {
            this._state = State.Text;
        } else if (c === "!") {
            this._state = State.BeforeDeclaration;
            this._sectionStart = this._index + 1;
        } else if (c === "?") {
            this._state = State.InProcessingInstruction;
            this._sectionStart = this._index + 1;
        } else {
            // Outside XML mode a leading "s" may start a script/style tag.
            this._state =
                !this._xmlMode && (c === "s" || c === "S")
                    ? State.BeforeSpecial
                    : State.InTagName;
            this._sectionStart = this._index;
        }
    }

    /** Emits the opening tag name once it is terminated by `/`, `>` or whitespace. */
    _stateInTagName(c: string) {
        if (c === "/" || c === ">" || whitespace(c)) {
            this._emitToken("onopentagname");
            this._state = State.BeforeAttributeName;
            this._index--;
        }
    }

    /** Handles the character(s) following `</`. */
    _stateBeforeClosingTagName(c: string) {
        if (whitespace(c)) {
            // ignore
        } else if (c === ">") {
            this._state = State.Text;
        } else if (this._special !== Special.None) {
            if (c === "s" || c === "S") {
                this._state = State.BeforeSpecialEnd;
            } else {
                this._state = State.Text;
                this._index--;
            }
        } else {
            this._state = State.InClosingTagName;
            this._sectionStart = this._index;
        }
    }

    /** Emits the closing tag name once it is terminated by `>` or whitespace. */
    _stateInClosingTagName(c: string) {
        if (c === ">" || whitespace(c)) {
            this._emitToken("onclosetag");
            this._state = State.AfterClosingTagName;
            this._index--;
        }
    }

    /** Skips everything after a closing tag name until `>`. */
    _stateAfterClosingTagName(c: string) {
        //skip everything until ">"
        if (c === ">") {
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        }
    }

    /** Inside an opening tag, between attributes. */
    _stateBeforeAttributeName(c: string) {
        if (c === ">") {
            this._cbs.onopentagend();
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (c === "/") {
            this._state = State.InSelfClosingTag;
        } else if (!whitespace(c)) {
            this._state = State.InAttributeName;
            this._sectionStart = this._index;
        }
    }

    /** After a `/` inside an opening tag: `>` completes a self-closing tag. */
    _stateInSelfClosingTag(c: string) {
        if (c === ">") {
            this._cbs.onselfclosingtag();
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (!whitespace(c)) {
            this._state = State.BeforeAttributeName;
            this._index--;
        }
    }

    /** Emits the attribute name once it is terminated by `=`, `/`, `>` or whitespace. */
    _stateInAttributeName(c: string) {
        if (c === "=" || c === "/" || c === ">" || whitespace(c)) {
            this._cbs.onattribname(this._getSection());
            this._sectionStart = -1;
            this._state = State.AfterAttributeName;
            this._index--;
        }
    }

    /** After an attribute name: either a value follows (`=`) or the attribute ends. */
    _stateAfterAttributeName(c: string) {
        if (c === "=") {
            this._state = State.BeforeAttributeValue;
        } else if (c === "/" || c === ">") {
            this._cbs.onattribend();
            this._state = State.BeforeAttributeName;
            this._index--;
        } else if (!whitespace(c)) {
            // A new attribute name starts right away.
            this._cbs.onattribend();
            this._state = State.InAttributeName;
            this._sectionStart = this._index;
        }
    }

    /** Selects the quoting style of the attribute value that is about to start. */
    _stateBeforeAttributeValue(c: string) {
        if (c === '"') {
            this._state = State.InAttributeValueDq;
            this._sectionStart = this._index + 1;
        } else if (c === "'") {
            this._state = State.InAttributeValueSq;
            this._sectionStart = this._index + 1;
        } else if (!whitespace(c)) {
            this._state = State.InAttributeValueNq;
            this._sectionStart = this._index;
            this._index--; //reconsume token
        }
    }

    /** Inside a double-quoted attribute value. */
    _stateInAttributeValueDoubleQuotes(c: string) {
        if (c === '"') {
            this._emitToken("onattribdata");
            this._cbs.onattribend();
            this._state = State.BeforeAttributeName;
        } else if (this._decodeEntities && c === "&") {
            this._emitToken("onattribdata");
            this._baseState = this._state;
            this._state = State.BeforeEntity;
            this._sectionStart = this._index;
        }
    }

    /** Inside a single-quoted attribute value. */
    _stateInAttributeValueSingleQuotes(c: string) {
        if (c === "'") {
            this._emitToken("onattribdata");
            this._cbs.onattribend();
            this._state = State.BeforeAttributeName;
        } else if (this._decodeEntities && c === "&") {
            this._emitToken("onattribdata");
            this._baseState = this._state;
            this._state = State.BeforeEntity;
            this._sectionStart = this._index;
        }
    }

    /** Inside an unquoted attribute value, which ends at whitespace or `>`. */
    _stateInAttributeValueNoQuotes(c: string) {
        if (whitespace(c) || c === ">") {
            this._emitToken("onattribdata");
            this._cbs.onattribend();
            this._state = State.BeforeAttributeName;
            this._index--;
        } else if (this._decodeEntities && c === "&") {
            this._emitToken("onattribdata");
            this._baseState = this._state;
            this._state = State.BeforeEntity;
            this._sectionStart = this._index;
        }
    }

    /** After `<!`: `[` may start CDATA, `-` may start a comment, anything else is a declaration. */
    _stateBeforeDeclaration(c: string) {
        this._state =
            c === "["
                ? State.BeforeCdata1
                : c === "-"
                ? State.BeforeComment
                : State.InDeclaration;
    }

    /** Emits the declaration when its closing `>` is reached. */
    _stateInDeclaration(c: string) {
        if (c === ">") {
            this._cbs.ondeclaration(this._getSection());
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        }
    }

    /** Emits the processing instruction when its closing `>` is reached. */
    _stateInProcessingInstruction(c: string) {
        if (c === ">") {
            this._cbs.onprocessinginstruction(this._getSection());
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        }
    }

    /** After `<!-`: a second `-` starts a comment, anything else is a declaration. */
    _stateBeforeComment(c: string) {
        if (c === "-") {
            this._state = State.InComment;
            this._sectionStart = this._index + 1;
        } else {
            this._state = State.InDeclaration;
        }
    }

    /** Inside a comment: watches for the first `-` of the terminator. */
    _stateInComment(c: string) {
        if (c === "-") this._state = State.AfterComment1;
    }

    /** After one `-` inside a comment. */
    _stateAfterComment1(c: string) {
        if (c === "-") {
            this._state = State.AfterComment2;
        } else {
            this._state = State.InComment;
        }
    }

    /** After `--` inside a comment: `>` ends the comment. */
    _stateAfterComment2(c: string) {
        if (c === ">") {
            //remove 2 trailing chars
            this._cbs.oncomment(
                this._buffer.substring(this._sectionStart, this._index - 2)
            );
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (c !== "-") {
            this._state = State.InComment;
        }
        // else: stay in AFTER_COMMENT_2 (`--->`)
    }

    /** Expects the second `[` of the CDATA opener; otherwise falls back to a declaration. */
    _stateBeforeCdata6(c: string) {
        if (c === "[") {
            this._state = State.InCdata;
            this._sectionStart = this._index + 1;
        } else {
            this._state = State.InDeclaration;
            this._index--;
        }
    }

    /** Inside CDATA: watches for the first `]` of the terminator. */
    _stateInCdata(c: string) {
        if (c === "]") this._state = State.AfterCdata1;
    }

    /** After one `]` inside CDATA. */
    _stateAfterCdata1(c: string) {
        if (c === "]") this._state = State.AfterCdata2;
        else this._state = State.InCdata;
    }

    /** After `]]` inside CDATA: `>` ends the section. */
    _stateAfterCdata2(c: string) {
        if (c === ">") {
            //remove 2 trailing chars
            this._cbs.oncdata(
                this._buffer.substring(this._sectionStart, this._index - 2)
            );
            this._state = State.Text;
            this._sectionStart = this._index + 1;
        } else if (c !== "]") {
            this._state = State.InCdata;
        }
        //else: stay in AFTER_CDATA_2 (`]]]>`)
    }

    /** After a leading "s" of a tag name: "c" may continue "script", "t" may continue "style". */
    _stateBeforeSpecial(c: string) {
        if (c === "c" || c === "C") {
            this._state = State.BeforeScript1;
        } else if (c === "t" || c === "T") {
            this._state = State.BeforeStyle1;
        } else {
            this._state = State.InTagName;
            this._index--; //consume the token again
        }
    }

    /** After `</s` while inside a special tag: checks for the matching closing name. */
    _stateBeforeSpecialEnd(c: string) {
        if (this._special === Special.Script && (c === "c" || c === "C")) {
            this._state = State.AfterScript1;
        } else if (
            this._special === Special.Style &&
            (c === "t" || c === "T")
        ) {
            this._state = State.AfterStyle1;
        } else this._state = State.Text;
    }

    /** After "script" in an opening tag: enables script mode if the name ends here. */
    _stateBeforeScript5(c: string) {
        if (c === "/" || c === ">" || whitespace(c)) {
            this._special = Special.Script;
        }
        this._state = State.InTagName;
        this._index--; //consume the token again
    }

    /** After "script" in a closing tag: leaves script mode if the name ends here. */
    _stateAfterScript5(c: string) {
        if (c === ">" || whitespace(c)) {
            this._special = Special.None;
            this._state = State.InClosingTagName;
            // Rewind the section start by 6 characters, the length of "script".
            this._sectionStart = this._index - 6;
            this._index--; //reconsume the token
        } else this._state = State.Text;
    }

    /** After "style" in an opening tag: enables style mode if the name ends here. */
    _stateBeforeStyle4(c: string) {
        if (c === "/" || c === ">" || whitespace(c)) {
            this._special = Special.Style;
        }
        this._state = State.InTagName;
        this._index--; //consume the token again
    }

    /** After "style" in a closing tag: leaves style mode if the name ends here. */
    _stateAfterStyle4(c: string) {
        if (c === ">" || whitespace(c)) {
            this._special = Special.None;
            this._state = State.InClosingTagName;
            // Rewind the section start by 5 characters, the length of "style".
            this._sectionStart = this._index - 5;
            this._index--; //reconsume the token
        } else this._state = State.Text;
    }

    //for entities terminated with a semicolon
    _parseNamedEntityStrict() {
        //offset = 1
        if (this._sectionStart + 1 < this._index) {
            const entity = this._buffer.substring(
                    this._sectionStart + 1,
                    this._index
                ),
                map = this._xmlMode ? xmlMap : entityMap;
            if (Object.prototype.hasOwnProperty.call(map, entity)) {
                // @ts-ignore
                this._emitPartial(map[entity]);
                this._sectionStart = this._index + 1;
            }
        }
    }

    //parses legacy entities (without trailing semicolon)
    _parseLegacyEntity() {
        const start = this._sectionStart + 1;
        let limit = this._index - start;
        if (limit > 6) limit = 6; // The max length of legacy entities is 6
        while (limit >= 2) {
            // The min length of legacy entities is 2
            const entity = this._buffer.slice(start, start + limit);
            if (Object.prototype.hasOwnProperty.call(legacyMap, entity)) {
                // @ts-ignore
                this._emitPartial(legacyMap[entity]);
                this._sectionStart += limit + 1;
                return;
            } else {
                limit--;
            }
        }
    }

    /** Inside a named entity: decodes it when it is terminated and returns to the base state. */
    _stateInNamedEntity(c: string) {
        if (c === ";") {
            this._parseNamedEntityStrict();
            // Nothing was consumed by the strict lookup: try the legacy names.
            if (this._sectionStart + 1 < this._index && !this._xmlMode) {
                this._parseLegacyEntity();
            }
            this._state = this._baseState;
        } else if (
            (c < "a" || c > "z") &&
            (c < "A" || c > "Z") &&
            (c < "0" || c > "9")
        ) {
            if (this._xmlMode || this._sectionStart + 1 === this._index) {
                // ignore
            } else if (this._baseState !== State.Text) {
                if (c !== "=") {
                    this._parseNamedEntityStrict();
                }
            } else {
                this._parseLegacyEntity();
            }

            this._state = this._baseState;
            this._index--;
        }
    }

    /**
     * Decodes the numeric entity between `_sectionStart + offset` and `_index`
     * and emits the resulting character, then returns to the base state.
     *
     * @param offset Number of prefix characters before the digits.
     * @param base Radix passed to `parseInt`.
     */
    _decodeNumericEntity(offset: number, base: number) {
        const sectionStart = this._sectionStart + offset;

        if (sectionStart !== this._index) {
            //parse entity
            const entity = this._buffer.substring(sectionStart, this._index);
            const parsed = parseInt(entity, base);

            this._emitPartial(decodeCodePoint(parsed));
            this._sectionStart = this._index;
        } else {
            this._sectionStart--;
        }

        this._state = this._baseState;
    }

    /** Inside a decimal numeric entity. */
    _stateInNumericEntity(c: string) {
        if (c === ";") {
            this._decodeNumericEntity(2, 10);
            this._sectionStart++;
        } else if (c < "0" || c > "9") {
            if (!this._xmlMode) {
                this._decodeNumericEntity(2, 10);
            } else {
                this._state = this._baseState;
            }
            this._index--;
        }
    }

    /** Inside a hexadecimal numeric entity. */
    _stateInHexEntity(c: string) {
        if (c === ";") {
            this._decodeNumericEntity(3, 16);
            this._sectionStart++;
        } else if (
            (c < "a" || c > "f") &&
            (c < "A" || c > "F") &&
            (c < "0" || c > "9")
        ) {
            if (!this._xmlMode) {
                this._decodeNumericEntity(3, 16);
            } else {
                this._state = this._baseState;
            }
            this._index--;
        }
    }

    /**
     * Drops the part of the buffer that is no longer needed and keeps
     * `_bufferOffset` in step with the number of characters removed.
     */
    _cleanup() {
        if (this._sectionStart < 0) {
            this._buffer = "";
            this._bufferOffset += this._index;
            this._index = 0;
        } else if (this._running) {
            if (this._state === State.Text) {
                if (this._sectionStart !== this._index) {
                    this._cbs.ontext(this._buffer.slice(this._sectionStart));
                }
                this._buffer = "";
                this._bufferOffset += this._index;
                this._index = 0;
            } else if (this._sectionStart === this._index) {
                //the section just started
                this._buffer = "";
                this._bufferOffset += this._index;
                this._index = 0;
            } else {
                //remove everything unnecessary
                this._buffer = this._buffer.slice(this._sectionStart);
                this._index -= this._sectionStart;
                this._bufferOffset += this._sectionStart;
            }

            this._sectionStart = 0;
        }
    }

    //TODO make events conditional
    /**
     * Appends `chunk` to the buffer and parses as far as possible.
     *
     * Calling this after `end()` reports an error through `onerror` and
     * ignores the chunk.
     */
    write(chunk: string) {
        if (this._ended) {
            this._cbs.onerror(Error(".write() after done!"));
            return;
        }

        this._buffer += chunk;
        this._parse();
    }

    // Iterates through the buffer, calling the function corresponding to the current state.
    // States that are more likely to be hit are higher up, as a performance improvement.
    _parse() {
        while (this._index < this._buffer.length && this._running) {
            const c = this._buffer.charAt(this._index);
            if (this._state === State.Text) {
                this._stateText(c);
            } else if (this._state === State.InAttributeValueDq) {
                this._stateInAttributeValueDoubleQuotes(c);
            } else if (this._state === State.InAttributeName) {
                this._stateInAttributeName(c);
            } else if (this._state === State.InComment) {
                this._stateInComment(c);
            } else if (this._state === State.BeforeAttributeName) {
                this._stateBeforeAttributeName(c);
            } else if (this._state === State.InTagName) {
                this._stateInTagName(c);
            } else if (this._state === State.InClosingTagName) {
                this._stateInClosingTagName(c);
            } else if (this._state === State.BeforeTagName) {
                this._stateBeforeTagName(c);
            } else if (this._state === State.AfterAttributeName) {
                this._stateAfterAttributeName(c);
            } else if (this._state === State.InAttributeValueSq) {
                this._stateInAttributeValueSingleQuotes(c);
            } else if (this._state === State.BeforeAttributeValue) {
                this._stateBeforeAttributeValue(c);
            } else if (this._state === State.BeforeClosingTagName) {
                this._stateBeforeClosingTagName(c);
            } else if (this._state === State.AfterClosingTagName) {
                this._stateAfterClosingTagName(c);
            } else if (this._state === State.BeforeSpecial) {
                this._stateBeforeSpecial(c);
            } else if (this._state === State.AfterComment1) {
                this._stateAfterComment1(c);
            } else if (this._state === State.InAttributeValueNq) {
                this._stateInAttributeValueNoQuotes(c);
            } else if (this._state === State.InSelfClosingTag) {
                this._stateInSelfClosingTag(c);
            } else if (this._state === State.InDeclaration) {
                this._stateInDeclaration(c);
            } else if (this._state === State.BeforeDeclaration) {
                this._stateBeforeDeclaration(c);
            } else if (this._state === State.AfterComment2) {
                this._stateAfterComment2(c);
            } else if (this._state === State.BeforeComment) {
                this._stateBeforeComment(c);
            } else if (this._state === State.BeforeSpecialEnd) {
                this._stateBeforeSpecialEnd(c);
            } else if (this._state === State.AfterScript1) {
                stateAfterScript1(this, c);
            } else if (this._state === State.AfterScript2) {
                stateAfterScript2(this, c);
            } else if (this._state === State.AfterScript3) {
                stateAfterScript3(this, c);
            } else if (this._state === State.BeforeScript1) {
                stateBeforeScript1(this, c);
            } else if (this._state === State.BeforeScript2) {
                stateBeforeScript2(this, c);
            } else if (this._state === State.BeforeScript3) {
                stateBeforeScript3(this, c);
            } else if (this._state === State.BeforeScript4) {
                stateBeforeScript4(this, c);
            } else if (this._state === State.BeforeScript5) {
                this._stateBeforeScript5(c);
            } else if (this._state === State.AfterScript4) {
                stateAfterScript4(this, c);
            } else if (this._state === State.AfterScript5) {
                this._stateAfterScript5(c);
            } else if (this._state === State.BeforeStyle1) {
                stateBeforeStyle1(this, c);
            } else if (this._state === State.InCdata) {
                this._stateInCdata(c);
            } else if (this._state === State.BeforeStyle2) {
                stateBeforeStyle2(this, c);
            } else if (this._state === State.BeforeStyle3) {
                stateBeforeStyle3(this, c);
            } else if (this._state === State.BeforeStyle4) {
                this._stateBeforeStyle4(c);
            } else if (this._state === State.AfterStyle1) {
                stateAfterStyle1(this, c);
            } else if (this._state === State.AfterStyle2) {
                stateAfterStyle2(this, c);
            } else if (this._state === State.AfterStyle3) {
                stateAfterStyle3(this, c);
            } else if (this._state === State.AfterStyle4) {
                this._stateAfterStyle4(c);
            } else if (this._state === State.InProcessingInstruction) {
                this._stateInProcessingInstruction(c);
            } else if (this._state === State.InNamedEntity) {
                this._stateInNamedEntity(c);
            } else if (this._state === State.BeforeCdata1) {
                stateBeforeCdata1(this, c);
            } else if (this._state === State.BeforeEntity) {
                stateBeforeEntity(this, c);
            } else if (this._state === State.BeforeCdata2) {
                stateBeforeCdata2(this, c);
            } else if (this._state === State.BeforeCdata3) {
                stateBeforeCdata3(this, c);
            } else if (this._state === State.AfterCdata1) {
                this._stateAfterCdata1(c);
            } else if (this._state === State.AfterCdata2) {
                this._stateAfterCdata2(c);
            } else if (this._state === State.BeforeCdata4) {
                stateBeforeCdata4(this, c);
            } else if (this._state === State.BeforeCdata5) {
                stateBeforeCdata5(this, c);
            } else if (this._state === State.BeforeCdata6) {
                this._stateBeforeCdata6(c);
            } else if (this._state === State.InHexEntity) {
                this._stateInHexEntity(c);
            } else if (this._state === State.InNumericEntity) {
                this._stateInNumericEntity(c);
            } else if (this._state === State.BeforeNumericEntity) {
                stateBeforeNumericEntity(this, c);
            } else {
                this._cbs.onerror(Error("unknown _state"), this._state);
            }

            this._index++;
        }

        this._cleanup();
    }

    /** Stops `_parse()` from consuming further input until `resume()` is called. */
    pause() {
        this._running = false;
    }

    /** Continues parsing buffered input and finishes if `end()` was called while paused. */
    resume() {
        this._running = true;

        if (this._index < this._buffer.length) {
            this._parse();
        }
        if (this._ended) {
            this._finish();
        }
    }

    /**
     * Marks the input as complete, optionally writing a final `chunk` first.
     *
     * Calling this a second time reports an error through `onerror` and does
     * nothing else, so `onend` is not emitted again.
     */
    end(chunk?: string) {
        if (this._ended) {
            this._cbs.onerror(Error(".end() after done!"));
            return;
        }

        if (chunk) this.write(chunk);

        this._ended = true;

        // When paused, `resume()` takes care of finishing.
        if (this._running) this._finish();
    }

    /** Emits whatever is left in the current section and then calls `onend`. */
    _finish() {
        //if there is remaining data, emit it in a reasonable way
        if (this._sectionStart < this._index) {
            this._handleTrailingData();
        }

        this._cbs.onend();
    }

    /** Emits the unterminated trailing section in a way that depends on the current state. */
    _handleTrailingData() {
        const data = this._buffer.slice(this._sectionStart);

        if (
            this._state === State.InCdata ||
            this._state === State.AfterCdata1 ||
            this._state === State.AfterCdata2
        ) {
            this._cbs.oncdata(data);
        } else if (
            this._state === State.InComment ||
            this._state === State.AfterComment1 ||
            this._state === State.AfterComment2
        ) {
            this._cbs.oncomment(data);
        } else if (this._state === State.InNamedEntity && !this._xmlMode) {
            this._parseLegacyEntity();
            if (this._sectionStart < this._index) {
                this._state = this._baseState;
                this._handleTrailingData();
            }
        } else if (this._state === State.InNumericEntity && !this._xmlMode) {
            this._decodeNumericEntity(2, 10);
            if (this._sectionStart < this._index) {
                this._state = this._baseState;
                this._handleTrailingData();
            }
        } else if (this._state === State.InHexEntity && !this._xmlMode) {
            this._decodeNumericEntity(3, 16);
            if (this._sectionStart < this._index) {
                this._state = this._baseState;
                this._handleTrailingData();
            }
        } else if (
            this._state !== State.InTagName &&
            this._state !== State.BeforeAttributeName &&
            this._state !== State.BeforeAttributeValue &&
            this._state !== State.AfterAttributeName &&
            this._state !== State.InAttributeName &&
            this._state !== State.InAttributeValueSq &&
            this._state !== State.InAttributeValueDq &&
            this._state !== State.InAttributeValueNq &&
            this._state !== State.InClosingTagName
        ) {
            this._cbs.ontext(data);
        }
        //else, ignore remaining data
        //TODO add a way to remove current tag
    }

    /** Returns the current position counted from the start of all input written so far. */
    getAbsoluteIndex(): number {
        return this._bufferOffset + this._index;
    }

    /** Returns the buffer contents between `_sectionStart` and `_index`. */
    _getSection(): string {
        return this._buffer.substring(this._sectionStart, this._index);
    }

    /** Passes the current section to the named callback and marks the section as consumed. */
    _emitToken(name: "onopentagname" | "onclosetag" | "onattribdata") {
        this._cbs[name](this._getSection());
        this._sectionStart = -1;
    }

    /** Emits a decoded entity as attribute data or as text, depending on the base state. */
    _emitPartial(value: string) {
        if (this._baseState !== State.Text) {
            this._cbs.onattribdata(value); //TODO implement the new event
        } else {
            this._cbs.ontext(value);
        }
    }
}
|
14
packages/fork-htmlparser2/src/WritableStream.spec.ts
Normal file
14
packages/fork-htmlparser2/src/WritableStream.spec.ts
Normal file
@ -0,0 +1,14 @@
|
||||
import { WritableStream } from "./WritableStream";
|
||||
|
||||
describe("WritableStream", () => {
    test("should decode fragmented unicode characters", () => {
        const ontext = jest.fn();
        const stream = new WritableStream({ ontext });

        // The character is three UTF-8 bytes long; deliver it split over two writes.
        const encoded = [0xe2, 0x82, 0xac];
        const firstPart = Buffer.from(encoded.slice(0, 2));
        const secondPart = Buffer.from(encoded.slice(2));

        stream.write(firstPart);
        stream.write(secondPart);
        stream.end();

        expect(ontext).toBeCalledWith("€");
    });
});
|
34
packages/fork-htmlparser2/src/WritableStream.ts
Normal file
34
packages/fork-htmlparser2/src/WritableStream.ts
Normal file
@ -0,0 +1,34 @@
|
||||
import { Parser, Handler, ParserOptions } from "./Parser";
|
||||
import { Writable } from "stream";
|
||||
import { StringDecoder } from "string_decoder";
|
||||
|
||||
// Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream
/**
 * Type guard that treats the chunk as a `Buffer` exactly when the encoding
 * passed alongside it is the string "buffer"; the chunk itself is not inspected.
 */
function isBuffer(_chunk: string | Buffer, encoding: string): _chunk is Buffer {
    const encodingMarksBuffer = encoding === "buffer";
    return encodingMarksBuffer;
}
|
||||
|
||||
/**
 * Exposes the `Parser` interface as a NodeJS writable stream.
 *
 * Chunks written to the stream are forwarded to an internal `Parser`;
 * `Buffer` chunks are first turned into strings by a `StringDecoder`.
 *
 * @see Parser
 */
export class WritableStream extends Writable {
    _parser: Parser;
    _decoder = new StringDecoder();

    /**
     * @param cbs Handler callbacks passed on to the `Parser`.
     * @param options Options passed on to the `Parser`.
     */
    constructor(cbs: Partial<Handler>, options?: ParserOptions) {
        super({ decodeStrings: false });
        this._parser = new Parser(cbs, options);
    }

    /** Decodes the chunk if it is a `Buffer`, writes it to the parser, then calls `cb`. */
    _write(chunk: string | Buffer, encoding: string, cb: () => void) {
        const text = isBuffer(chunk, encoding)
            ? this._decoder.write(chunk)
            : chunk;
        this._parser.write(text);
        cb();
    }

    /** Ends the parser with whatever the decoder still returns, then calls `cb`. */
    _final(cb: () => void) {
        const remainder = this._decoder.end();
        this._parser.end(remainder);
        cb();
    }
}
|
@ -0,0 +1,25 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- http://en.wikipedia.org/wiki/Atom_%28standard%29 -->
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<subtitle>A subtitle.</subtitle>
|
||||
<link href="http://example.org/feed/" rel="self" />
|
||||
<link href="http://example.org/" />
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
<email>johndoe@example.com</email>
|
||||
</author>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03" />
|
||||
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
|
||||
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<content type="html"><p>Some content.</p></content>
|
||||
</entry>
|
||||
|
||||
</feed>
|
@ -0,0 +1,16 @@
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Attributes test</title>
|
||||
</head>
|
||||
<body>
|
||||
<!-- Normal attributes -->
|
||||
<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>
|
||||
|
||||
<!-- Attributes with no quotes or value -->
|
||||
<button id="test1" class=value2 disabled>class=value2 disabled</button>
|
||||
|
||||
<!-- Attributes with no space between them. No valid, but accepted by the browser -->
|
||||
<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1 @@
|
||||
<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>
|
@ -0,0 +1,63 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">
|
||||
<channel rdf:about="https://github.com/fb55/htmlparser2/">
|
||||
<title>A title to parse and remember</title>
|
||||
<link>https://github.com/fb55/htmlparser2/</link>
|
||||
<description/>
|
||||
<dc:language>en-us</dc:language>
|
||||
<dc:rights>Copyright 2015 the authors</dc:rights>
|
||||
<dc:publisher>webmaster@thisisafakedoma.in</dc:publisher>
|
||||
<dc:creator>webmaster@thisisafakedoma.in</dc:creator>
|
||||
<dc:source>https://github.com/fb55/htmlparser2/</dc:source>
|
||||
<dc:title>A title to parse and remember</dc:title>
|
||||
<dc:type>Collection</dc:type>
|
||||
<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>
|
||||
<syn:updateFrequency>4</syn:updateFrequency>
|
||||
<syn:updatePeriod>hourly</syn:updatePeriod>
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li rdf:resource="http://somefakesite/path/to/something.html"/>
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
</channel>
|
||||
<item rdf:about="http://somefakesite/path/to/something.html">
|
||||
<title><![CDATA[ Fast HTML Parsing ]]></title>
|
||||
<link>
|
||||
http://somefakesite/path/to/something.html
|
||||
</link>
|
||||
<description><![CDATA[
|
||||
Great test content<br>A link: <a href="http://github.com">Github</a>
|
||||
]]></description>
|
||||
<dc:date>2011-11-04T09:35:17-07:00</dc:date>
|
||||
<dc:language>en-us</dc:language>
|
||||
<dc:rights>Copyright 2015 the authors</dc:rights>
|
||||
<dc:source>
|
||||
http://somefakesite/path/to/something.html
|
||||
</dc:source>
|
||||
<dc:title><![CDATA[ Fast HTML Parsing ]]></dc:title>
|
||||
<dc:type>text</dc:type>
|
||||
<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>
|
||||
</item>
|
||||
<item rdf:about="http://somefakesite/path/to/something-else.html">
|
||||
<title><![CDATA[
|
||||
This space intentionally left blank
|
||||
]]></title>
|
||||
<link>
|
||||
http://somefakesite/path/to/something-else.html
|
||||
</link>
|
||||
<description><![CDATA[
|
||||
The early bird gets the worm
|
||||
]]></description>
|
||||
<dc:date>2011-11-04T09:34:54-07:00</dc:date>
|
||||
<dc:language>en-us</dc:language>
|
||||
<dc:rights>Copyright 2015 the authors</dc:rights>
|
||||
<dc:source>
|
||||
http://somefakesite/path/to/something-else.html
|
||||
</dc:source>
|
||||
<dc:title><![CDATA[
|
||||
This space intentionally left blank
|
||||
]]></dc:title>
|
||||
<dc:type>text</dc:type>
|
||||
<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>
|
||||
</item>
|
||||
</rdf:RDF>
|
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0"?>
|
||||
<!-- http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Liftoff News</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/</link>
|
||||
<description>Liftoff to Space Exploration.</description>
|
||||
<language>en-us</language>
|
||||
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
|
||||
|
||||
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
|
||||
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
|
||||
<generator>Weblog Editor 2.0</generator>
|
||||
<managingEditor>editor@example.com</managingEditor>
|
||||
<webMaster>webmaster@example.com</webMaster>
|
||||
<item>
|
||||
|
||||
<title>Star City</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
||||
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
||||
|
||||
</item>
|
||||
<item>
|
||||
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>
|
||||
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
|
||||
|
||||
</item>
|
||||
<item>
|
||||
<title>The Engine That Does More</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
|
||||
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
|
||||
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
|
||||
|
||||
</item>
|
||||
<item>
|
||||
<title>Astronauts' Dirty Laundry</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
|
||||
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
|
||||
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
|
||||
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
@ -0,0 +1,19 @@
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<title>SVG test</title>
|
||||
</head>
|
||||
<body>
|
||||
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<title>Test</title>
|
||||
<animate />
|
||||
<polygon />
|
||||
<g>
|
||||
<path>
|
||||
<title>x</title>
|
||||
<animate />
|
||||
</path>
|
||||
</g>
|
||||
</svg>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,35 @@
|
||||
{
|
||||
"name": "simple",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<h1 class=test>adsf</h1>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["h1"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "test"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"h1",
|
||||
{
|
||||
"class": "test"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["adsf"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["h1"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
{
|
||||
"name": "Template script tags",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<p><script type=\"text/template\"><h1>Heading1</h1></script></p>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["p"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["p", {}]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["type", "text/template"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"script",
|
||||
{
|
||||
"type": "text/template"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["<h1>Heading1</h1>"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["p"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
{
|
||||
"name": "Lowercase tags",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {
|
||||
"lowerCaseTags": true
|
||||
}
|
||||
},
|
||||
"html": "<H1 class=test>adsf</H1>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["h1"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "test"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"h1",
|
||||
{
|
||||
"class": "test"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["adsf"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["h1"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
{
|
||||
"name": "CDATA",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "xmlMode": true }
|
||||
},
|
||||
"html": "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag><![CD>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["tag"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["tag", {}]
|
||||
},
|
||||
{
|
||||
"event": "cdatastart",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [" asdf ><asdf></adsf><> fo"]
|
||||
},
|
||||
{
|
||||
"event": "cdataend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["tag"]
|
||||
},
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["![CD", "![CD"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "CDATA (inside special)",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<script>/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/</script>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["script", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["script"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "leading lt",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": ">a>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": [">a>"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
{
|
||||
"name": "Self-closing tags",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<a href=http://test.com/>Foo</a><hr / >",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["a"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["href", "http://test.com/"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"a",
|
||||
{
|
||||
"href": "http://test.com/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Foo"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["a"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["hr"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["hr", {}]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["hr"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,85 @@
|
||||
{
|
||||
"name": "Implicit close tags",
|
||||
"options": {},
|
||||
"html": "<ol><li class=test><div><table style=width:100%><tr><th>TH<td colspan=2><h3>Heading</h3><tr><td><div>Div</div><td><div>Div2</div></table></div><li><div><h3>Heading 2</h3></div></li></ol><p>Para<h4>Heading 4</h4><p><ul><li>Hi<li>bye</ul>",
|
||||
"expected": [
|
||||
{ "event": "opentagname", "data": ["ol"] },
|
||||
{ "event": "opentag", "data": ["ol", {}] },
|
||||
{ "event": "opentagname", "data": ["li"] },
|
||||
{ "event": "attribute", "data": ["class", "test"] },
|
||||
{ "event": "opentag", "data": ["li", { "class": "test" }] },
|
||||
{ "event": "opentagname", "data": ["div"] },
|
||||
{ "event": "opentag", "data": ["div", {}] },
|
||||
{ "event": "opentagname", "data": ["table"] },
|
||||
{ "event": "attribute", "data": ["style", "width:100%"] },
|
||||
{ "event": "opentag", "data": ["table", { "style": "width:100%" }] },
|
||||
{ "event": "opentagname", "data": ["tr"] },
|
||||
{ "event": "opentag", "data": ["tr", {}] },
|
||||
{ "event": "opentagname", "data": ["th"] },
|
||||
{ "event": "opentag", "data": ["th", {}] },
|
||||
{ "event": "text", "data": ["TH"] },
|
||||
{ "event": "closetag", "data": ["th"] },
|
||||
{ "event": "opentagname", "data": ["td"] },
|
||||
{ "event": "attribute", "data": ["colspan", "2"] },
|
||||
{ "event": "opentag", "data": ["td", { "colspan": "2" }] },
|
||||
{ "event": "opentagname", "data": ["h3"] },
|
||||
{ "event": "opentag", "data": ["h3", {}] },
|
||||
{ "event": "text", "data": ["Heading"] },
|
||||
{ "event": "closetag", "data": ["h3"] },
|
||||
{ "event": "closetag", "data": ["td"] },
|
||||
{ "event": "closetag", "data": ["tr"] },
|
||||
{ "event": "opentagname", "data": ["tr"] },
|
||||
{ "event": "opentag", "data": ["tr", {}] },
|
||||
{ "event": "opentagname", "data": ["td"] },
|
||||
{ "event": "opentag", "data": ["td", {}] },
|
||||
{ "event": "opentagname", "data": ["div"] },
|
||||
{ "event": "opentag", "data": ["div", {}] },
|
||||
{ "event": "text", "data": ["Div"] },
|
||||
{ "event": "closetag", "data": ["div"] },
|
||||
{ "event": "closetag", "data": ["td"] },
|
||||
{ "event": "opentagname", "data": ["td"] },
|
||||
{ "event": "opentag", "data": ["td", {}] },
|
||||
{ "event": "opentagname", "data": ["div"] },
|
||||
{ "event": "opentag", "data": ["div", {}] },
|
||||
{ "event": "text", "data": ["Div2"] },
|
||||
{ "event": "closetag", "data": ["div"] },
|
||||
{ "event": "closetag", "data": ["td"] },
|
||||
{ "event": "closetag", "data": ["tr"] },
|
||||
{ "event": "closetag", "data": ["table"] },
|
||||
{ "event": "closetag", "data": ["div"] },
|
||||
{ "event": "closetag", "data": ["li"] },
|
||||
{ "event": "opentagname", "data": ["li"] },
|
||||
{ "event": "opentag", "data": ["li", {}] },
|
||||
{ "event": "opentagname", "data": ["div"] },
|
||||
{ "event": "opentag", "data": ["div", {}] },
|
||||
{ "event": "opentagname", "data": ["h3"] },
|
||||
{ "event": "opentag", "data": ["h3", {}] },
|
||||
{ "event": "text", "data": ["Heading 2"] },
|
||||
{ "event": "closetag", "data": ["h3"] },
|
||||
{ "event": "closetag", "data": ["div"] },
|
||||
{ "event": "closetag", "data": ["li"] },
|
||||
{ "event": "closetag", "data": ["ol"] },
|
||||
{ "event": "opentagname", "data": ["p"] },
|
||||
{ "event": "opentag", "data": ["p", {}] },
|
||||
{ "event": "text", "data": ["Para"] },
|
||||
{ "event": "closetag", "data": ["p"] },
|
||||
{ "event": "opentagname", "data": ["h4"] },
|
||||
{ "event": "opentag", "data": ["h4", {}] },
|
||||
{ "event": "text", "data": ["Heading 4"] },
|
||||
{ "event": "closetag", "data": ["h4"] },
|
||||
{ "event": "opentagname", "data": ["p"] },
|
||||
{ "event": "opentag", "data": ["p", {}] },
|
||||
{ "event": "closetag", "data": ["p"] },
|
||||
{ "event": "opentagname", "data": ["ul"] },
|
||||
{ "event": "opentag", "data": ["ul", {}] },
|
||||
{ "event": "opentagname", "data": ["li"] },
|
||||
{ "event": "opentag", "data": ["li", {}] },
|
||||
{ "event": "text", "data": ["Hi"] },
|
||||
{ "event": "closetag", "data": ["li"] },
|
||||
{ "event": "opentagname", "data": ["li"] },
|
||||
{ "event": "opentag", "data": ["li", {}] },
|
||||
{ "event": "text", "data": ["bye"] },
|
||||
{ "event": "closetag", "data": ["li"] },
|
||||
{ "event": "closetag", "data": ["ul"] }
|
||||
]
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
{
|
||||
"name": "attributes (no white space, no value, no quotes)",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<button class=\"test0\"title=\"test1\" disabled value=test2>adsf</button>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["button"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "test0"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["title", "test1"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["disabled", ""]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["value", "test2"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"button",
|
||||
{
|
||||
"class": "test0",
|
||||
"title": "test1",
|
||||
"disabled": "",
|
||||
"value": "test2"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["adsf"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["button"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
{
|
||||
"name": "crazy attribute",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<p < = '' FAIL>stuff</p><a",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["p"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["<", ""]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["fail", ""]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"p",
|
||||
{
|
||||
"<": "",
|
||||
"fail": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["stuff"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["p"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
{
|
||||
"name": "Scripts creating other scripts",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<p><script>var str = '<script></'+'script>';</script></p>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["p"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["p", {}]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["script", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["var str = '<script></'+'script>';"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["p"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "Long comment ending",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<meta id='before'><!-- text ---><meta id='after'>",
|
||||
"expected": [
|
||||
{ "event": "opentagname", "data": ["meta"] },
|
||||
{ "event": "attribute", "data": ["id", "before"] },
|
||||
{ "event": "opentag", "data": ["meta", { "id": "before" }] },
|
||||
{ "event": "closetag", "data": ["meta"] },
|
||||
{ "event": "comment", "data": [" text -"] },
|
||||
{ "event": "commentend", "data": [] },
|
||||
{ "event": "opentagname", "data": ["meta"] },
|
||||
{ "event": "attribute", "data": ["id", "after"] },
|
||||
{ "event": "opentag", "data": ["meta", { "id": "after" }] },
|
||||
{ "event": "closetag", "data": ["meta"] }
|
||||
]
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "Long CDATA ending",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "xmlMode": true }
|
||||
},
|
||||
"html": "<before /><tag><![CDATA[ text ]]]></tag><after />",
|
||||
"expected": [
|
||||
{ "event": "opentagname", "data": ["before"] },
|
||||
{ "event": "opentag", "data": ["before", {}] },
|
||||
{ "event": "closetag", "data": ["before"] },
|
||||
{ "event": "opentagname", "data": ["tag"] },
|
||||
{ "event": "opentag", "data": ["tag", {}] },
|
||||
{ "event": "cdatastart", "data": [] },
|
||||
{ "event": "text", "data": [" text ]"] },
|
||||
{ "event": "cdataend", "data": [] },
|
||||
{ "event": "closetag", "data": ["tag"] },
|
||||
{ "event": "opentagname", "data": ["after"] },
|
||||
{ "event": "opentag", "data": ["after", {}] },
|
||||
{ "event": "closetag", "data": ["after"] }
|
||||
]
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "Implicit open p and br tags",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<div>Hallo</p>World</br></ignore></div></p></br>",
|
||||
"expected": [
|
||||
{ "event": "opentagname", "data": ["div"] },
|
||||
{ "event": "opentag", "data": ["div", {}] },
|
||||
{ "event": "text", "data": ["Hallo"] },
|
||||
{ "event": "opentagname", "data": ["p"] },
|
||||
{ "event": "opentag", "data": ["p", {}] },
|
||||
{ "event": "closetag", "data": ["p"] },
|
||||
{ "event": "text", "data": ["World"] },
|
||||
{ "event": "opentagname", "data": ["br"] },
|
||||
{ "event": "opentag", "data": ["br", {}] },
|
||||
{ "event": "closetag", "data": ["br"] },
|
||||
{ "event": "closetag", "data": ["div"] },
|
||||
{ "event": "opentagname", "data": ["p"] },
|
||||
{ "event": "opentag", "data": ["p", {}] },
|
||||
{ "event": "closetag", "data": ["p"] },
|
||||
{ "event": "opentagname", "data": ["br"] },
|
||||
{ "event": "opentag", "data": ["br", {}] },
|
||||
{ "event": "closetag", "data": ["br"] }
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "lt followed by whitespace",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "a < b",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["a < b"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,35 @@
|
||||
{
|
||||
"name": "double attribute",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<h1 class=test class=boo></h1>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["h1"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "test"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "boo"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"h1",
|
||||
{
|
||||
"class": "test"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["h1"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "numeric entities",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "decodeEntities": true }
|
||||
},
|
||||
"html": "abcdfg&#x;h",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["abcdfg&#x;h"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "legacy entities",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "decodeEntities": true }
|
||||
},
|
||||
"html": "&elíe&eer;s<er",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["&el\u00EDe&eer;s<er"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "named entities",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "decodeEntities": true }
|
||||
},
|
||||
"html": "&el<er∳foo&bar",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["&el<er\u2233foo&bar"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "xml entities",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "decodeEntities": true, "xmlMode": true }
|
||||
},
|
||||
"html": "&>&<üabcde",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["&>&<üaجde"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
{
|
||||
"name": "entity in attribute",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "decodeEntities": true }
|
||||
},
|
||||
"html": "<a href='http://example.com/page?param=value¶m2¶m3=<val&; & &'>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["a"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": [
|
||||
"href",
|
||||
"http://example.com/page?param=value¶m2¶m3=<val&; & &"
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"a",
|
||||
{
|
||||
"href": "http://example.com/page?param=value¶m2¶m3=<val&; & &"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["a"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
{
|
||||
"name": "double brackets",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<<princess-purpose>>testing</princess-purpose>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["<"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["princess-purpose"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["princess-purpose", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [">testing"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["princess-purpose"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "legacy entities",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "decodeEntities": true }
|
||||
},
|
||||
"html": "M&M",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["M&M"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,87 @@
|
||||
{
|
||||
"name": "Special special tags",
|
||||
"options": {},
|
||||
"html": "<sCriPT></scripter</soo</sCript><STyLE></styler</STylE><sCiPt><stylee><scriptee><soo>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["script", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["</scripter</soo"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["style"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["style", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["</styler"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["style"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["scipt"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["scipt", {}]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["stylee"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["stylee", {}]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["scriptee"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["scriptee", {}]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["soo"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["soo", {}]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["soo"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["scriptee"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["stylee"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["scipt"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
{
|
||||
"name": "Empty tag name",
|
||||
"options": {},
|
||||
"html": "< ></ >",
|
||||
"expected": [
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["< ></ >"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
{
|
||||
"name": "Not quite closed",
|
||||
"options": {},
|
||||
"html": "<foo /bar></foo bar>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["foo"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["bar", ""]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"foo",
|
||||
{
|
||||
"bar": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["foo"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,46 @@
|
||||
{
|
||||
"name": "Entities in attributes",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": { "decodeEntities": true }
|
||||
},
|
||||
"html": "<foo bar=& baz=\"&\" boo='&' noo=>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["foo"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["bar", "&"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["baz", "&"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["boo", "&"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["noo", ""]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"foo",
|
||||
{
|
||||
"bar": "&",
|
||||
"baz": "&",
|
||||
"boo": "&",
|
||||
"noo": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["foo"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "CDATA in HTML",
|
||||
"options": {},
|
||||
"html": "<![CDATA[ foo ]]>",
|
||||
"expected": [
|
||||
{ "event": "comment", "data": ["[CDATA[ foo ]]"] },
|
||||
{ "event": "commentend", "data": [] }
|
||||
]
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "Comment edge-cases",
|
||||
"options": {},
|
||||
"html": "<!-foo><!-- --- --><!--foo",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["!-foo", "!-foo"]
|
||||
},
|
||||
{ "event": "comment", "data": [" --- "] },
|
||||
{ "event": "commentend", "data": [] },
|
||||
{ "event": "comment", "data": ["foo"] },
|
||||
{ "event": "commentend", "data": [] }
|
||||
]
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "CDATA edge-cases",
|
||||
"options": {
|
||||
"parser": { "recognizeCDATA": true }
|
||||
},
|
||||
"html": "<![CDATA><![CDATA[[]]sdaf]]><![CDATA[foo",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["![cdata", "![CDATA"]
|
||||
},
|
||||
{ "event": "cdatastart", "data": [] },
|
||||
{ "event": "text", "data": ["[]]sdaf"] },
|
||||
{ "event": "cdataend", "data": [] },
|
||||
{ "event": "cdatastart", "data": [] },
|
||||
{ "event": "text", "data": ["foo"] },
|
||||
{ "event": "cdataend", "data": [] }
|
||||
]
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "Comment false ending",
|
||||
"options": {},
|
||||
"html": "<!-- a-b-> -->",
|
||||
"expected": [
|
||||
{ "event": "comment", "data": [" a-b-> "] },
|
||||
{ "event": "commentend", "data": [] }
|
||||
]
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "Scripts ending with <",
|
||||
"options": {
|
||||
"handler": {},
|
||||
"parser": {}
|
||||
},
|
||||
"html": "<script><</script>",
|
||||
"expected": [
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["script"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["script", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["<"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["script"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "CDATA more edge-cases",
|
||||
"options": {
|
||||
"parser": { "recognizeCDATA": true }
|
||||
},
|
||||
"html": "<![CDATA[foo]bar]>baz]]>",
|
||||
"expected": [
|
||||
{ "event": "cdatastart", "data": [] },
|
||||
{ "event": "text", "data": ["foo]bar]>baz"] },
|
||||
{ "event": "cdataend", "data": [] }
|
||||
]
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
{
|
||||
"name": "RSS (2.0)",
|
||||
"file": "RSS_Example.xml",
|
||||
"useSnapshot": true
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
{
|
||||
"name": "Atom (1.0)",
|
||||
"file": "Atom_Example.xml",
|
||||
"useSnapshot": true
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
{
|
||||
"name": "RDF test",
|
||||
"file": "RDF_Example.xml",
|
||||
"useSnapshot": true
|
||||
}
|
@ -0,0 +1,55 @@
|
||||
{
|
||||
"name": "Basic html",
|
||||
"options": {},
|
||||
"file": "Basic.html",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["!doctype", "!DOCTYPE html"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["html"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["html", {}]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["The Title"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["body"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["body", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Hello world"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["body"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["html"]
|
||||
}
|
||||
]
|
||||
}
|
722
packages/fork-htmlparser2/src/__fixtures__/Stream/02-RSS.json
Normal file
722
packages/fork-htmlparser2/src/__fixtures__/Stream/02-RSS.json
Normal file
@ -0,0 +1,722 @@
|
||||
{
|
||||
"name": "RSS feed",
|
||||
"options": { "xmlMode": true },
|
||||
"file": "RSS_Example.xml",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["?xml", "?xml version=\"1.0\"?"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "comment",
|
||||
"data": [
|
||||
" http://cyber.law.harvard.edu/rss/examples/rss2sample.xml "
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "commentend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["rss"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["version", "2.0"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"rss",
|
||||
{
|
||||
"version": "2.0"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["channel"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["channel", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Liftoff News"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["link", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Liftoff to Space Exploration."]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["language"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["language", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["en-us"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["language"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["pubDate", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Tue, 10 Jun 2003 04:00:00 GMT"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["lastBuildDate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["lastBuildDate", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Tue, 10 Jun 2003 09:41:01 GMT"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["lastBuildDate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["docs"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["docs", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://blogs.law.harvard.edu/tech/rss"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["docs"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["generator"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["generator", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Weblog Editor 2.0"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["generator"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["managingEditor"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["managingEditor", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["editor@example.com"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["managingEditor"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["webMaster"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["webMaster", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["webmaster@example.com"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["webMaster"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["item", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Star City"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["link", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [
|
||||
"How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\">Star City</a>."
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["pubDate", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Tue, 03 Jun 2003 09:39:21 GMT"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["guid", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n "]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["item", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [
|
||||
"Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\">partial eclipse of the Sun</a> on Saturday, May 31st."
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["pubDate", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Fri, 30 May 2003 11:06:42 GMT"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["guid", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n "]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["item", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["The Engine That Does More"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["link", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [
|
||||
"Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that."
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["pubDate", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Tue, 27 May 2003 08:37:32 GMT"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["guid", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n "]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["item", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Astronauts' Dirty Laundry"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["link", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [
|
||||
"Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options."
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["pubDate", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Tue, 20 May 2003 08:56:02 GMT"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["pubDate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["guid", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["guid"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n "]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n "]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["channel"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["rss"]
|
||||
}
|
||||
]
|
||||
}
|
462
packages/fork-htmlparser2/src/__fixtures__/Stream/03-Atom.json
Normal file
462
packages/fork-htmlparser2/src/__fixtures__/Stream/03-Atom.json
Normal file
@ -0,0 +1,462 @@
|
||||
{
|
||||
"name": "Atom feed",
|
||||
"options": { "xmlMode": true },
|
||||
"file": "Atom_Example.xml",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["?xml", "?xml version=\"1.0\" encoding=\"utf-8\"?"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "comment",
|
||||
"data": [" http://en.wikipedia.org/wiki/Atom_%28standard%29 "]
|
||||
},
|
||||
{
|
||||
"event": "commentend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["feed"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns", "http://www.w3.org/2005/Atom"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"feed",
|
||||
{
|
||||
"xmlns": "http://www.w3.org/2005/Atom"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Example Feed"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["subtitle"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["subtitle", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["A subtitle."]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["subtitle"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["href", "http://example.org/feed/"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["rel", "self"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"link",
|
||||
{
|
||||
"href": "http://example.org/feed/",
|
||||
"rel": "self"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["href", "http://example.org/"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"link",
|
||||
{
|
||||
"href": "http://example.org/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["id"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["id", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["id"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["updated"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["updated", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["2003-12-13T18:30:02Z"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["updated"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["author"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["author", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["name"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["name", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["John Doe"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["name"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["email"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["email", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["johndoe@example.com"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["email"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["author"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["entry"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["entry", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Atom-Powered Robots Run Amok"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["href", "http://example.org/2003/12/13/atom03"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"link",
|
||||
{
|
||||
"href": "http://example.org/2003/12/13/atom03"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["rel", "alternate"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["type", "text/html"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["href", "http://example.org/2003/12/13/atom03.html"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"link",
|
||||
{
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
"href": "http://example.org/2003/12/13/atom03.html"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["rel", "edit"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["href", "http://example.org/2003/12/13/atom03/edit"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"link",
|
||||
{
|
||||
"rel": "edit",
|
||||
"href": "http://example.org/2003/12/13/atom03/edit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["id"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["id", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["id"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["updated"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["updated", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["2003-12-13T18:30:02Z"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["updated"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["content"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["type", "html"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"content",
|
||||
{
|
||||
"type": "html"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["p"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["p", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Some content."]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["p"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["content"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["entry"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["feed"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
}
|
||||
]
|
||||
}
|
950
packages/fork-htmlparser2/src/__fixtures__/Stream/04-RDF.json
Normal file
950
packages/fork-htmlparser2/src/__fixtures__/Stream/04-RDF.json
Normal file
@ -0,0 +1,950 @@
|
||||
{
|
||||
"name": "RDF feed",
|
||||
"options": { "xmlMode": true },
|
||||
"file": "RDF_Example.xml",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["?xml", "?xml version=\"1.0\" encoding=\"UTF-8\"?"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["rdf:RDF"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns", "http://purl.org/rss/1.0/"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns:ev", "http://purl.org/rss/1.0/modules/event/"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": [
|
||||
"xmlns:content",
|
||||
"http://purl.org/rss/1.0/modules/content/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns:taxo", "http://purl.org/rss/1.0/modules/taxonomy/"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns:dc", "http://purl.org/dc/elements/1.1/"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": [
|
||||
"xmlns:syn",
|
||||
"http://purl.org/rss/1.0/modules/syndication/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns:dcterms", "http://purl.org/dc/terms/"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns:admin", "http://webns.net/mvcb/"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"rdf:RDF",
|
||||
{
|
||||
"xmlns:rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
"xmlns": "http://purl.org/rss/1.0/",
|
||||
"xmlns:ev": "http://purl.org/rss/1.0/modules/event/",
|
||||
"xmlns:content": "http://purl.org/rss/1.0/modules/content/",
|
||||
"xmlns:taxo": "http://purl.org/rss/1.0/modules/taxonomy/",
|
||||
"xmlns:dc": "http://purl.org/dc/elements/1.1/",
|
||||
"xmlns:syn": "http://purl.org/rss/1.0/modules/syndication/",
|
||||
"xmlns:dcterms": "http://purl.org/dc/terms/",
|
||||
"xmlns:admin": "http://webns.net/mvcb/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["channel"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["rdf:about", "https://github.com/fb55/htmlparser2/"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"channel",
|
||||
{
|
||||
"rdf:about": "https://github.com/fb55/htmlparser2/"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["A title to parse and remember"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["link", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["https://github.com/fb55/htmlparser2/"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:language"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:language", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["en-us"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:language"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:rights"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:rights", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Copyright 2015 the authors"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:rights"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:publisher"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:publisher", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["webmaster@thisisafakedoma.in"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:publisher"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:creator"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:creator", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["webmaster@thisisafakedoma.in"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:creator"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:source"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:source", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["https://github.com/fb55/htmlparser2/"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:source"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["A title to parse and remember"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:type"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:type", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Collection"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:type"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["syn:updateBase"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["syn:updateBase", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["2011-11-04T09:39:10-07:00"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["syn:updateBase"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["syn:updateFrequency"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["syn:updateFrequency", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["4"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["syn:updateFrequency"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["syn:updatePeriod"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["syn:updatePeriod", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["hourly"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["syn:updatePeriod"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["items"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["items", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["rdf:Seq"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["rdf:Seq", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["rdf:li"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": [
|
||||
"rdf:resource",
|
||||
"http://somefakesite/path/to/something.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"rdf:li",
|
||||
{
|
||||
"rdf:resource": "http://somefakesite/path/to/something.html"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["rdf:li"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["rdf:Seq"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["items"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["channel"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["rdf:about", "http://somefakesite/path/to/something.html"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"item",
|
||||
{
|
||||
"rdf:about": "http://somefakesite/path/to/something.html"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "cdatastart",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [" Fast HTML Parsing "]
|
||||
},
|
||||
{
|
||||
"event": "cdataend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["link", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\nhttp://somefakesite/path/to/something.html\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "cdatastart",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [
|
||||
"\nGreat test content<br>A link: <a href=\"http://github.com\">Github</a>\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "cdataend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:date"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:date", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["2011-11-04T09:35:17-07:00"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:date"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:language"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:language", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["en-us"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:language"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:rights"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:rights", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Copyright 2015 the authors"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:rights"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:source"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:source", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\nhttp://somefakesite/path/to/something.html\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:source"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:title", {}]
|
||||
},
|
||||
{
|
||||
"event": "cdatastart",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": [" Fast HTML Parsing "]
|
||||
},
|
||||
{
|
||||
"event": "cdataend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:type"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:type", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["text"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:type"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dcterms:issued"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dcterms:issued", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["2011-11-04T09:35:17-07:00"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dcterms:issued"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": [
|
||||
"rdf:about",
|
||||
"http://somefakesite/path/to/something-else.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"item",
|
||||
{
|
||||
"rdf:about": "http://somefakesite/path/to/something-else.html"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "cdatastart",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\nThis space intentionally left blank\n"]
|
||||
},
|
||||
{
|
||||
"event": "cdataend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["link", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\nhttp://somefakesite/path/to/something-else.html\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["link"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["description", {}]
|
||||
},
|
||||
{
|
||||
"event": "cdatastart",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\nThe early bird gets the worm\n"]
|
||||
},
|
||||
{
|
||||
"event": "cdataend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["description"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:date"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:date", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["2011-11-04T09:34:54-07:00"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:date"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:language"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:language", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["en-us"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:language"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:rights"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:rights", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Copyright 2015 the authors"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:rights"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:source"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:source", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\nhttp://somefakesite/path/to/something-else.html\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:source"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:title", {}]
|
||||
},
|
||||
{
|
||||
"event": "cdatastart",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\nThis space intentionally left blank\n"]
|
||||
},
|
||||
{
|
||||
"event": "cdataend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dc:type"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dc:type", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["text"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dc:type"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["dcterms:issued"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["dcterms:issued", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["2011-11-04T09:34:54-07:00"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["dcterms:issued"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["item"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["rdf:RDF"]
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,246 @@
|
||||
{
|
||||
"name": "Attributes",
|
||||
"options": {},
|
||||
"file": "Attributes.html",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["!doctype", "!doctype html"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["html"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["html", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["head"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["head", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Attributes test"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["head"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["body"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["body", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "comment",
|
||||
"data": [" Normal attributes "]
|
||||
},
|
||||
{
|
||||
"event": "commentend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["button"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["id", "test0"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "value0"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["title", "value1"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"button",
|
||||
{
|
||||
"id": "test0",
|
||||
"class": "value0",
|
||||
"title": "value1"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["class=\"value0\" title=\"value1\""]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["button"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "comment",
|
||||
"data": [" Attributes with no quotes or value "]
|
||||
},
|
||||
{
|
||||
"event": "commentend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["button"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["id", "test1"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "value2"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["disabled", ""]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"button",
|
||||
{
|
||||
"id": "test1",
|
||||
"class": "value2",
|
||||
"disabled": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["class=value2 disabled"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["button"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "comment",
|
||||
"data": [
|
||||
" Attributes with no space between them. No valid, but accepted by the browser "
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "commentend",
|
||||
"data": []
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["button"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["id", "test2"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["class", "value4"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["title", "value5"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"button",
|
||||
{
|
||||
"id": "test2",
|
||||
"class": "value4",
|
||||
"title": "value5"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["class=\"value4\"title=\"value5\""]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["button"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["body"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["html"]
|
||||
}
|
||||
]
|
||||
}
|
261
packages/fork-htmlparser2/src/__fixtures__/Stream/06-Svg.json
Normal file
261
packages/fork-htmlparser2/src/__fixtures__/Stream/06-Svg.json
Normal file
@ -0,0 +1,261 @@
|
||||
{
|
||||
"name": "SVG",
|
||||
"file": "Svg.html",
|
||||
"expected": [
|
||||
{
|
||||
"event": "processinginstruction",
|
||||
"data": ["!doctype", "!doctype html"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["html"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["html", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["head"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["head", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["SVG test"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["head"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["body"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["body", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["svg"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["version", "1.1"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns", "http://www.w3.org/2000/svg"]
|
||||
},
|
||||
{
|
||||
"event": "attribute",
|
||||
"data": ["xmlns:xlink", "http://www.w3.org/1999/xlink"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": [
|
||||
"svg",
|
||||
{
|
||||
"version": "1.1",
|
||||
"xmlns": "http://www.w3.org/2000/svg",
|
||||
"xmlns:xlink": "http://www.w3.org/1999/xlink"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["Test"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["animate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["animate", {}]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["animate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["polygon"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["polygon", {}]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["polygon"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["g"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["g", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["path"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["path", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["title", {}]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["x"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["title"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "opentagname",
|
||||
"data": ["animate"]
|
||||
},
|
||||
{
|
||||
"event": "opentag",
|
||||
"data": ["animate", {}]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["animate"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["path"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["g"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n\t"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["svg"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["body"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
},
|
||||
{
|
||||
"event": "closetag",
|
||||
"data": ["html"]
|
||||
},
|
||||
{
|
||||
"event": "text",
|
||||
"data": ["\n"]
|
||||
}
|
||||
]
|
||||
}
|
116
packages/fork-htmlparser2/src/__fixtures__/test-helper.ts
Normal file
116
packages/fork-htmlparser2/src/__fixtures__/test-helper.ts
Normal file
@ -0,0 +1,116 @@
|
||||
import { Parser, Handler, ParserOptions } from "../Parser";
|
||||
import { CollectingHandler } from "../CollectingHandler";
|
||||
import { DomHandlerOptions } from "..";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
/**
 * Runs `data` through a newly created `Parser` in two different ways.
 *
 * The input is first written one UTF-16 code unit per `write` call and the
 * parser is ended; afterwards the very same parser instance receives the
 * complete string through `parseComplete`.
 *
 * @param handler Callbacks handed to the `Parser` constructor.
 * @param options Parser options, or `undefined` to pass none.
 * @param data    The document text to parse.
 */
export function writeToParser(
    handler: Partial<Handler>,
    options: ParserOptions | undefined,
    data: string
) {
    const parser = new Parser(handler, options);

    // Pass 1: smallest possible chunks. Splitting on "" yields UTF-16 code
    // units, i.e. exactly what charAt() would return for each index.
    for (const unit of data.split("")) {
        parser.write(unit);
    }
    parser.end();

    // Pass 2: the whole document in a single call.
    parser.parseComplete(data);
}
|
||||
|
||||
/**
 * One recorded parser callback in the normalized form that the JSON
 * fixtures' `expected` arrays use.
 */
interface Event {
    /** Callback name without its leading `on`, e.g. `"opentag"` or `"text"`. */
    event: string;
    /** The arguments the callback was invoked with, in call order. */
    data: Array<unknown>;
}
|
||||
|
||||
/**
 * Creates a `CollectingHandler` that records parser callbacks and reports
 * them to `cb` as a flat, normalized list of events (not a tree).
 *
 * - `onerror` is forwarded to `cb` directly, so `cb` receives the error as
 *   its first argument.
 * - On `onend`, everything in `handler.events` is folded through
 *   `eventReducer` and the result is passed to `cb` with a `null` error.
 *
 * @param cb Node-style callback receiving either an error or the events.
 * @returns The handler to pass to a parser.
 */
export function getEventCollector(
    cb: (error: Error | null, events?: Event[]) => void
): CollectingHandler {
    // Annotated explicitly because `onend` refers back to `handler`.
    const handler: CollectingHandler = new CollectingHandler({
        onerror: cb,
        onend() {
            cb(null, handler.events.reduce(eventReducer, []));
        }
    });

    return handler;
}
|
||||
|
||||
/**
 * `Array.prototype.reduce` callback that turns raw `[callbackName, ...args]`
 * tuples into `{ event, data }` records.
 *
 * - `onerror`, `onend` and `onparserinit` entries are dropped.
 * - An `ontext` entry that directly follows a recorded `text` event is merged
 *   into it by appending its first argument to that event's `data[0]`, so the
 *   result does not depend on how the input was chunked.
 * - Anything else is appended with the two-character `on` prefix removed
 *   from the name.
 *
 * @param events Accumulator; it is mutated in place and returned.
 * @param arr    The raw tuple: callback name followed by its arguments.
 * @returns The same `events` array that was passed in.
 */
function eventReducer(events: Event[], arr: [string, ...unknown[]]): Event[] {
    const [callbackName, ...data] = arr;

    switch (callbackName) {
        case "onerror":
        case "onend":
        case "onparserinit":
            // Bookkeeping callbacks never appear in the expected output.
            return events;
    }

    const previous = events[events.length - 1];
    if (
        callbackName === "ontext" &&
        previous !== undefined &&
        previous.event === "text"
    ) {
        // Combine adjacent text nodes. `data` is typed `unknown[]`, so the
        // operands are narrowed explicitly; this is still a plain `+`.
        previous.data[0] = (previous.data[0] as string) + (arr[1] as string);
        return events;
    }

    // slice(2) strips the "on" prefix (replaces the deprecated substr(2)).
    events.push({ event: callbackName.slice(2), data });
    return events;
}
|
||||
|
||||
/**
 * Builds the result callback for a single fixture.
 *
 * Each invocation asserts that no error was reported and then checks
 * `actual` against either the Jest snapshot (`file.useSnapshot`) or
 * `file.expected`. The first invocation only records that it happened;
 * `done` is called from the second invocation onwards. `writeToParser`
 * parses its input twice, so presumably the callback fires once per pass.
 *
 * @param file The fixture describing the test case.
 * @param done Jest's completion callback for the test.
 * @returns A Node-style `(err, actual)` callback.
 */
function getCallback(file: TestFile, done: (err?: Error | null) => void) {
    let seenFirstResult = false;

    return (err: null | Error, actual?: {} | {}[]) => {
        expect(err).toBeNull();

        const assertion = expect(actual);
        if (file.useSnapshot) assertion.toMatchSnapshot();
        else assertion.toEqual(file.expected);

        // Only reached when the assertions above did not throw.
        if (!seenFirstResult) {
            seenFirstResult = true;
            return;
        }
        done();
    };
}
|
||||
|
||||
/**
 * Shape of a fixture module loaded by `createSuite`.
 *
 * NOTE(review): fixtures are loaded with `require`, so this shape is not
 * checked against the JSON. At least one fixture (the "SVG" stream fixture)
 * has neither `options` nor `html`, although both are declared required.
 */
interface TestFile {
    /** Title under which the test is registered. */
    name: string;
    /**
     * Per-fixture settings: parser options given directly on the object
     * and/or nested under `parser` / `handler`. How each suite consumes
     * them is up to its `getResult` callback.
     */
    options: Partial<ParserOptions> & {
        parser?: ParserOptions;
        handler?: DomHandlerOptions;
    };
    /** Presumably inline markup to parse; not read by this helper (confirm with the suites). */
    html: string;
    /** Name of the input document for the fixture, e.g. `"Attributes.html"`. */
    file: string;
    /** When truthy, the result is compared to a Jest snapshot, not to `expected`. */
    useSnapshot?: boolean;
    /** Expected result, used when `useSnapshot` is not set. */
    expected?: {} | Array<{}>;
}
|
||||
|
||||
/**
 * Registers a Jest `describe` block named `name` containing one test per
 * fixture module found in the directory `<__dirname>/<name>`.
 *
 * Directory entries whose names start with `.` or `_` are skipped. Every
 * remaining entry is loaded with `require`; only after all of them are
 * loaded are the tests registered, in directory-listing order.
 *
 * @param name      Suite title and name of the fixture sub-directory.
 * @param getResult Invoked per fixture with the fixture and a Node-style
 *                  callback (built by `getCallback`) to report the outcome.
 */
export function createSuite(
    name: string,
    getResult: (
        file: TestFile,
        done: (error: Error | null, actual?: {} | {}[]) => void
    ) => void
) {
    describe(name, () => {
        const fixtureDir = path.join(__dirname, name);

        // Phase 1: load all fixtures before any test gets registered.
        const fixtures: TestFile[] = [];
        for (const entry of fs.readdirSync(fixtureDir)) {
            const skipped = entry.startsWith(".") || entry.startsWith("_");
            if (skipped) continue;
            fixtures.push(require(path.join(fixtureDir, entry)));
        }

        // Phase 2: one test per fixture.
        for (const fixture of fixtures) {
            test(fixture.name, done =>
                getResult(fixture, getCallback(fixture, done))
            );
        }
    });
}
|
@ -0,0 +1,205 @@
|
||||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`Feeds Atom (1.0) 1`] = `
|
||||
Object {
|
||||
"author": "johndoe@example.com",
|
||||
"description": "A subtitle.",
|
||||
"id": "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
|
||||
"items": Array [
|
||||
Object {
|
||||
"description": "Some content.",
|
||||
"id": "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
|
||||
"link": "http://example.org/2003/12/13/atom03",
|
||||
"pubDate": 2003-12-13T18:30:02.000Z,
|
||||
"title": "Atom-Powered Robots Run Amok",
|
||||
},
|
||||
],
|
||||
"link": "http://example.org/feed/",
|
||||
"title": "Example Feed",
|
||||
"type": "atom",
|
||||
"updated": 2003-12-13T18:30:02.000Z,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`Feeds Atom (1.0) 2`] = `
|
||||
Object {
|
||||
"author": "johndoe@example.com",
|
||||
"description": "A subtitle.",
|
||||
"id": "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
|
||||
"items": Array [
|
||||
Object {
|
||||
"description": "Some content.",
|
||||
"id": "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
|
||||
"link": "http://example.org/2003/12/13/atom03",
|
||||
"pubDate": 2003-12-13T18:30:02.000Z,
|
||||
"title": "Atom-Powered Robots Run Amok",
|
||||
},
|
||||
],
|
||||
"link": "http://example.org/feed/",
|
||||
"title": "Example Feed",
|
||||
"type": "atom",
|
||||
"updated": 2003-12-13T18:30:02.000Z,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`Feeds RDF test 1`] = `
|
||||
Object {
|
||||
"id": "",
|
||||
"items": Array [
|
||||
Object {
|
||||
"description": "Great test content<br>A link: <a href=\\"http://github.com\\">Github</a>",
|
||||
"link": "http://somefakesite/path/to/something.html",
|
||||
"title": "Fast HTML Parsing",
|
||||
},
|
||||
Object {
|
||||
"description": "The early bird gets the worm",
|
||||
"link": "http://somefakesite/path/to/something-else.html",
|
||||
"title": "This space intentionally left blank",
|
||||
},
|
||||
],
|
||||
"link": "https://github.com/fb55/htmlparser2/",
|
||||
"title": "A title to parse and remember",
|
||||
"type": "rdf",
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`Feeds RDF test 2`] = `
|
||||
Object {
|
||||
"id": "",
|
||||
"items": Array [
|
||||
Object {
|
||||
"description": "Great test content<br>A link: <a href=\\"http://github.com\\">Github</a>",
|
||||
"link": "http://somefakesite/path/to/something.html",
|
||||
"title": "Fast HTML Parsing",
|
||||
},
|
||||
Object {
|
||||
"description": "The early bird gets the worm",
|
||||
"link": "http://somefakesite/path/to/something-else.html",
|
||||
"title": "This space intentionally left blank",
|
||||
},
|
||||
],
|
||||
"link": "https://github.com/fb55/htmlparser2/",
|
||||
"title": "A title to parse and remember",
|
||||
"type": "rdf",
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`Feeds RSS (2.0) 1`] = `
|
||||
Object {
|
||||
"author": "editor@example.com",
|
||||
"description": "Liftoff to Space Exploration.",
|
||||
"id": "",
|
||||
"items": Array [
|
||||
Object {
|
||||
"description": "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\\">Star City</a>.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
|
||||
"pubDate": 2003-06-03T09:39:21.000Z,
|
||||
"title": "Star City",
|
||||
},
|
||||
Object {
|
||||
"description": "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\\">partial eclipse of the Sun</a> on Saturday, May 31st.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
|
||||
"pubDate": 2003-05-30T11:06:42.000Z,
|
||||
},
|
||||
Object {
|
||||
"description": "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
|
||||
"pubDate": 2003-05-27T08:37:32.000Z,
|
||||
"title": "The Engine That Does More",
|
||||
},
|
||||
Object {
|
||||
"description": "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
|
||||
"pubDate": 2003-05-20T08:56:02.000Z,
|
||||
"title": "Astronauts' Dirty Laundry",
|
||||
},
|
||||
],
|
||||
"link": "http://liftoff.msfc.nasa.gov/",
|
||||
"title": "Liftoff News",
|
||||
"type": "rss",
|
||||
"updated": 2003-06-10T09:41:01.000Z,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`Feeds RSS (2.0) 2`] = `
|
||||
Object {
|
||||
"author": "editor@example.com",
|
||||
"description": "Liftoff to Space Exploration.",
|
||||
"id": "",
|
||||
"items": Array [
|
||||
Object {
|
||||
"description": "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\\">Star City</a>.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
|
||||
"pubDate": 2003-06-03T09:39:21.000Z,
|
||||
"title": "Star City",
|
||||
},
|
||||
Object {
|
||||
"description": "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\\">partial eclipse of the Sun</a> on Saturday, May 31st.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
|
||||
"pubDate": 2003-05-30T11:06:42.000Z,
|
||||
},
|
||||
Object {
|
||||
"description": "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
|
||||
"pubDate": 2003-05-27T08:37:32.000Z,
|
||||
"title": "The Engine That Does More",
|
||||
},
|
||||
Object {
|
||||
"description": "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
|
||||
"pubDate": 2003-05-20T08:56:02.000Z,
|
||||
"title": "Astronauts' Dirty Laundry",
|
||||
},
|
||||
],
|
||||
"link": "http://liftoff.msfc.nasa.gov/",
|
||||
"title": "Liftoff News",
|
||||
"type": "rss",
|
||||
"updated": 2003-06-10T09:41:01.000Z,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`parseFeed (rssFeed) 1`] = `
|
||||
Object {
|
||||
"author": "editor@example.com",
|
||||
"description": "Liftoff to Space Exploration.",
|
||||
"id": "",
|
||||
"items": Array [
|
||||
Object {
|
||||
"description": "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\\">Star City</a>.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
|
||||
"pubDate": 2003-06-03T09:39:21.000Z,
|
||||
"title": "Star City",
|
||||
},
|
||||
Object {
|
||||
"description": "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\\">partial eclipse of the Sun</a> on Saturday, May 31st.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
|
||||
"pubDate": 2003-05-30T11:06:42.000Z,
|
||||
},
|
||||
Object {
|
||||
"description": "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
|
||||
"pubDate": 2003-05-27T08:37:32.000Z,
|
||||
"title": "The Engine That Does More",
|
||||
},
|
||||
Object {
|
||||
"description": "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.",
|
||||
"id": "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
|
||||
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
|
||||
"pubDate": 2003-05-20T08:56:02.000Z,
|
||||
"title": "Astronauts' Dirty Laundry",
|
||||
},
|
||||
],
|
||||
"link": "http://liftoff.msfc.nasa.gov/",
|
||||
"title": "Liftoff News",
|
||||
"type": "rss",
|
||||
"updated": 2003-06-10T09:41:01.000Z,
|
||||
}
|
||||
`;
|
125
packages/fork-htmlparser2/src/__snapshots__/index.spec.ts.snap
Normal file
125
packages/fork-htmlparser2/src/__snapshots__/index.spec.ts.snap
Normal file
@ -0,0 +1,125 @@
|
||||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`Index createDomStream 1`] = `
|
||||
Array [
|
||||
DataNode {
|
||||
"data": "&This is text",
|
||||
"endIndex": null,
|
||||
"next": DataNode {
|
||||
"data": " and comments ",
|
||||
"endIndex": null,
|
||||
"next": <tags />,
|
||||
"parent": null,
|
||||
"prev": [Circular],
|
||||
"startIndex": null,
|
||||
"type": "comment",
|
||||
},
|
||||
"parent": null,
|
||||
"prev": null,
|
||||
"startIndex": null,
|
||||
"type": "text",
|
||||
},
|
||||
DataNode {
|
||||
"data": " and comments ",
|
||||
"endIndex": null,
|
||||
"next": <tags />,
|
||||
"parent": null,
|
||||
"prev": DataNode {
|
||||
"data": "&This is text",
|
||||
"endIndex": null,
|
||||
"next": [Circular],
|
||||
"parent": null,
|
||||
"prev": null,
|
||||
"startIndex": null,
|
||||
"type": "text",
|
||||
},
|
||||
"startIndex": null,
|
||||
"type": "comment",
|
||||
},
|
||||
<tags />,
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`Index parseDOM 1`] = `
|
||||
Array [
|
||||
<a
|
||||
foo=""
|
||||
>
|
||||
<b>
|
||||
<c>
|
||||
ProcessingInstruction {
|
||||
"data": "?foo",
|
||||
"endIndex": null,
|
||||
"name": "?foo",
|
||||
"next": DataNode {
|
||||
"data": "Yay!",
|
||||
"endIndex": null,
|
||||
"next": null,
|
||||
"parent": <c>
|
||||
[Circular]
|
||||
[Circular]
|
||||
</c>,
|
||||
"prev": [Circular],
|
||||
"startIndex": null,
|
||||
"type": "text",
|
||||
},
|
||||
"parent": <c>
|
||||
[Circular]
|
||||
DataNode {
|
||||
"data": "Yay!",
|
||||
"endIndex": null,
|
||||
"next": null,
|
||||
"parent": <c>
|
||||
[Circular]
|
||||
[Circular]
|
||||
</c>,
|
||||
"prev": [Circular],
|
||||
"startIndex": null,
|
||||
"type": "text",
|
||||
}
|
||||
</c>,
|
||||
"prev": null,
|
||||
"startIndex": null,
|
||||
"type": "directive",
|
||||
}
|
||||
DataNode {
|
||||
"data": "Yay!",
|
||||
"endIndex": null,
|
||||
"next": null,
|
||||
"parent": <c>
|
||||
ProcessingInstruction {
|
||||
"data": "?foo",
|
||||
"endIndex": null,
|
||||
"name": "?foo",
|
||||
"next": [Circular],
|
||||
"parent": <c>
|
||||
[Circular]
|
||||
[Circular]
|
||||
</c>,
|
||||
"prev": null,
|
||||
"startIndex": null,
|
||||
"type": "directive",
|
||||
}
|
||||
[Circular]
|
||||
</c>,
|
||||
"prev": ProcessingInstruction {
|
||||
"data": "?foo",
|
||||
"endIndex": null,
|
||||
"name": "?foo",
|
||||
"next": [Circular],
|
||||
"parent": <c>
|
||||
[Circular]
|
||||
[Circular]
|
||||
</c>,
|
||||
"prev": null,
|
||||
"startIndex": null,
|
||||
"type": "directive",
|
||||
},
|
||||
"startIndex": null,
|
||||
"type": "text",
|
||||
}
|
||||
</c>
|
||||
</b>
|
||||
</a>,
|
||||
]
|
||||
`;
|
9
packages/fork-htmlparser2/src/__tests__/events.ts
Normal file
9
packages/fork-htmlparser2/src/__tests__/events.ts
Normal file
@ -0,0 +1,9 @@
|
||||
import * as helper from "../__fixtures__/test-helper";
|
||||
|
||||
// "Events" suite: for each fixture, build an event collector around the suite
// callback and write the fixture's HTML to a parser that uses it.
helper.createSuite("Events", (test, cb) => {
    const collector = helper.getEventCollector(cb);

    // The fixture supplies both the parser options and the markup to parse.
    return helper.writeToParser(collector, test.options.parser, test.html);
});
|
33
packages/fork-htmlparser2/src/__tests__/stream.ts
Normal file
33
packages/fork-htmlparser2/src/__tests__/stream.ts
Normal file
@ -0,0 +1,33 @@
|
||||
import * as helper from "../__fixtures__/test-helper";
|
||||
import { WritableStream } from "../WritableStream";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
// "Stream" suite: each fixture document is pushed through WritableStream twice.
// The first pass pipes a read stream into it; the callback given to the first
// collector forwards its result to `cb` and then starts a second pass that
// hands the whole file to `end()` in one piece.
helper.createSuite("Stream", (test, cb) => {
    const fixturePath = path.join(
        __dirname,
        "..",
        "__fixtures__",
        "Documents",
        test.file
    );

    // Second pass: a fresh collector and stream, fed from fs.readFile.
    const runBufferedPass = () => {
        const collector = helper.getEventCollector(cb);
        const bufferedStream = new WritableStream(collector, test.options);

        fs.readFile(fixturePath, (readErr, contents) => {
            if (readErr) throw readErr;
            bufferedStream.end(contents);
        });
    };

    // First pass: the read stream is created before the writable it is piped into.
    const source = fs.createReadStream(fixturePath);
    const pipedStream = new WritableStream(
        helper.getEventCollector((err, events) => {
            cb(err, events);
            runBufferedPass();
        }),
        test.options
    );

    // Errors emitted by the piped WritableStream go to the suite callback.
    source.pipe(pipedStream).on("error", cb);
});
|
36
packages/fork-htmlparser2/src/index.spec.ts
Normal file
36
packages/fork-htmlparser2/src/index.spec.ts
Normal file
@ -0,0 +1,36 @@
|
||||
import { parseDOM, createDomStream } from ".";
|
||||
import { Element } from "domhandler";
|
||||
|
||||
// Temporary shim: expose `attributes` on Element so that Jest is able to render DOM nodes.
// The getter turns the `attribs` map into an array of `{ name, value }` pairs.
Object.defineProperty(Element.prototype, "attributes", {
    enumerable: false,
    configurable: true,
    get() {
        const attribs = this.attribs;
        const names = Object.keys(attribs);

        return names.map(name => ({ name, value: attribs[name] }));
    }
});
|
||||
|
||||
// Snapshot tests for the two helper entry points exported from the index module.
describe("Index", () => {
    // One-shot parsing: the returned DOM is compared against the stored snapshot.
    test("parseDOM", () => {
        expect(parseDOM("<a foo><b><c><?foo>Yay!")).toMatchSnapshot();
    });

    // Streaming: the callback must receive a null error and a DOM matching the snapshot.
    test("createDomStream", done => {
        const input = "&This is text<!-- and comments --><tags>";

        const domStream = createDomStream((err, dom) => {
            expect(err).toBeNull();
            expect(dom).toMatchSnapshot();

            done();
        });

        // Write the input one character at a time, then end the stream.
        for (const ch of input) {
            domStream.write(ch);
        }

        domStream.end();
    });
});
|
77
packages/fork-htmlparser2/src/index.ts
Normal file
77
packages/fork-htmlparser2/src/index.ts
Normal file
@ -0,0 +1,77 @@
|
||||
import { Parser, ParserOptions } from "./Parser";
|
||||
export { Parser, ParserOptions };
|
||||
|
||||
import { DomHandler, DomHandlerOptions, Node, Element } from "domhandler";
|
||||
|
||||
export { DomHandler, DomHandlerOptions };
|
||||
|
||||
type Options = ParserOptions & DomHandlerOptions;
|
||||
|
||||
// Helper methods
|
||||
|
||||
/**
 * Parses a complete document in a single call and returns the resulting DOM.
 *
 * @param data The data that should be parsed.
 * @param options Optional options, handed to both the parser and the DOM builder.
 * @returns The `dom` of the DOM handler after the parser has been ended with `data`.
 */
export function parseDOM(data: string, options?: Options): Node[] {
    // No callback is passed to the handler; the result is read from `dom` instead.
    const domBuilder = new DomHandler(undefined, options);
    const parser = new Parser(domBuilder, options);

    parser.end(data);

    return domBuilder.dom;
}
|
||||
/**
 * Builds a parser whose events are handled by a newly created DOM handler.
 *
 * @param cb A callback that will be called once parsing has been completed.
 * @param options Optional options, handed to both the parser and the DOM builder.
 * @param elementCb An optional callback that will be called every time a tag has been completed inside of the DOM.
 * @returns The parser instance attached to the new DOM handler.
 */
export function createDomStream(
    cb: (error: Error | null, dom: Node[]) => void,
    options?: Options,
    elementCb?: (element: Element) => void
) {
    return new Parser(new DomHandler(cb, options, elementCb), options);
}
|
||||
|
||||
export { default as Tokenizer } from "./Tokenizer";
|
||||
import * as ElementType from "domelementtype";
|
||||
export { ElementType };
|
||||
|
||||
/**
 * Table of every event the parser emits.
 *
 * Each key is an event name; each value is the number of arguments
 * that event is emitted with.
 */
export const EVENTS = {
    attribute: 2,
    cdatastart: 0,
    cdataend: 0,
    text: 1,
    processinginstruction: 2,
    comment: 1,
    commentend: 0,
    closetag: 1,
    opentag: 2,
    opentagname: 1,
    error: 1,
    end: 0
};
|
||||
|
||||
/*
|
||||
All of the following exports exist for backwards-compatibility.
|
||||
They should probably be removed eventually.
|
||||
*/
|
||||
|
||||
export * from "./FeedHandler";
|
||||
export * from "./WritableStream";
|
||||
export * from "./CollectingHandler";
|
||||
|
||||
import * as DomUtils from "domutils";
|
||||
export { DomUtils };
|
||||
|
||||
// Old names for Dom- & FeedHandler
|
||||
export { DomHandler as DefaultHandler };
|
||||
export { FeedHandler as RssHandler } from "./FeedHandler";
|
Reference in New Issue
Block a user