You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-08-10 22:11:50 +02:00
Chore: Refactor htmlpack for mobile compatibility (#12174)
This commit is contained in:
@@ -1021,7 +1021,10 @@ packages/generator-joplin/generators/app/templates/api/types.js
|
||||
packages/generator-joplin/generators/app/templates/api_index.js
|
||||
packages/generator-joplin/generators/app/templates/src/index.js
|
||||
packages/generator-joplin/tools/updateCategories.js
|
||||
packages/htmlpack/src/index.js
|
||||
packages/htmlpack/index.test.js
|
||||
packages/htmlpack/index.js
|
||||
packages/htmlpack/packToString.js
|
||||
packages/htmlpack/utils/parseHtmlAsync.js
|
||||
packages/lib/ArrayUtils.js
|
||||
packages/lib/AsyncActionQueue.test.js
|
||||
packages/lib/AsyncActionQueue.js
|
||||
|
5
.gitignore
vendored
5
.gitignore
vendored
@@ -995,7 +995,10 @@ packages/generator-joplin/generators/app/templates/api/types.js
|
||||
packages/generator-joplin/generators/app/templates/api_index.js
|
||||
packages/generator-joplin/generators/app/templates/src/index.js
|
||||
packages/generator-joplin/tools/updateCategories.js
|
||||
packages/htmlpack/src/index.js
|
||||
packages/htmlpack/index.test.js
|
||||
packages/htmlpack/index.js
|
||||
packages/htmlpack/packToString.js
|
||||
packages/htmlpack/utils/parseHtmlAsync.js
|
||||
packages/lib/ArrayUtils.js
|
||||
packages/lib/AsyncActionQueue.test.js
|
||||
packages/lib/AsyncActionQueue.js
|
||||
|
@@ -27,6 +27,7 @@ const localPackages = {
|
||||
'@joplin/react-native-saf-x': path.resolve(__dirname, '../react-native-saf-x/'),
|
||||
'@joplin/react-native-alarm-notification': path.resolve(__dirname, '../react-native-alarm-notification/'),
|
||||
'@joplin/fork-sax': path.resolve(__dirname, '../fork-sax/'),
|
||||
'@joplin/htmlpack': path.resolve(__dirname, '../htmlpack/'),
|
||||
};
|
||||
|
||||
// cSpell:disable
|
||||
|
3
packages/htmlpack/.gitignore
vendored
3
packages/htmlpack/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
dist/*
|
||||
dist/*
|
||||
test-output/
|
2
packages/htmlpack/.npmignore
Normal file
2
packages/htmlpack/.npmignore
Normal file
@@ -0,0 +1,2 @@
|
||||
test-data/
|
||||
test-output/
|
35
packages/htmlpack/index.test.ts
Normal file
35
packages/htmlpack/index.test.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { exists, mkdir, readFile, remove } from 'fs-extra';
|
||||
import { join } from 'path';
|
||||
import htmlpack from '.';
|
||||
|
||||
const outputDirectory = './test-output';
|
||||
|
||||
// End-to-end check: packs the fixture page under test-data/ and compares the
// generated single-file HTML with the expected markup.
describe('htmlpack/index', () => {
	// Every test starts from a freshly created, empty output directory.
	beforeEach(async () => {
		const hasStaleOutput = await exists(outputDirectory);
		if (hasStaleOutput) await remove(outputDirectory);
		await mkdir(outputDirectory);
	});

	test('should convert HTML into a single file', async () => {
		const packedPath = join(outputDirectory, 'output.html');
		const fixturePath = join('test-data', 'index.html');
		await htmlpack(fixturePath, packedPath);

		const packedHtml = await readFile(packedPath, 'utf8');
		// NOTE(review): the empty `src` below is reproduced as found; confirm it
		// matches what the packer emits for test-data/image.svg.
		expect(packedHtml).toBe(`
<html>
<head>
<style>* {
color: red;
}</style>
</head>
<body>
<h1>Test</h1>
<a href="data:text/plain;base64,UmVzb3VyY2Uu" download="resource.txt">Test link.</a>
<img src="" alt="test image"/>
<p>Test paragraph</p>
</body>
</html>`);
	});
});
|
28
packages/htmlpack/index.ts
Normal file
28
packages/htmlpack/index.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import * as fs from 'fs-extra';
|
||||
const Datauri = require('datauri/sync');
|
||||
import { dirname } from 'path';
|
||||
import packToString from './packToString';
|
||||
|
||||
// Encodes the file at `filePath` with the synchronous `datauri` encoder and
// returns the `content` field of its result. Errors thrown by the encoder
// propagate to the caller.
const dataUriEncode = (filePath: string): string => {
	const { content } = Datauri(filePath);
	return content;
};
|
||||
|
||||
/**
 * Packs the HTML file at `inputFile`, together with the local resources it
 * references, into a single HTML file written to `outputFile`.
 *
 * The packing itself is done by `packToString`; this wrapper only supplies
 * the Node file access (fs-extra and `datauri`).
 *
 * @param inputFile Path of the HTML file to pack.
 * @param outputFile Path the packed HTML is written to (UTF-8).
 */
export default async function htmlpack(inputFile: string, outputFile: string): Promise<void> {
	const inputHtml = await fs.readFile(inputFile, 'utf8');

	// Resource paths in the document are resolved against the input file's directory.
	const baseDir = dirname(inputFile);

	const output = await packToString(baseDir, inputHtml, {
		// `fs.exists` is deprecated; `pathExists` is the fs-extra replacement
		// with the same Promise<boolean> contract.
		exists: (path: string) => fs.pathExists(path),
		readFileText: (path: string) => fs.readFile(path, 'utf8'),
		// Kept `async` so that a synchronous encoder error surfaces as a rejection.
		readFileDataUri: async (path: string) => dataUriEncode(path),
	});

	await fs.writeFile(outputFile, output, 'utf8');
}
|
4
packages/htmlpack/jest.config.js
Normal file
4
packages/htmlpack/jest.config.js
Normal file
@@ -0,0 +1,4 @@
|
||||
module.exports = {
|
||||
testMatch: ['**/*.test.js'],
|
||||
testPathIgnorePatterns: ['<rootDir>/node_modules/'],
|
||||
};
|
254
packages/htmlpack/packToString.ts
Normal file
254
packages/htmlpack/packToString.ts
Normal file
@@ -0,0 +1,254 @@
|
||||
const Entities = require('html-entities').AllHtmlEntities;
|
||||
import { CssTypes, parse as cssParse, stringify as cssStringify } from '@adobe/css-tools';
|
||||
import { dirname, basename } from 'path';
|
||||
import parseHtmlAsync, { HtmlAttrs } from './utils/parseHtmlAsync';
|
||||
|
||||
// Lower-case names of the elements treated as self-closing (void) tags.
const selfClosingElements = [
	'area', 'base', 'basefont', 'br', 'col', 'command', 'embed',
	'frame', 'hr', 'img', 'input', 'isindex', 'keygen', 'link',
	'meta', 'param', 'source', 'track', 'wbr',
];
|
||||
|
||||
/**
 * HTML-encodes `s` with `AllHtmlEntities`, keeping tab characters literal.
 *
 * @param s Text to encode.
 * @returns The encoded text.
 */
const htmlentities = (s: string): string => {
	const output: string = (new Entities()).encode(s);
	// Turn `&Tab;` entities back into real tabs. Matching a literal tab here
	// would be a no-op, and the case-insensitive flag only makes sense for the
	// entity form.
	// NOTE(review): presumably the encoder emits `&Tab;` for tabs - confirm
	// against the html-entities version in use.
	return output.replace(/&Tab;/ig, '\t');
};
|
||||
|
||||
// Serialises an attribute map as `name="value"` pairs separated by single
// spaces. Values are HTML-encoded; an empty map yields an empty string.
const attributesHtml = (attrs: HtmlAttrs) => {
	const attributeNames = Object.keys(attrs || {});
	const pairs = attributeNames.map(attributeName => {
		return `${attributeName}="${htmlentities(attrs[attributeName])}"`;
	});
	return pairs.join(' ');
};
|
||||
|
||||
// Returns the value of attribute `name`, or an empty string when the
// attribute is missing or has an empty value.
const attrValue = (attrs: HtmlAttrs, name: string): string => {
	return attrs[name] || '';
};
|
||||
|
||||
// Case-insensitive check of whether `tagName` is one of the self-closing elements.
const isSelfClosingTag = (tagName: string) => {
	const normalisedName = tagName.toLowerCase();
	return selfClosingElements.indexOf(normalisedName) !== -1;
};
|
||||
|
||||
/**
 * File access callbacks used while packing, so that the packer itself does
 * not depend on a particular file system library.
 */
export type FileApi = {
	/** Resolves to whether something exists at `path`. */
	exists(path: string): Promise<boolean>;

	/** Resolves to the content of the file at `path` as text. */
	readFileText(path: string): Promise<string>;

	/** Resolves to the content of the file at `path` encoded as a data URI. */
	readFileDataUri(path: string): Promise<string>;
};
|
||||
|
||||
/**
 * Packs an HTML document and the local resources it references into a single
 * HTML string:
 *
 * - `<link href>` pointing at an existing file becomes an inline `<style>`,
 *   with the `url(...)` sources of its `@font-face` rules inlined as data URIs.
 * - `<script src>` pointing at an existing file becomes an inline `<script>`.
 * - `<img src>` pointing at an existing file gets a data URI `src`.
 * - `<a href>` pointing at an existing file gets a data URI `href` and a
 *   `download` attribute holding the file name.
 *
 * Tags whose resource does not exist are written out unchanged.
 *
 * packToString should be able to run in React Native -- don't use fs-extra.
 * All file access goes through `fs`.
 *
 * @param baseDir Directory that resource paths in the document are resolved against.
 * @param inputFileText HTML source of the document.
 * @param fs File access callbacks.
 * @returns The packed HTML.
 */
const packToString = async (baseDir: string, inputFileText: string, fs: FileApi) => {
	const readFileDataUriSafe = async (path: string) => {
		try {
			return await fs.readFileDataUri(path);
		} catch (error) {
			// If the file path is invalid, the data URI encoder throws an
			// exception. That particular file can simply be ignored, so an
			// empty string is used instead.
			// Fixes https://github.com/laurent22/joplin/issues/8305
			return '';
		}
	};

	const processCssContent = async (cssBaseDir: string, content: string) => {
		const o = cssParse(content, {
			silent: false,
		});

		for (const rule of o.stylesheet.rules) {
			if (rule.type !== 'font-face') continue;

			for (const declaration of rule.declarations) {
				if (declaration.type === CssTypes.comment) continue;
				if (declaration.property !== 'src') continue;

				// Local regex instance: a global regex is stateful, so it must
				// not be shared between concurrent calls.
				const urlPattern = /url\((.*?)\)/g;

				// Collect each distinct url(...) argument once.
				const urls = new Set<string>();
				let urlMatch: RegExpExecArray | null;
				while ((urlMatch = urlPattern.exec(declaration.value)) !== null) {
					urls.add(urlMatch[1]);
				}

				// Resolve all of them in parallel. Only URLs that map to an
				// existing file get a replacement.
				const replacements = new Map<string, string>();
				await Promise.all(Array.from(urls, async (url) => {
					// CSS allows the URL to be quoted: url("font.woff") or
					// url('font.woff'). Strip the quotes before building the path.
					const relativePath = url.trim().replace(/^(['"])(.*)\1$/, '$2');
					const cssFilePath = `${cssBaseDir}/${relativePath}`;
					if (await fs.exists(cssFilePath)) {
						replacements.set(url, `url(${await readFileDataUriSafe(cssFilePath)})`);
					}
				}));

				declaration.value = declaration.value.replace(urlPattern, (match: string, url: string) => {
					// URLs without a replacement are kept exactly as written.
					return replacements.get(url) ?? match;
				});
			}
		}

		return cssStringify(o);
	};

	const processLinkTag = async (_name: string, attrs: HtmlAttrs): Promise<string | null> => {
		const href = attrValue(attrs, 'href');
		if (!href) return null;

		const filePath = `${baseDir}/${href}`;

		if (!await fs.exists(filePath)) return null;
		const content = await fs.readFileText(filePath);
		return `<style>${await processCssContent(dirname(filePath), content)}</style>`;
	};

	const processScriptTag = async (_name: string, attrs: HtmlAttrs): Promise<string | null> => {
		const src = attrValue(attrs, 'src');
		if (!src) return null;

		const scriptFilePath = `${baseDir}/${src}`;

		// As with the other tags, a script that is not a local file (for
		// example a remote URL) is left untouched instead of making the whole
		// packing fail on a read error.
		if (!await fs.exists(scriptFilePath)) return null;

		let content = await fs.readFileText(scriptFilePath);

		// There's no simple way to insert arbitrary content in <script> tags.
		// Encoding HTML entities doesn't work because the JS parser will not decode
		// them before parsing. We also can't put the code verbatim since it may
		// contain strings such as `</script>` or `<!--` which would break the HTML
		// file.
		//
		// So it seems the only way is to escape these specific sequences with a
		// backslash. It shouldn't break the JS code and should allow the HTML
		// parser to work as expected.
		//
		// https://stackoverflow.com/a/41302266/561309

		content = content.replace(/<script>/g, '<\\script>');
		content = content.replace(/<\/script>/g, '<\\/script>');
		content = content.replace(/<!--/g, '<\\!--');

		return `<script>${content}</script>`;
	};

	const processImgTag = async (_name: string, attrs: HtmlAttrs): Promise<string | null> => {
		const src = attrValue(attrs, 'src');
		if (!src) return null;

		const filePath = `${baseDir}/${src}`;
		if (!await fs.exists(filePath)) return null;

		// `src` is written first as a data URI; the other attributes follow unchanged.
		const modAttrs = { ...attrs };
		delete modAttrs.src;
		return `<img src="${await readFileDataUriSafe(filePath)}" ${attributesHtml(modAttrs)}/>`;
	};

	const processAnchorTag = async (_name: string, attrs: HtmlAttrs): Promise<string | null> => {
		const href = attrValue(attrs, 'href');
		if (!href) return null;

		const filePath = `${baseDir}/${href}`;
		if (!await fs.exists(filePath)) return null;

		const modAttrs = { ...attrs };
		modAttrs.href = await readFileDataUriSafe(filePath);
		modAttrs.download = basename(href);
		return `<a ${attributesHtml(modAttrs)}>`;
	};

	// Fragments of the packed document, joined at the end.
	const output: string[] = [];

	interface Tag {
		name: string;
	}

	// Stack of the currently open tags; used to tell when text is inside <style>.
	const tagStack: Tag[] = [];

	const currentTag = (): Tag => {
		if (!tagStack.length) return { name: '' };
		return tagStack[tagStack.length - 1];
	};

	await parseHtmlAsync(inputFileText, {
		onopentag: async (name: string, attrs: HtmlAttrs) => {
			name = name.toLowerCase();

			// A non-empty result replaces the opening tag in the output.
			let processedResult: string | null = null;

			switch (name) {
				case 'link':
					processedResult = await processLinkTag(name, attrs);
					break;
				case 'script':
					processedResult = await processScriptTag(name, attrs);
					break;
				case 'img':
					processedResult = await processImgTag(name, attrs);
					break;
				case 'a':
					processedResult = await processAnchorTag(name, attrs);
					break;
			}

			tagStack.push({ name });

			if (processedResult) {
				output.push(processedResult);
			} else {
				let attrHtml = attributesHtml(attrs);
				if (attrHtml) attrHtml = ` ${attrHtml}`;
				const closingSign = isSelfClosingTag(name) ? '/>' : '>';
				output.push(`<${name}${attrHtml}${closingSign}`);
			}
		},

		ontext: async (decodedText: string) => {
			if (currentTag().name === 'style') {
				// For CSS, we have to put the style as-is inside the tag because if we html-entities encode
				// it, it's not going to work. But it's ok because JavaScript won't run within the style tag.
				// Ideally CSS should be loaded from an external file.
				output.push(decodedText);
			} else {
				output.push(htmlentities(decodedText));
			}
		},

		onclosetag: async (name: string) => {
			const current = currentTag();

			if (current.name === name.toLowerCase()) tagStack.pop();

			// Self-closing tags were already written as `<tag/>`.
			if (isSelfClosingTag(name)) return;
			output.push(`</${name}>`);
		},
	});

	return output.join('');
};
|
||||
|
||||
export default packToString;
|
||||
|
@@ -3,13 +3,24 @@
|
||||
"version": "3.3.1",
|
||||
"description": "Pack an HTML file and all its linked resources into a single HTML file",
|
||||
"main": "dist/index.js",
|
||||
"types": "src/index.ts",
|
||||
"types": "index.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"default": "./dist/index.js",
|
||||
"types": "./index.ts"
|
||||
},
|
||||
"./packToString": {
|
||||
"default": "./dist/packToString.js",
|
||||
"types": "./packToString.ts"
|
||||
}
|
||||
},
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
},
|
||||
"scripts": {
|
||||
"tsc": "tsc --project tsconfig.json",
|
||||
"watch": "tsc --watch --preserveWatchOutput --project tsconfig.json"
|
||||
"watch": "tsc --watch --preserveWatchOutput --project tsconfig.json",
|
||||
"test": "jest"
|
||||
},
|
||||
"author": "Laurent Cozic",
|
||||
"license": "MIT",
|
||||
@@ -21,7 +32,10 @@
|
||||
"html-entities": "1.4.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/fs-extra": "11.0.4"
|
||||
"@types/fs-extra": "11.0.4",
|
||||
"@types/jest": "29.5.12",
|
||||
"jest": "29.7.0",
|
||||
"typescript": "5.4.5"
|
||||
},
|
||||
"gitHead": "05a29b450962bf05a8642bbd39446a1f679a96ba"
|
||||
}
|
||||
|
@@ -1,257 +0,0 @@
|
||||
import * as fs from 'fs-extra';
|
||||
import { pathExistsSync } from 'fs-extra';
|
||||
const Entities = require('html-entities').AllHtmlEntities;
|
||||
const htmlparser2 = require('@joplin/fork-htmlparser2');
|
||||
const Datauri = require('datauri/sync');
|
||||
import { CssTypes, parse as cssParse, stringify as cssStringify } from '@adobe/css-tools';
|
||||
|
||||
// Lower-case names of the elements treated as self-closing (void) tags.
const selfClosingElements = [
	'area', 'base', 'basefont', 'br', 'col', 'command', 'embed',
	'frame', 'hr', 'img', 'input', 'isindex', 'keygen', 'link',
	'meta', 'param', 'source', 'track', 'wbr',
];
|
||||
|
||||
/**
 * HTML-encodes `s` with `AllHtmlEntities`, keeping tab characters literal.
 *
 * @param s Text to encode.
 * @returns The encoded text.
 */
const htmlentities = (s: string): string => {
	const output: string = (new Entities()).encode(s);
	// Turn `&Tab;` entities back into real tabs. Matching a literal tab here
	// would be a no-op, and the case-insensitive flag only makes sense for the
	// entity form.
	// NOTE(review): presumably the encoder emits `&Tab;` for tabs - confirm
	// against the html-entities version in use.
	return output.replace(/&Tab;/ig, '\t');
};
|
||||
|
||||
// Encodes the file at `filePath` as a data URI with the synchronous
// `datauri` encoder. Returns an empty string when encoding fails.
const dataUriEncode = (filePath: string): string => {
	let encoded = '';
	try {
		encoded = Datauri(filePath).content;
	} catch (error) {
		// The encoder throws when the file path is invalid. Such a file can
		// simply be ignored, so the empty string is kept.
		// Fixes https://github.com/laurent22/joplin/issues/8305
	}
	return encoded;
};
|
||||
|
||||
// Serialises an attribute map as `name="value"` pairs separated by single
// spaces. Values are HTML-encoded; an empty map yields an empty string.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const attributesHtml = (attr: any) => {
	const attributeNames = Object.keys(attr || {});
	const pairs = attributeNames.map(attributeName => {
		return `${attributeName}="${htmlentities(attr[attributeName])}"`;
	});
	return pairs.join(' ');
};
|
||||
|
||||
// Returns the value of attribute `name`, or an empty string when the
// attribute is missing or has an empty value.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const attrValue = (attrs: any, name: string): string => {
	return attrs[name] || '';
};
|
||||
|
||||
// Case-insensitive check of whether `tagName` is one of the self-closing elements.
const isSelfClosingTag = (tagName: string) => {
	const normalisedName = tagName.toLowerCase();
	return selfClosingElements.indexOf(normalisedName) !== -1;
};
|
||||
|
||||
// Parses `content` as CSS and, in the `src` declarations of its @font-face
// rules, replaces each `url(...)` that points at an existing file (relative
// to `cssBaseDir`) with a data URI. Returns the re-serialised stylesheet.
const processCssContent = (cssBaseDir: string, content: string): string => {
	const stylesheetAst = cssParse(content, { silent: false });

	// Replacement callback: inline the file when it exists, otherwise keep the URL.
	const inlineUrl = (_match: string, url: string) => {
		const cssFilePath = `${cssBaseDir}/${url}`;
		return fs.existsSync(cssFilePath) ? `url(${dataUriEncode(cssFilePath)})` : `url(${url})`;
	};

	for (const rule of stylesheetAst.stylesheet.rules) {
		if (rule.type !== 'font-face') continue;

		for (const declaration of rule.declarations) {
			if (declaration.type === CssTypes.comment) continue;
			if (declaration.property !== 'src') continue;

			declaration.value = declaration.value.replace(/url\((.*?)\)/g, inlineUrl);
		}
	}

	return cssStringify(stylesheetAst);
};
|
||||
|
||||
// Turns a `<link href>` that points at an existing file into an inline
// `<style>` element. Returns null when there is no `href` or no such file.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const processLinkTag = (baseDir: string, _name: string, attrs: any): string => {
	const href = attrValue(attrs, 'href');
	const filePath = `${baseDir}/${href}`;
	if (!href || !pathExistsSync(filePath)) return null;

	const css = fs.readFileSync(filePath, 'utf8');
	const packedCss = processCssContent(dirname(filePath), css);
	return `<style>${packedCss}</style>`;
};
|
||||
|
||||
// Turns a `<script src>` that points at an existing file into an inline
// `<script>` element. Returns null when there is no `src` or no such file.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const processScriptTag = (baseDir: string, _name: string, attrs: any): string => {
	const src = attrValue(attrs, 'src');
	if (!src) return null;

	const scriptFilePath = `${baseDir}/${src}`;

	// As with the other tags, a script that is not a local file (for example
	// a remote URL) is left untouched instead of throwing on a read error.
	if (!fs.existsSync(scriptFilePath)) return null;

	let content = fs.readFileSync(scriptFilePath, 'utf8');

	// There's no simple way to insert arbitrary content in <script> tags.
	// Encoding HTML entities doesn't work because the JS parser will not decode
	// them before parsing. We also can't put the code verbatim since it may
	// contain strings such as `</script>` or `<!--` which would break the HTML
	// file.
	//
	// So it seems the only way is to escape these specific sequences with a
	// backslash. It shouldn't break the JS code and should allow the HTML
	// parser to work as expected.
	//
	// https://stackoverflow.com/a/41302266/561309

	content = content.replace(/<script>/g, '<\\script>');
	content = content.replace(/<\/script>/g, '<\\/script>');
	content = content.replace(/<!--/g, '<\\!--');

	return `<script>${content}</script>`;
};
|
||||
|
||||
// Rewrites an `<img>` whose `src` points at an existing file so that `src`
// is a data URI. Returns null when there is no `src` or no such file.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const processImgTag = (baseDir: string, _name: string, attrs: any): string => {
	const src = attrValue(attrs, 'src');
	const filePath = `${baseDir}/${src}`;
	if (!src || !fs.existsSync(filePath)) return null;

	// `src` is written first as a data URI; the other attributes follow unchanged.
	const { src: _originalSrc, ...otherAttrs } = attrs;
	const encodedImage = dataUriEncode(filePath);
	return `<img src="${encodedImage}" ${attributesHtml(otherAttrs)}/>`;
};
|
||||
|
||||
// Rewrites an `<a>` whose `href` points at an existing file so that `href`
// is a data URI and `download` holds the file name. Returns null when there
// is no `href` or no such file.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const processAnchorTag = (baseDir: string, _name: string, attrs: any): string => {
	const href = attrValue(attrs, 'href');
	const filePath = `${baseDir}/${href}`;
	if (!href || !fs.existsSync(filePath)) return null;

	const linkAttrs = {
		...attrs,
		href: dataUriEncode(filePath),
		download: basename(filePath),
	};
	return `<a ${attributesHtml(linkAttrs)}>`;
};
|
||||
|
||||
// Returns the last segment of `path`, where segments are separated by `/`
// or `\`. Throws when `path` is empty.
function basename(path: string) {
	if (!path) throw new Error('Path is empty');
	const segments = path.split(/\/|\\/);
	return segments.slice(-1)[0];
}
|
||||
|
||||
// Returns `path` without its last segment, where segments are separated by
// `/` or `\`; the remaining segments are joined with `/`. Throws when
// `path` is empty.
function dirname(path: string) {
	if (!path) throw new Error('Path is empty');
	const segments = path.split(/\/|\\/);
	return segments.slice(0, -1).join('/');
}
|
||||
|
||||
/**
 * Packs the HTML file at `inputFile`, together with the local resources it
 * references, into a single HTML file written to `outputFile`.
 *
 * @param inputFile Path of the HTML file to pack.
 * @param outputFile Path the packed HTML is written to (UTF-8).
 */
export default async function htmlpack(inputFile: string, outputFile: string): Promise<void> {
	const inputHtml = await fs.readFile(inputFile, 'utf8');
	const baseDir = dirname(inputFile);

	// Fragments of the packed document, joined when writing the output file.
	const output: string[] = [];

	interface Tag {
		name: string;
	}

	// Stack of the currently open tags; used to tell when text is inside <style>.
	const tagStack: Tag[] = [];

	const currentTag = () => {
		return tagStack.length ? tagStack[tagStack.length - 1] : { name: '', processed: false };
	};

	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
	const handleOpenTag = (name: string, attrs: any) => {
		name = name.toLowerCase();

		// A non-empty result replaces the opening tag in the output.
		let processedResult = '';

		switch (name) {
			case 'link':
				processedResult = processLinkTag(baseDir, name, attrs);
				break;
			case 'script':
				processedResult = processScriptTag(baseDir, name, attrs);
				break;
			case 'img':
				processedResult = processImgTag(baseDir, name, attrs);
				break;
			case 'a':
				processedResult = processAnchorTag(baseDir, name, attrs);
				break;
		}

		tagStack.push({ name });

		if (processedResult) {
			output.push(processedResult);
			return;
		}

		const serialisedAttrs = attributesHtml(attrs);
		const attrHtml = serialisedAttrs ? ` ${serialisedAttrs}` : serialisedAttrs;
		const closingSign = isSelfClosingTag(name) ? '/>' : '>';
		output.push(`<${name}${attrHtml}${closingSign}`);
	};

	const handleText = (decodedText: string) => {
		// For CSS, we have to put the style as-is inside the tag because if we html-entities encode
		// it, it's not going to work. But it's ok because JavaScript won't run within the style tag.
		// Ideally CSS should be loaded from an external file.
		const insideStyle = currentTag().name === 'style';
		output.push(insideStyle ? decodedText : htmlentities(decodedText));
	};

	const handleCloseTag = (name: string) => {
		if (currentTag().name === name.toLowerCase()) tagStack.pop();

		// Self-closing tags were already written as `<tag/>`.
		if (!isSelfClosingTag(name)) output.push(`</${name}>`);
	};

	const parser = new htmlparser2.Parser({
		onopentag: handleOpenTag,
		ontext: handleText,
		onclosetag: handleCloseTag,
	}, { decodeEntities: true });

	parser.write(inputHtml);
	parser.end();

	await fs.writeFile(outputFile, output.join(''), 'utf8');
}
|
1
packages/htmlpack/test-data/image.svg
Normal file
1
packages/htmlpack/test-data/image.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg viewBox="-95 -96 208 208" width="208" height="208" version="1.1" baseProfile="full" xmlns="http://www.w3.org/2000/svg"><text style="font-size: 64px; fill: red;">Test</text></svg>
|
After Width: | Height: | Size: 183 B |
12
packages/htmlpack/test-data/index.html
Normal file
12
packages/htmlpack/test-data/index.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="./style.css" rel="stylesheet"/>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Test</h1>
|
||||
<a href="./resource.txt">Test link.</a>
|
||||
<img src="./image.svg" alt="test image"/>
|
||||
<p>Test paragraph</p>
|
||||
</body>
|
||||
</html>
|
1
packages/htmlpack/test-data/resource.txt
Normal file
1
packages/htmlpack/test-data/resource.txt
Normal file
@@ -0,0 +1 @@
|
||||
Resource.
|
3
packages/htmlpack/test-data/style.css
Normal file
3
packages/htmlpack/test-data/style.css
Normal file
@@ -0,0 +1,3 @@
|
||||
* {
|
||||
color: red;
|
||||
}
|
79
packages/htmlpack/utils/parseHtmlAsync.ts
Normal file
79
packages/htmlpack/utils/parseHtmlAsync.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
const htmlparser2 = require('@joplin/fork-htmlparser2');
|
||||
|
||||
/** Attributes of an HTML tag, keyed by attribute name. */
export type HtmlAttrs = Record<string, string>;

/** Asynchronous handlers invoked by `parseHtmlAsync`. */
interface Callbacks {
	onopentag: (name: string, attrs: HtmlAttrs)=> Promise<void>;
	ontext: (text: string)=> Promise<void>;
	onclosetag: (name: string)=> Promise<void>;
}

/** Discriminant for the recorded parser events. */
enum EventTypes {
	OpenTag = 0,
	Text = 1,
	CloseTag = 2,
}

/** An opening tag with its attributes. */
interface OpenTagEvent {
	type: EventTypes.OpenTag;
	name: string;
	attrs: HtmlAttrs;
}

/** A run of text. */
interface TextEvent {
	type: EventTypes.Text;
	decodedText: string;
}

/** A closing tag. */
interface CloseTagEvent {
	type: EventTypes.CloseTag;
	name: string;
}

/** Any event recorded while parsing, discriminated by `type`. */
type ParserEvent = OpenTagEvent|TextEvent|CloseTagEvent;
|
||||
|
||||
/**
 * Parses `html` and reports open-tag, text and close-tag events to
 * asynchronous callbacks.
 *
 * The parser handlers only record events while `html` is written to the
 * parser. The callbacks are then awaited one at a time, in the order the
 * events were recorded.
 *
 * @param html HTML source to parse.
 * @param callbacks Handlers awaited for each recorded event.
 */
const parseHtmlAsync = async (html: string, callbacks: Callbacks) => {
	const events: ParserEvent[] = [];
	const record = (event: ParserEvent) => {
		events.push(event);
	};

	const parser = new htmlparser2.Parser({
		onopentag: (name: string, attrs: HtmlAttrs) => record({ type: EventTypes.OpenTag, name, attrs }),
		ontext: (decodedText: string) => record({ type: EventTypes.Text, decodedText }),
		onclosetag: (name: string) => record({ type: EventTypes.CloseTag, name }),
	}, { decodeEntities: true });

	parser.write(html);
	parser.end();

	for (const event of events) {
		switch (event.type) {
			case EventTypes.OpenTag:
				await callbacks.onopentag(event.name, event.attrs);
				break;
			case EventTypes.CloseTag:
				await callbacks.onclosetag(event.name);
				break;
			case EventTypes.Text:
				await callbacks.ontext(event.decodedText);
				break;
			default: {
				// Compile-time check that every event type is handled above.
				const exhaustivenessCheck: never = event;
				throw new Error(`Unknown event type: ${exhaustivenessCheck}`);
			}
		}
	}
};
|
||||
|
||||
export default parseHtmlAsync;
|
Reference in New Issue
Block a user