You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-11-23 22:36:32 +02:00
Chore: Refactor htmlpack for mobile compatibility (#12174)
This commit is contained in:
@@ -1021,7 +1021,10 @@ packages/generator-joplin/generators/app/templates/api/types.js
|
|||||||
packages/generator-joplin/generators/app/templates/api_index.js
|
packages/generator-joplin/generators/app/templates/api_index.js
|
||||||
packages/generator-joplin/generators/app/templates/src/index.js
|
packages/generator-joplin/generators/app/templates/src/index.js
|
||||||
packages/generator-joplin/tools/updateCategories.js
|
packages/generator-joplin/tools/updateCategories.js
|
||||||
packages/htmlpack/src/index.js
|
packages/htmlpack/index.test.js
|
||||||
|
packages/htmlpack/index.js
|
||||||
|
packages/htmlpack/packToString.js
|
||||||
|
packages/htmlpack/utils/parseHtmlAsync.js
|
||||||
packages/lib/ArrayUtils.js
|
packages/lib/ArrayUtils.js
|
||||||
packages/lib/AsyncActionQueue.test.js
|
packages/lib/AsyncActionQueue.test.js
|
||||||
packages/lib/AsyncActionQueue.js
|
packages/lib/AsyncActionQueue.js
|
||||||
|
|||||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -995,7 +995,10 @@ packages/generator-joplin/generators/app/templates/api/types.js
|
|||||||
packages/generator-joplin/generators/app/templates/api_index.js
|
packages/generator-joplin/generators/app/templates/api_index.js
|
||||||
packages/generator-joplin/generators/app/templates/src/index.js
|
packages/generator-joplin/generators/app/templates/src/index.js
|
||||||
packages/generator-joplin/tools/updateCategories.js
|
packages/generator-joplin/tools/updateCategories.js
|
||||||
packages/htmlpack/src/index.js
|
packages/htmlpack/index.test.js
|
||||||
|
packages/htmlpack/index.js
|
||||||
|
packages/htmlpack/packToString.js
|
||||||
|
packages/htmlpack/utils/parseHtmlAsync.js
|
||||||
packages/lib/ArrayUtils.js
|
packages/lib/ArrayUtils.js
|
||||||
packages/lib/AsyncActionQueue.test.js
|
packages/lib/AsyncActionQueue.test.js
|
||||||
packages/lib/AsyncActionQueue.js
|
packages/lib/AsyncActionQueue.js
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ const localPackages = {
|
|||||||
'@joplin/react-native-saf-x': path.resolve(__dirname, '../react-native-saf-x/'),
|
'@joplin/react-native-saf-x': path.resolve(__dirname, '../react-native-saf-x/'),
|
||||||
'@joplin/react-native-alarm-notification': path.resolve(__dirname, '../react-native-alarm-notification/'),
|
'@joplin/react-native-alarm-notification': path.resolve(__dirname, '../react-native-alarm-notification/'),
|
||||||
'@joplin/fork-sax': path.resolve(__dirname, '../fork-sax/'),
|
'@joplin/fork-sax': path.resolve(__dirname, '../fork-sax/'),
|
||||||
|
'@joplin/htmlpack': path.resolve(__dirname, '../htmlpack/'),
|
||||||
};
|
};
|
||||||
|
|
||||||
// cSpell:disable
|
// cSpell:disable
|
||||||
|
|||||||
3
packages/htmlpack/.gitignore
vendored
3
packages/htmlpack/.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
dist/*
|
dist/*
|
||||||
|
test-output/
|
||||||
2
packages/htmlpack/.npmignore
Normal file
2
packages/htmlpack/.npmignore
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
test-data/
|
||||||
|
test-output/
|
||||||
35
packages/htmlpack/index.test.ts
Normal file
35
packages/htmlpack/index.test.ts
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import { exists, mkdir, readFile, remove } from 'fs-extra';
|
||||||
|
import { join } from 'path';
|
||||||
|
import htmlpack from '.';
|
||||||
|
|
||||||
|
const outputDirectory = './test-output';
|
||||||
|
|
||||||
|
describe('htmlpack/index', () => {
|
||||||
|
// Recreate the output directory before each test so that no file written by
// an earlier test is left behind.
beforeEach(async () => {
	const hasPreviousOutput = await exists(outputDirectory);
	if (hasPreviousOutput) await remove(outputDirectory);

	await mkdir(outputDirectory);
});
|
||||||
|
|
||||||
|
// Packs test-data/index.html into the output directory and compares the
// written file with the expected markup, in which the stylesheet, the link
// target and the image appear inlined (a <style> element and two data URIs).
test('should convert HTML into a single file', async () => {
|
||||||
|
const outputFile = join(outputDirectory, 'output.html');
|
||||||
|
await htmlpack(join('test-data', 'index.html'), outputFile);
|
||||||
|
|
||||||
|
const outputContent = await readFile(outputFile, 'utf8');
|
||||||
|
expect(outputContent).toBe(`
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<style>* {
|
||||||
|
color: red;
|
||||||
|
}</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Test</h1>
|
||||||
|
<a href="data:text/plain;base64,UmVzb3VyY2Uu" download="resource.txt">Test link.</a>
|
||||||
|
<img src="data:image/svg+xml;base64,PHN2ZyB2aWV3Qm94PSItOTUgLTk2IDIwOCAyMDgiIHdpZHRoPSIyMDgiIGhlaWdodD0iMjA4IiB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjx0ZXh0IHN0eWxlPSJmb250LXNpemU6IDY0cHg7IGZpbGw6IHJlZDsiPlRlc3Q8L3RleHQ+PC9zdmc+" alt="test image"/>
|
||||||
|
<p>Test paragraph</p>
|
||||||
|
</body>
|
||||||
|
</html>`);
|
||||||
|
});
|
||||||
|
});
|
||||||
28
packages/htmlpack/index.ts
Normal file
28
packages/htmlpack/index.ts
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import * as fs from 'fs-extra';
|
||||||
|
const Datauri = require('datauri/sync');
|
||||||
|
import { dirname } from 'path';
|
||||||
|
import packToString from './packToString';
|
||||||
|
|
||||||
|
/**
 * Encodes the file at `filePath` as a data URI.
 *
 * Runs the synchronous `datauri` loader and returns the `content` property of
 * its result. Exceptions thrown by the loader are not caught here.
 */
const dataUriEncode = (filePath: string): string => {
	const { content } = Datauri(filePath);
	return content;
};
|
||||||
|
|
||||||
|
/**
 * Reads the HTML file at `inputFile`, packs it together with the local files
 * it references, and writes the result to `outputFile` as UTF-8.
 *
 * The packing itself is delegated to `packToString`, which receives the
 * directory of `inputFile` as its base directory. This wrapper only supplies
 * file access backed by fs-extra and the `datauri` loader.
 *
 * @param inputFile Path of the HTML file to pack.
 * @param outputFile Path the packed HTML is written to.
 */
export default async function htmlpack(inputFile: string, outputFile: string): Promise<void> {
	const inputHtml = await fs.readFile(inputFile, 'utf8');
	const baseDir = dirname(inputFile);

	const output = await packToString(baseDir, inputHtml, {
		// `pathExists` is the supported fs-extra replacement for the
		// deprecated `exists`; it also resolves to a boolean.
		exists: (path: string) => fs.pathExists(path),
		readFileText: (path: string) => fs.readFile(path, 'utf8'),
		// The encoder is synchronous; the `async` wrapper turns its result
		// (or anything it throws) into a promise, as the file API expects.
		readFileDataUri: async (path: string) => dataUriEncode(path),
	});

	await fs.writeFile(outputFile, output, 'utf8');
}
|
||||||
4
packages/htmlpack/jest.config.js
Normal file
4
packages/htmlpack/jest.config.js
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
module.exports = {
|
||||||
|
testMatch: ['**/*.test.js'],
|
||||||
|
testPathIgnorePatterns: ['<rootDir>/node_modules/'],
|
||||||
|
};
|
||||||
254
packages/htmlpack/packToString.ts
Normal file
254
packages/htmlpack/packToString.ts
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
const Entities = require('html-entities').AllHtmlEntities;
|
||||||
|
import { CssTypes, parse as cssParse, stringify as cssStringify } from '@adobe/css-tools';
|
||||||
|
import { dirname, basename } from 'path';
|
||||||
|
import parseHtmlAsync, { HtmlAttrs } from './utils/parseHtmlAsync';
|
||||||
|
|
||||||
|
// Lower-case names of the elements that `isSelfClosingTag` reports as
// self-closing.
const selfClosingElements = [
	'area', 'base', 'basefont', 'br', 'col',
	'command', 'embed', 'frame', 'hr', 'img',
	'input', 'isindex', 'keygen', 'link', 'meta',
	'param', 'source', 'track', 'wbr',
];
|
||||||
|
|
||||||
|
/**
 * HTML-encodes `s` with the `html-entities` encoder, then turns encoded tabs
 * back into literal tab characters.
 *
 * @param s Raw text (text node content or an attribute value).
 * @returns The encoded text.
 */
const htmlentities = (s: string): string => {
	const output = (new Entities()).encode(s);
	// The pattern has to be the textual entity: matching a raw tab character
	// would replace each tab with itself and leave the entities in place.
	// NOTE(review): assumes the encoder emits `&Tab;` for tabs -- confirm
	// against the pinned html-entities version.
	return output.replace(/&Tab;/ig, '\t');
};
|
||||||
|
|
||||||
|
/**
 * Serialises an attribute map as `name="value"` pairs separated by single
 * spaces. Values are HTML-encoded; names are written as they are. Returns an
 * empty string when the map has no own properties.
 */
const attributesHtml = (attrs: HtmlAttrs) => {
	const pairs: string[] = [];

	for (const attrName in attrs) {
		// Only serialise the map's own entries, not inherited properties.
		if (attrs.hasOwnProperty(attrName)) {
			const encodedValue = htmlentities(attrs[attrName]);
			pairs.push(`${attrName}="${encodedValue}"`);
		}
	}

	return pairs.join(' ');
};
|
||||||
|
|
||||||
|
/**
 * Returns the value stored under `name` in `attrs`, or an empty string when
 * the attribute is missing or its value is falsy (e.g. empty).
 */
const attrValue = (attrs: HtmlAttrs, name: string): string => {
	return attrs[name] || '';
};
|
||||||
|
|
||||||
|
/**
 * Tells whether `tagName` is listed in `selfClosingElements`. The comparison
 * ignores the case of `tagName`.
 */
const isSelfClosingTag = (tagName: string) => {
	const normalisedName = tagName.toLowerCase();
	return selfClosingElements.includes(normalisedName);
};
|
||||||
|
|
||||||
|
/**
 * File access used by `packToString`. It is supplied by the caller, so the
 * packer itself does not have to import a file system module.
 */
export type FileApi = {
	/** Resolves to whether something exists at `path`. */
	exists: (path: string) => Promise<boolean>;
	/** Resolves to the content of the file at `path`, as text. */
	readFileText: (path: string) => Promise<string>;
	/** Resolves to the content of the file at `path`, encoded as a data URI. */
	readFileDataUri: (path: string) => Promise<string>;
};
|
||||||
|
|
||||||
|
// packToString should be able to run in React Native -- don't use fs-extra.
/**
 * Inlines the local files referenced by an HTML document and returns the
 * packed document as a string.
 *
 * Handled references, each resolved as `${baseDir}/${reference}`:
 * - `<link href>` becomes a `<style>` element holding the stylesheet text;
 *   inside `@font-face` rules, `url(...)` values of `src` that point at
 *   existing files are replaced by data URIs.
 * - `<script src>` becomes an inline `<script>` holding the file's text.
 * - `<img src>` gets a data URI as `src`.
 * - `<a href>` gets a data URI as `href` plus a `download` attribute set to
 *   the base name of the original `href`.
 *
 * A tag whose target does not exist (according to `fs.exists`) is written out
 * like any other tag. Everything else is re-serialised from the parser
 * events: text is HTML-encoded except inside `<style>`, attribute values are
 * HTML-encoded, and tags in the self-closing list are written as `<name/>`.
 *
 * @param baseDir Directory that relative references are resolved against.
 * @param inputFileText The HTML document to pack.
 * @param fs File access implementation supplied by the caller.
 * @returns The packed HTML.
 */
const packToString = async (baseDir: string, inputFileText: string, fs: FileApi): Promise<string> => {
	// Best-effort variant of `fs.readFileDataUri`: resolves to an empty
	// string when the file cannot be encoded.
	const readFileDataUriSafe = async (path: string) => {
		try {
			return await fs.readFileDataUri(path);
		} catch (error) {
			// An invalid file path makes the data URI encoder throw. Rather
			// than failing the whole document, that one file is ignored.
			// Fixes https://github.com/laurent22/joplin/issues/8305
			return '';
		}
	};

	// Parses `content` as CSS and embeds the files referenced by the `src`
	// declarations of its @font-face rules. `cssBaseDir` is the directory
	// those references are resolved against.
	const processCssContent = async (cssBaseDir: string, content: string) => {
		const o = cssParse(content, {
			silent: false,
		});

		for (const rule of o.stylesheet.rules) {
			if (rule.type === 'font-face') {
				for (const declaration of rule.declarations) {
					if (declaration.type === CssTypes.comment) {
						continue;
					}

					if (declaration.property === 'src') {
						// String.replace() cannot wait for promises, so the
						// work is done in two passes. The first pass only
						// collects the distinct URLs and starts one lookup
						// per URL; the second pass substitutes the results.
						const replacements = new Map<string, string>();
						const replacementTasks: Promise<void>[] = [];
						declaration.value.replace(/url\((.*?)\)/g, (match: string, url: string) => {
							if (replacements.has(url)) return match;
							// Placeholder; overwritten when the task below completes.
							replacements.set(url, match);

							replacementTasks.push((async () => {
								const cssFilePath = `${cssBaseDir}/${url}`;
								let replacement;
								if (await fs.exists(cssFilePath)) {
									replacement = `url(${await readFileDataUriSafe(cssFilePath)})`;
								} else {
									replacement = `url(${url})`;
								}
								replacements.set(url, replacement);
							})());

							return match;
						});

						await Promise.all(replacementTasks);

						declaration.value = declaration.value.replace(/url\((.*?)\)/g, (_match: string, url: string) => {
							return replacements.get(url);
						});
					}
				}
			}
		}

		return cssStringify(o);
	};

	// Returns a <style> element holding the linked stylesheet, or null when
	// the tag has no href or the file does not exist.
	const processLinkTag = async (_name: string, attrs: HtmlAttrs) => {
		const href = attrValue(attrs, 'href');
		if (!href) return null;

		const filePath = `${baseDir}/${href}`;

		if (!await fs.exists(filePath)) return null;
		const content = await fs.readFileText(filePath);
		return `<style>${await processCssContent(dirname(filePath), content)}</style>`;
	};

	// Returns an inline <script> element holding the referenced file, or
	// null when the tag has no src or the file does not exist.
	const processScriptTag = async (_name: string, attrs: HtmlAttrs) => {
		const src = attrValue(attrs, 'src');
		if (!src) return null;

		const scriptFilePath = `${baseDir}/${src}`;
		// Same rule as for the other tags: when the target is not a local
		// file (for instance a remote URL), leave the tag as it is instead
		// of letting the read fail and abort the whole document.
		if (!await fs.exists(scriptFilePath)) return null;
		let content = await fs.readFileText(scriptFilePath);

		// There's no simple way to insert arbitrary content in <script> tags.
		// Encoding HTML entities doesn't work because the JS parser will not decode
		// them before parsing. We also can't put the code verbatim since it may
		// contain strings such as `</script>` or `<!--` which would break the HTML
		// file.
		//
		// So it seems the only way is to escape these specific sequences with a
		// backslash. It shouldn't break the JS code and should allow the HTML
		// parser to work as expected.
		//
		// https://stackoverflow.com/a/41302266/561309

		content = content.replace(/<script>/g, '<\\script>');
		content = content.replace(/<\/script>/g, '<\\/script>');
		content = content.replace(/<!--/g, '<\\!--');

		// NOTE(review): this markup already ends with `</script>`, and the
		// close-tag handler below writes another `</script>` for the same
		// element -- confirm whether the extra end tag is intended.
		return `<script>${content}</script>`;
	};

	// Returns an <img> tag whose src is a data URI, or null when the tag has
	// no src or the file does not exist.
	const processImgTag = async (_name: string, attrs: HtmlAttrs) => {
		const src = attrValue(attrs, 'src');
		if (!src) return null;

		const filePath = `${baseDir}/${src}`;
		if (!await fs.exists(filePath)) return null;

		// The data URI is written first, followed by the remaining attributes.
		const modAttrs = { ...attrs };
		delete modAttrs.src;
		return `<img src="${await readFileDataUriSafe(filePath)}" ${attributesHtml(modAttrs)}/>`;
	};

	// Returns an opening <a> tag whose href is a data URI and which carries
	// a `download` attribute, or null when the tag has no href or the file
	// does not exist.
	const processAnchorTag = async (_name: string, attrs: HtmlAttrs) => {
		const href = attrValue(attrs, 'href');
		if (!href) return null;

		const filePath = `${baseDir}/${href}`;
		if (!await fs.exists(filePath)) return null;

		const modAttrs = { ...attrs };
		modAttrs.href = await readFileDataUriSafe(filePath);
		modAttrs.download = basename(href);
		return `<a ${attributesHtml(modAttrs)}>`;
	};

	// Fragments of the packed document, in output order.
	const output: string[] = [];

	interface Tag {
		name: string;
	}

	// Currently open elements, innermost last.
	const tagStack: Tag[] = [];

	// Innermost open element, or a tag with an empty name at the top level.
	const currentTag = (): Tag => {
		if (!tagStack.length) return { name: '' };
		return tagStack[tagStack.length - 1];
	};

	await parseHtmlAsync(inputFileText, {
		onopentag: async (name: string, attrs: HtmlAttrs) => {
			name = name.toLowerCase();

			// Replacement markup for the tag; stays falsy when the tag is
			// not one of the handled kinds or its target cannot be inlined.
			let processedResult: string|null = '';

			if (name === 'link') {
				processedResult = await processLinkTag(name, attrs);
			}

			if (name === 'script') {
				processedResult = await processScriptTag(name, attrs);
			}

			if (name === 'img') {
				processedResult = await processImgTag(name, attrs);
			}

			if (name === 'a') {
				processedResult = await processAnchorTag(name, attrs);
			}

			tagStack.push({ name });

			if (processedResult) {
				output.push(processedResult);
			} else {
				let attrHtml = attributesHtml(attrs);
				if (attrHtml) attrHtml = ` ${attrHtml}`;
				const closingSign = isSelfClosingTag(name) ? '/>' : '>';
				output.push(`<${name}${attrHtml}${closingSign}`);
			}
		},

		ontext: async (decodedText: string) => {
			if (currentTag().name === 'style') {
				// For CSS, we have to put the style as-is inside the tag because if we html-entities encode
				// it, it's not going to work. But it's ok because JavaScript won't run within the style tag.
				// Ideally CSS should be loaded from an external file.
				output.push(decodedText);
			} else {
				output.push(htmlentities(decodedText));
			}
		},

		onclosetag: async (name: string) => {
			const current = currentTag();

			if (current.name === name.toLowerCase()) tagStack.pop();

			// Self-closing tags were already terminated with "/>".
			if (isSelfClosingTag(name)) return;
			output.push(`</${name}>`);
		},

	});

	return output.join('');
};
|
||||||
|
|
||||||
|
export default packToString;
|
||||||
|
|
||||||
@@ -3,13 +3,24 @@
|
|||||||
"version": "3.3.1",
|
"version": "3.3.1",
|
||||||
"description": "Pack an HTML file and all its linked resources into a single HTML file",
|
"description": "Pack an HTML file and all its linked resources into a single HTML file",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "src/index.ts",
|
"types": "index.ts",
|
||||||
|
"exports": {
|
||||||
|
".": {
|
||||||
|
"default": "./dist/index.js",
|
||||||
|
"types": "./index.ts"
|
||||||
|
},
|
||||||
|
"./packToString": {
|
||||||
|
"default": "./dist/packToString.js",
|
||||||
|
"types": "./packToString.ts"
|
||||||
|
}
|
||||||
|
},
|
||||||
"publishConfig": {
|
"publishConfig": {
|
||||||
"access": "public"
|
"access": "public"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"tsc": "tsc --project tsconfig.json",
|
"tsc": "tsc --project tsconfig.json",
|
||||||
"watch": "tsc --watch --preserveWatchOutput --project tsconfig.json"
|
"watch": "tsc --watch --preserveWatchOutput --project tsconfig.json",
|
||||||
|
"test": "jest"
|
||||||
},
|
},
|
||||||
"author": "Laurent Cozic",
|
"author": "Laurent Cozic",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
@@ -21,7 +32,10 @@
|
|||||||
"html-entities": "1.4.0"
|
"html-entities": "1.4.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/fs-extra": "11.0.4"
|
"@types/fs-extra": "11.0.4",
|
||||||
|
"@types/jest": "29.5.12",
|
||||||
|
"jest": "29.7.0",
|
||||||
|
"typescript": "5.4.5"
|
||||||
},
|
},
|
||||||
"gitHead": "05a29b450962bf05a8642bbd39446a1f679a96ba"
|
"gitHead": "05a29b450962bf05a8642bbd39446a1f679a96ba"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,257 +0,0 @@
|
|||||||
import * as fs from 'fs-extra';
|
|
||||||
import { pathExistsSync } from 'fs-extra';
|
|
||||||
const Entities = require('html-entities').AllHtmlEntities;
|
|
||||||
const htmlparser2 = require('@joplin/fork-htmlparser2');
|
|
||||||
const Datauri = require('datauri/sync');
|
|
||||||
import { CssTypes, parse as cssParse, stringify as cssStringify } from '@adobe/css-tools';
|
|
||||||
|
|
||||||
const selfClosingElements = [
|
|
||||||
'area',
|
|
||||||
'base',
|
|
||||||
'basefont',
|
|
||||||
'br',
|
|
||||||
'col',
|
|
||||||
'command',
|
|
||||||
'embed',
|
|
||||||
'frame',
|
|
||||||
'hr',
|
|
||||||
'img',
|
|
||||||
'input',
|
|
||||||
'isindex',
|
|
||||||
'keygen',
|
|
||||||
'link',
|
|
||||||
'meta',
|
|
||||||
'param',
|
|
||||||
'source',
|
|
||||||
'track',
|
|
||||||
'wbr',
|
|
||||||
];
|
|
||||||
|
|
||||||
const htmlentities = (s: string): string => {
|
|
||||||
const output = (new Entities()).encode(s);
|
|
||||||
return output.replace(/	/ig, '\t');
|
|
||||||
};
|
|
||||||
|
|
||||||
const dataUriEncode = (filePath: string): string => {
|
|
||||||
try {
|
|
||||||
const result = Datauri(filePath);
|
|
||||||
return result.content;
|
|
||||||
} catch (error) {
|
|
||||||
// If the file path is invalid, the Datauri will throw an exception.
|
|
||||||
// Instead, since we can just ignore that particular file.
|
|
||||||
// Fixes https://github.com/laurent22/joplin/issues/8305
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
const attributesHtml = (attr: any) => {
|
|
||||||
const output = [];
|
|
||||||
|
|
||||||
for (const n in attr) {
|
|
||||||
if (!attr.hasOwnProperty(n)) continue;
|
|
||||||
output.push(`${n}="${htmlentities(attr[n])}"`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return output.join(' ');
|
|
||||||
};
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
const attrValue = (attrs: any, name: string): string => {
|
|
||||||
if (!attrs[name]) return '';
|
|
||||||
return attrs[name];
|
|
||||||
};
|
|
||||||
|
|
||||||
const isSelfClosingTag = (tagName: string) => {
|
|
||||||
return selfClosingElements.includes(tagName.toLowerCase());
|
|
||||||
};
|
|
||||||
|
|
||||||
const processCssContent = (cssBaseDir: string, content: string): string => {
|
|
||||||
const o = cssParse(content, {
|
|
||||||
silent: false,
|
|
||||||
});
|
|
||||||
|
|
||||||
for (const rule of o.stylesheet.rules) {
|
|
||||||
if (rule.type === 'font-face') {
|
|
||||||
for (const declaration of rule.declarations) {
|
|
||||||
if (declaration.type === CssTypes.comment) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (declaration.property === 'src') {
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
declaration.value = declaration.value.replace(/url\((.*?)\)/g, (_v: any, url: string) => {
|
|
||||||
const cssFilePath = `${cssBaseDir}/${url}`;
|
|
||||||
if (fs.existsSync(cssFilePath)) {
|
|
||||||
return `url(${dataUriEncode(cssFilePath)})`;
|
|
||||||
} else {
|
|
||||||
return `url(${url})`;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return cssStringify(o);
|
|
||||||
};
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
const processLinkTag = (baseDir: string, _name: string, attrs: any): string => {
|
|
||||||
const href = attrValue(attrs, 'href');
|
|
||||||
if (!href) return null;
|
|
||||||
|
|
||||||
const filePath = `${baseDir}/${href}`;
|
|
||||||
|
|
||||||
if (!pathExistsSync(filePath)) return null;
|
|
||||||
const content = fs.readFileSync(filePath, 'utf8');
|
|
||||||
return `<style>${processCssContent(dirname(filePath), content)}</style>`;
|
|
||||||
};
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
const processScriptTag = (baseDir: string, _name: string, attrs: any): string => {
|
|
||||||
const src = attrValue(attrs, 'src');
|
|
||||||
if (!src) return null;
|
|
||||||
|
|
||||||
const scriptFilePath = `${baseDir}/${src}`;
|
|
||||||
let content = fs.readFileSync(scriptFilePath, 'utf8');
|
|
||||||
|
|
||||||
// There's no simple way to insert arbitrary content in <script> tags.
|
|
||||||
// Encoding HTML entities doesn't work because the JS parser will not decode
|
|
||||||
// them before parsing. We also can't put the code verbatim since it may
|
|
||||||
// contain strings such as `</script>` or `<!--` which would break the HTML
|
|
||||||
// file.
|
|
||||||
//
|
|
||||||
// So it seems the only way is to escape these specific sequences with a
|
|
||||||
// backslash. It shouldn't break the JS code and should allow the HTML
|
|
||||||
// parser to work as expected.
|
|
||||||
//
|
|
||||||
// https://stackoverflow.com/a/41302266/561309
|
|
||||||
|
|
||||||
content = content.replace(/<script>/g, '<\\script>');
|
|
||||||
content = content.replace(/<\/script>/g, '<\\/script>');
|
|
||||||
content = content.replace(/<!--/g, '<\\!--');
|
|
||||||
|
|
||||||
return `<script>${content}</script>`;
|
|
||||||
};
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
const processImgTag = (baseDir: string, _name: string, attrs: any): string => {
|
|
||||||
const src = attrValue(attrs, 'src');
|
|
||||||
if (!src) return null;
|
|
||||||
|
|
||||||
const filePath = `${baseDir}/${src}`;
|
|
||||||
if (!fs.existsSync(filePath)) return null;
|
|
||||||
|
|
||||||
const modAttrs = { ...attrs };
|
|
||||||
delete modAttrs.src;
|
|
||||||
return `<img src="${dataUriEncode(filePath)}" ${attributesHtml(modAttrs)}/>`;
|
|
||||||
};
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
const processAnchorTag = (baseDir: string, _name: string, attrs: any): string => {
|
|
||||||
const href = attrValue(attrs, 'href');
|
|
||||||
if (!href) return null;
|
|
||||||
|
|
||||||
const filePath = `${baseDir}/${href}`;
|
|
||||||
if (!fs.existsSync(filePath)) return null;
|
|
||||||
|
|
||||||
const modAttrs = { ...attrs };
|
|
||||||
modAttrs.href = dataUriEncode(filePath);
|
|
||||||
modAttrs.download = basename(filePath);
|
|
||||||
return `<a ${attributesHtml(modAttrs)}>`;
|
|
||||||
};
|
|
||||||
|
|
||||||
function basename(path: string) {
|
|
||||||
if (!path) throw new Error('Path is empty');
|
|
||||||
const s = path.split(/\/|\\/);
|
|
||||||
return s[s.length - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
function dirname(path: string) {
|
|
||||||
if (!path) throw new Error('Path is empty');
|
|
||||||
const s = path.split(/\/|\\/);
|
|
||||||
s.pop();
|
|
||||||
return s.join('/');
|
|
||||||
}
|
|
||||||
|
|
||||||
export default async function htmlpack(inputFile: string, outputFile: string): Promise<void> {
|
|
||||||
const inputHtml = await fs.readFile(inputFile, 'utf8');
|
|
||||||
const baseDir = dirname(inputFile);
|
|
||||||
|
|
||||||
const output: string[] = [];
|
|
||||||
|
|
||||||
interface Tag {
|
|
||||||
name: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
const tagStack: Tag[] = [];
|
|
||||||
|
|
||||||
const currentTag = () => {
|
|
||||||
if (!tagStack.length) return { name: '', processed: false };
|
|
||||||
return tagStack[tagStack.length - 1];
|
|
||||||
};
|
|
||||||
|
|
||||||
const parser = new htmlparser2.Parser({
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
|
|
||||||
onopentag: (name: string, attrs: any) => {
|
|
||||||
name = name.toLowerCase();
|
|
||||||
|
|
||||||
let processedResult = '';
|
|
||||||
|
|
||||||
if (name === 'link') {
|
|
||||||
processedResult = processLinkTag(baseDir, name, attrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (name === 'script') {
|
|
||||||
processedResult = processScriptTag(baseDir, name, attrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (name === 'img') {
|
|
||||||
processedResult = processImgTag(baseDir, name, attrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (name === 'a') {
|
|
||||||
processedResult = processAnchorTag(baseDir, name, attrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
tagStack.push({ name });
|
|
||||||
|
|
||||||
if (processedResult) {
|
|
||||||
output.push(processedResult);
|
|
||||||
} else {
|
|
||||||
let attrHtml = attributesHtml(attrs);
|
|
||||||
if (attrHtml) attrHtml = ` ${attrHtml}`;
|
|
||||||
const closingSign = isSelfClosingTag(name) ? '/>' : '>';
|
|
||||||
output.push(`<${name}${attrHtml}${closingSign}`);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
ontext: (decodedText: string) => {
|
|
||||||
if (currentTag().name === 'style') {
|
|
||||||
// For CSS, we have to put the style as-is inside the tag because if we html-entities encode
|
|
||||||
// it, it's not going to work. But it's ok because JavaScript won't run within the style tag.
|
|
||||||
// Ideally CSS should be loaded from an external file.
|
|
||||||
output.push(decodedText);
|
|
||||||
} else {
|
|
||||||
output.push(htmlentities(decodedText));
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
onclosetag: (name: string) => {
|
|
||||||
const current = currentTag();
|
|
||||||
|
|
||||||
if (current.name === name.toLowerCase()) tagStack.pop();
|
|
||||||
|
|
||||||
if (isSelfClosingTag(name)) return;
|
|
||||||
output.push(`</${name}>`);
|
|
||||||
},
|
|
||||||
|
|
||||||
}, { decodeEntities: true });
|
|
||||||
|
|
||||||
parser.write(inputHtml);
|
|
||||||
parser.end();
|
|
||||||
|
|
||||||
await fs.writeFile(outputFile, output.join(''), 'utf8');
|
|
||||||
}
|
|
||||||
1
packages/htmlpack/test-data/image.svg
Normal file
1
packages/htmlpack/test-data/image.svg
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<svg viewBox="-95 -96 208 208" width="208" height="208" version="1.1" baseProfile="full" xmlns="http://www.w3.org/2000/svg"><text style="font-size: 64px; fill: red;">Test</text></svg>
|
||||||
|
After Width: | Height: | Size: 183 B |
12
packages/htmlpack/test-data/index.html
Normal file
12
packages/htmlpack/test-data/index.html
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<link href="./style.css" rel="stylesheet"/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Test</h1>
|
||||||
|
<a href="./resource.txt">Test link.</a>
|
||||||
|
<img src="./image.svg" alt="test image"/>
|
||||||
|
<p>Test paragraph</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
1
packages/htmlpack/test-data/resource.txt
Normal file
1
packages/htmlpack/test-data/resource.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Resource.
|
||||||
3
packages/htmlpack/test-data/style.css
Normal file
3
packages/htmlpack/test-data/style.css
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
* {
|
||||||
|
color: red;
|
||||||
|
}
|
||||||
79
packages/htmlpack/utils/parseHtmlAsync.ts
Normal file
79
packages/htmlpack/utils/parseHtmlAsync.ts
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
const htmlparser2 = require('@joplin/fork-htmlparser2');
|
||||||
|
|
||||||
|
export type HtmlAttrs = Record<string, string>;
|
||||||
|
|
||||||
|
// Asynchronous handlers that parseHtmlAsync calls for each parser event.
interface Callbacks {
	onopentag: (name: string, attrs: HtmlAttrs)=> Promise<void>;
	ontext: (text: string)=> Promise<void>;
	onclosetag: (name: string)=> Promise<void>;
}

// Discriminator for the recorded parser events.
enum EventTypes {
	OpenTag,
	Text,
	CloseTag,
}

// An element was opened with the given attributes.
interface OpenTagEvent {
	type: EventTypes.OpenTag;
	name: string;
	attrs: HtmlAttrs;
}

// A run of text, as reported by the parser.
interface TextEvent {
	type: EventTypes.Text;
	decodedText: string;
}

// An element was closed.
interface CloseTagEvent {
	type: EventTypes.CloseTag;
	name: string;
}

type ParserEvent = OpenTagEvent|TextEvent|CloseTagEvent;

/**
 * Parses `html` and reports open-tag, text and close-tag events to the
 * asynchronous `callbacks`.
 *
 * The handlers given to the parser only record events. Once the whole input
 * has been written to the parser, the recorded events are replayed in order
 * and each callback is awaited before the next one is called.
 */
const parseHtmlAsync = async (html: string, callbacks: Callbacks) => {
	const recordedEvents: ParserEvent[] = [];

	const handlers = {
		onopentag: (name: string, attrs: HtmlAttrs) => {
			recordedEvents.push({ type: EventTypes.OpenTag, name, attrs });
		},

		ontext: (decodedText: string) => {
			recordedEvents.push({ type: EventTypes.Text, decodedText });
		},

		onclosetag: (name: string) => {
			recordedEvents.push({ type: EventTypes.CloseTag, name });
		},
	};

	const parser = new htmlparser2.Parser(handlers, { decodeEntities: true });
	parser.write(html);
	parser.end();

	for (const event of recordedEvents) {
		switch (event.type) {
		case EventTypes.OpenTag:
			await callbacks.onopentag(event.name, event.attrs);
			break;
		case EventTypes.CloseTag:
			await callbacks.onclosetag(event.name);
			break;
		case EventTypes.Text:
			await callbacks.ontext(event.decodedText);
			break;
		default: {
			// Compile-time guard: fails to type-check if an event kind is
			// added without a matching case above.
			const exhaustivenessCheck: never = event;
			throw new Error(`Unknown event type: ${exhaustivenessCheck}`);
		}
		}
	}
};
|
||||||
|
|
||||||
|
export default parseHtmlAsync;
|
||||||
@@ -9017,9 +9017,12 @@ __metadata:
|
|||||||
"@adobe/css-tools": 4.4.2
|
"@adobe/css-tools": 4.4.2
|
||||||
"@joplin/fork-htmlparser2": ^4.1.58
|
"@joplin/fork-htmlparser2": ^4.1.58
|
||||||
"@types/fs-extra": 11.0.4
|
"@types/fs-extra": 11.0.4
|
||||||
|
"@types/jest": 29.5.12
|
||||||
datauri: 4.1.0
|
datauri: 4.1.0
|
||||||
fs-extra: 11.2.0
|
fs-extra: 11.2.0
|
||||||
html-entities: 1.4.0
|
html-entities: 1.4.0
|
||||||
|
jest: 29.7.0
|
||||||
|
typescript: 5.4.5
|
||||||
languageName: unknown
|
languageName: unknown
|
||||||
linkType: soft
|
linkType: soft
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user