1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-11-23 22:36:32 +02:00
Files
joplin/packages/utils/html.ts

151 lines
3.7 KiB
TypeScript
Raw Normal View History

import { Link } from './types';
const Entities = require('html-entities').AllHtmlEntities;
const htmlparser2 = require('@joplin/fork-htmlparser2');
// Tag names, in lower case, that isSelfClosingTag() reports as self-closing, i.e. elements that
// are written without a separate closing tag. The lookup lower-cases the tag name before
// searching, so every entry here must stay in lower case.
const selfClosingElements = [
'area',
'base',
'basefont',
'br',
'col',
'command',
'embed',
'frame',
'hr',
'img',
'input',
'isindex',
'keygen',
'link',
'meta',
'param',
'source',
'track',
'wbr',
];
// Single shared instance of AllHtmlEntities (from the html-entities package), used for both
// encoding and decoding.
const entitiesInstance = new Entities();
// Encoder and decoder exposed as plain functions.
// NOTE(review): both methods are exported detached from their instance, so they are called
// without `this`. Presumably the library's implementation does not rely on `this` - confirm
// before upgrading html-entities.
export const htmlentities = entitiesInstance.encode;
export const htmlentitiesDecode = entitiesInstance.decode;
// Serialises a map of attributes to the `name="value"` pairs of an HTML tag, separated by single
// spaces. Each value is passed through htmlentities(); attribute names are written as they are,
// so callers are responsible for providing safe names. Only the object's own enumerable
// properties are included, and an empty string is returned when there is nothing to output.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
export const attributesHtml = (attr: Record<string, any>): string => {
	const output: string[] = [];
	for (const name in attr) {
		// Go through Object.prototype rather than calling attr.hasOwnProperty() directly: the direct
		// call throws for objects created without a prototype, and gives wrong results when the
		// attribute map itself contains a key named "hasOwnProperty".
		if (!Object.prototype.hasOwnProperty.call(attr, name)) continue;
		output.push(`${name}="${htmlentities(attr[name])}"`);
	}
	return output.join(' ');
};
// Tells whether `tagName` is one of the elements listed in selfClosingElements. The comparison is
// case-insensitive: the name is lower-cased before the lookup.
export const isSelfClosingTag = (tagName: string) => {
	const normalisedName = tagName.toLowerCase();
	const isListed = selfClosingElements.includes(normalisedName);
	return isListed;
};
// Returns one Link per anchor (<a>) element found in `html`, in the order in which their closing
// tags are reported by the parser. For each link, `url` is the value of the href attribute (or an
// empty string when it is missing or empty) and `title` is the concatenation of all the text
// reported by the parser while the anchor was open. Empty or whitespace-only input yields an
// empty array. Throws if the parser reports a closing anchor tag while no anchor is open.
export const extractUrls = (html: string) => {
	if (!html || !html.trim()) return [];

	const links: Link[] = [];

	// The anchor currently being read, or null when the parser is not inside an anchor
	let openAnchor: Link|null = null;

	const handlers = {
		onopentag: (name: string, attrs: Record<string, string>) => {
			if (name !== 'a') return;

			openAnchor = {
				url: (attrs && attrs.href) || '',
				title: '',
			};
		},

		ontext: (text: string) => {
			if (!openAnchor) return;
			openAnchor.title += text;
		},

		onclosetag: (name: string) => {
			if (name !== 'a') return;
			if (!openAnchor) throw new Error('Found a closing anchor tag without an opening one');

			links.push(openAnchor);
			openAnchor = null;
		},
	};

	const parser = new htmlparser2.Parser(handlers, { decodeEntities: true });
	parser.write(html);
	parser.end();

	return links;
};
// Splits a dimension attribute value such as "1.5in" or " 20 PX " into its numeric part and its
// unit. Surrounding whitespace is ignored, the unit is matched case-insensitively and returned in
// lower case, and it defaults to "px" when the value has no unit. Supported units are in, cm, mm,
// pt, pc and px. Returns null for anything else (percentages, negative numbers, empty strings...).
const parseDimensionAttribute = (value: string) => {
	const match = /^([0-9]*\.?[0-9]+)\s*(in|cm|mm|pt|pc|px)?$/i.exec(value.trim());
	if (match === null) return null;

	// The second capture group is undefined when the value has no unit
	const [, digits, suffix] = match;

	return {
		num: parseFloat(digits),
		unit: suffix ? suffix.toLowerCase() : 'px',
	};
};
// Converts a dimension attribute value (for example "1in" or "2.5cm") to a number of pixels, using
// 96 pixels per inch: in = 96px, cm = 96/2.54px, mm = 96/25.4px, pt = 96/72px and pc = 16px.
// Values in px are returned unchanged. Returns null when the value can't be parsed or when its
// unit isn't one of the above.
const dimensionAttributeInPixels = (value: string) => {
	const dimension = parseDimensionAttribute(value);
	if (!dimension) return null;

	const { num, unit } = dimension;

	if (unit === 'px') return num;
	if (unit === 'in') return num * 96;
	if (unit === 'cm') return num * 96 / 2.54;
	if (unit === 'mm') return num * 96 / 25.4;
	if (unit === 'pt') return num * 96 / 72;
	if (unit === 'pc') return num * 16;

	return null;
};
// Returns a copy of `attributes` in which the "width" and "height" attributes (matched
// case-insensitively) are expressed as plain pixel values. Currently these are the only attributes
// being fixed; all the others are copied unchanged, under their original name.
//
// Those two attributes should be specified in pixels, however certain applications (such as
// Evernote) occasionally specify them in inches. When that happens, and we import the note,
// Electron is going to ignore the unit and assume pixels, so "1in" becomes 1 pixel. To avoid this,
// such values are converted to the equivalent number of pixels.
//
// Currently only used in import-enex-html-gen.js and tested there.
//
// Ref: https://html.spec.whatwg.org/multipage/embedded-content-other.html#dimension-attributes
export const fixAttributes = (attributes: Record<string, string>) => {
	const fixed: Record<string, string> = {};

	for (const [name, rawValue] of Object.entries(attributes)) {
		const lowerCaseName = name.toLowerCase();
		const isDimension = lowerCaseName === 'width' || lowerCaseName === 'height';

		if (!isDimension) {
			fixed[name] = rawValue;
			continue;
		}

		// When the value can't be parsed the attribute is dropped, which means the image will
		// display at its real size. Better than letting bad values go through as it may cause
		// rendering issues.
		const pixels = dimensionAttributeInPixels(rawValue);
		if (pixels !== null) fixed[name] = pixels.toString();
	}

	return fixed;
};