You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-11-23 22:36:32 +02:00
151 lines
3.7 KiB
TypeScript
151 lines
3.7 KiB
TypeScript
import { Link } from './types';
|
|
|
|
const Entities = require('html-entities').AllHtmlEntities;
|
|
const htmlparser2 = require('@joplin/fork-htmlparser2');
|
|
|
|
// Lowercase names of the HTML elements that are treated as self-closing (see isSelfClosingTag).
const selfClosingElements = [
	'area', 'base', 'basefont', 'br', 'col',
	'command', 'embed', 'frame', 'hr', 'img',
	'input', 'isindex', 'keygen', 'link', 'meta',
	'param', 'source', 'track', 'wbr',
];
|
|
|
|
// A single shared entities instance backs both exported helpers.
const entitiesInstance = new Entities();

// The methods are bound to their instance so that they keep working when called as standalone
// functions, even if the library implementation relies on `this`.
export const htmlentities = entitiesInstance.encode.bind(entitiesInstance);
export const htmlentitiesDecode = entitiesInstance.decode.bind(entitiesInstance);
|
|
|
|
// Serialises an attribute map to an HTML attribute string such as `a="1" b="2"`. Each value is
// passed through htmlentities(), while attribute names are written as-is. Only the object's own
// enumerable properties are included. Returns an empty string when there is nothing to write.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
export const attributesHtml = (attr: Record<string, any>) => {
	const output: string[] = [];

	for (const n in attr) {
		// Go through Object.prototype so that the own-property check also works for objects that
		// have no prototype, or that define their own "hasOwnProperty" key.
		if (!Object.prototype.hasOwnProperty.call(attr, n)) continue;
		output.push(`${n}="${htmlentities(attr[n])}"`);
	}

	return output.join(' ');
};
|
|
|
|
// Tells whether `tagName` is one of the known self-closing elements. The name is lowercased
// before the lookup, so the check is case-insensitive.
export const isSelfClosingTag = (tagName: string) => {
	const normalisedName = tagName.toLowerCase();
	return selfClosingElements.indexOf(normalisedName) >= 0;
};
|
|
|
|
// Parses `html` and returns one Link per anchor element, in the order in which the anchors are
// closed. `url` is the anchor's href (or an empty string when it has none) and `title` is the
// text encountered while the anchor was open. Empty or whitespace-only input gives an empty
// array. Throws if the parser reports a closing anchor tag while no anchor is being tracked.
export const extractUrls = (html: string) => {
	const hasContent = !!html && !!html.trim();
	if (!hasContent) return [];

	const links: Link[] = [];

	// The anchor currently being read, or null when no anchor is open.
	let openAnchor: Link|null = null;

	const handleOpenTag = (name: string, attrs: Record<string, string>) => {
		if (name !== 'a') return;
		const href = attrs && attrs.href ? attrs.href : '';
		openAnchor = { url: href, title: '' };
	};

	const handleText = (text: string) => {
		if (!openAnchor) return;
		openAnchor.title += text;
	};

	const handleCloseTag = (name: string) => {
		if (name !== 'a') return;
		if (!openAnchor) throw new Error('Found a closing anchor tag without an opening one');
		links.push(openAnchor);
		openAnchor = null;
	};

	const parser = new htmlparser2.Parser({
		onopentag: handleOpenTag,
		ontext: handleText,
		onclosetag: handleCloseTag,
	}, { decodeEntities: true });

	parser.write(html);
	parser.end();

	return links;
};
|
|
|
|
// Splits a dimension value such as "1.5in" into its numeric part and its unit. The unit is
// lowercased and defaults to "px" when absent. Returns null when the trimmed value isn't an
// unsigned decimal number optionally followed by one of: in, cm, mm, pt, pc, px.
const parseDimensionAttribute = (value: string) => {
	const dimensionPattern = /^([0-9]*\.?[0-9]+)\s*(in|cm|mm|pt|pc|px)?$/i;
	const match = dimensionPattern.exec(value.trim());
	if (match === null) return null;

	const [, numberText, unitText] = match;

	return {
		num: parseFloat(numberText),
		unit: unitText ? unitText.toLowerCase() : 'px',
	};
};
|
|
|
|
// Converts a dimension value (for example "1in", "2.54cm" or "12") to a number of pixels, using
// 96 pixels per inch. Returns null when the value can't be parsed.
const dimensionAttributeInPixels = (value: string) => {
	const dimension = parseDimensionAttribute(value);
	if (!dimension) return null;

	const { num, unit } = dimension;

	if (unit === 'px') return num;
	if (unit === 'in') return num * 96;
	if (unit === 'cm') return num * 96 / 2.54;
	if (unit === 'mm') return num * 96 / 25.4;
	if (unit === 'pt') return num * 96 / 72;
	if (unit === 'pc') return num * 16;

	// Any other unit is not supported.
	return null;
};
|
|
|
|
// Currently this function only fixes the "width" and "height" attributes. Those should be
// specified in pixels, however certain applications (such as Evernote) occasionally specify them
// in another unit, such as inches. When such a value is imported, Electron ignores the unit and
// assumes pixels, so "1in" becomes 1 pixel. To prevent this, these values are converted here to
// the equivalent number of pixels. Attribute names are matched case-insensitively but are written
// back with their original casing, and all other attributes are copied unchanged.
//
// Currently only used in import-enex-html-gen.js and tested there.
//
// Ref: https://html.spec.whatwg.org/multipage/embedded-content-other.html#dimension-attributes
export const fixAttributes = (attributes: Record<string, string>) => {
	const fixed: Record<string, string> = {};

	Object.entries(attributes).forEach(([name, rawValue]) => {
		const lowerName = name.toLowerCase();
		const isDimension = lowerName === 'width' || lowerName === 'height';

		if (!isDimension) {
			fixed[name] = rawValue;
			return;
		}

		const pixels = dimensionAttributeInPixels(rawValue);

		// Drop the attribute if the value can't be parsed, which means the image will display at
		// its real size. Better than letting bad values go through as it may cause rendering
		// issues.
		if (pixels === null) return;

		fixed[name] = pixels.toString();
	});

	return fixed;
};
|