Mirror of https://github.com/laurent22/joplin.git
Commit c0bc4c38c3 (parent 0c50a5ab9b)
@@ -32,6 +32,15 @@
 }
 }
 
+function escapeHtml(s) {
+	return s
+		.replace(/&/g, '&amp;')
+		.replace(/</g, '&lt;')
+		.replace(/>/g, '&gt;')
+		.replace(/"/g, '&quot;')
+		.replace(/'/g, '&#039;');
+}
+
 function pageTitle() {
 	const titleElements = document.getElementsByTagName('title');
 	if (titleElements.length) return titleElements[0].text.trim();
@@ -204,6 +213,16 @@
 }
 }
 
+if (nodeName === 'embed') {
+	const src = absoluteUrl(node.src);
+	node.setAttribute('src', src);
+}
+
+if (nodeName === 'object') {
+	const data = absoluteUrl(node.data);
+	node.setAttribute('data', data);
+}
+
 cleanUpElement(convertToMarkup, node, imageSizes, imageIndexes);
 }
 }
@@ -317,6 +336,9 @@
 }
 
 function readabilityProcess() {
+
+	if (isPagePdf()) throw new Error('Could not parse PDF document with Readability');
+
 	// eslint-disable-next-line no-undef
 	const readability = new Readability(documentForReadability());
 	const article = readability.parse();
@@ -329,6 +351,14 @@
 	};
 }
 
+function isPagePdf() {
+	return document.contentType == 'application/pdf';
+}
+
+function embedPageUrl() {
+	return `<embed src="${escapeHtml(window.location.href)}" type="${escapeHtml(document.contentType)}" />`;
+}
+
 async function prepareCommandResponse(command) {
 	console.info(`Got command: ${command.name}`);
 	const shouldSendToJoplin = !!command.shouldSendToJoplin;
@@ -375,6 +405,10 @@
 
 	} else if (command.name === 'completePageHtml') {
 
+		if (isPagePdf()) {
+			return clippedContentResponse(pageTitle(), embedPageUrl(), getImageSizes(document), getAnchorNames(document));
+		}
+
 		hardcodePreStyles(document);
 		addSvgClass(document);
 		preProcessDocument(document);
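
As an illustration of the clip path added above for PDF pages: instead of walking the DOM, the clipper returns a single <embed> element pointing back at the page URL. Below is a minimal standalone sketch of that markup generation; the URL and content type are example values substituted for window.location.href and document.contentType, since this is not running inside the content script.

// Standalone sketch: builds the same kind of <embed> markup that embedPageUrl()
// returns for a PDF page. The URL and content type below are example values.
function escapeHtml(s: string): string {
	return s
		.replace(/&/g, '&amp;')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.replace(/"/g, '&quot;')
		.replace(/'/g, '&#039;');
}

const pageUrl = 'https://example.com/paper.pdf?user=a&view=1';
const contentType = 'application/pdf';

// Escaping keeps URLs or MIME types containing HTML metacharacters from breaking the markup.
console.log(`<embed src="${escapeHtml(pageUrl)}" type="${escapeHtml(contentType)}" />`);
// -> <embed src="https://example.com/paper.pdf?user=a&amp;view=1" type="application/pdf" />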
@@ -2,17 +2,20 @@ const TurndownService = require('@joplin/turndown');
 const turndownPluginGfm = require('@joplin/turndown-plugin-gfm').gfm;
 import markdownUtils from './markdownUtils';
 
+const pdfUrlRegex = /[\s\S]*?\.pdf$/i;
+
 export interface ParseOptions {
 	anchorNames?: string[];
 	preserveImageTagsWithSize?: boolean;
 	baseUrl?: string;
 	disableEscapeContent?: boolean;
+	convertEmbeddedPdfsToLinks?: boolean;
 }
 
 export default class HtmlToMd {
 
 	public parse(html: string, options: ParseOptions = {}) {
-		const turndown = new TurndownService({
+		const turndownOpts: any = {
 			headingStyle: 'atx',
 			anchorNames: options.anchorNames ? options.anchorNames.map(n => n.trim().toLowerCase()) : [],
 			codeBlockStyle: 'fenced',
@@ -22,10 +25,36 @@ export default class HtmlToMd {
 			strongDelimiter: '**',
 			br: '',
 			disableEscapeContent: 'disableEscapeContent' in options ? options.disableEscapeContent : false,
-		});
+		};
+		if (options.convertEmbeddedPdfsToLinks) {
+			// Turndown ignores empty <object> tags, so we need to handle this case separately
+			// https://github.com/mixmark-io/turndown/issues/293#issuecomment-588984202
+			turndownOpts.blankReplacement = (content: string, node: any) => {
+				if (node.matches('object')) {
+					return pdfRule.replacement(content, node, {});
+				}
+				return '\n\n';
+			};
+		}
+		const turndown = new TurndownService(turndownOpts);
 		turndown.use(turndownPluginGfm);
 		turndown.remove('script');
 		turndown.remove('style');
+		const pdfRule = {
+			filter: ['embed', 'object'],
+			replacement: function(_content: string, node: any, _options: any) {
+				// We set 'embedded_pdf' as the link name so that we can later distinguish these links from normal ones and create resources for them.
+				if (node.matches('embed') && node.getAttribute('src') && pdfUrlRegex.test(node.getAttribute('src'))) {
+					return `[embedded_pdf](${node.getAttribute('src')})`;
+				} else if (node.matches('object') && node.getAttribute('data') && pdfUrlRegex.test(node.getAttribute('data'))) {
+					return `[embedded_pdf](${node.getAttribute('data')})`;
+				}
+				return '';
+			},
+		};
+		if (options.convertEmbeddedPdfsToLinks) {
+			turndown.addRule('pdf', pdfRule);
+		}
 		let md = turndown.turndown(html);
 		if (options.baseUrl) md = markdownUtils.prependBaseUrl(md, options.baseUrl);
 		return md;
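
A small standalone sketch of what the pdfRule replacement produces. Plain objects stand in for the DOM nodes Turndown passes to a rule; the FakeNode shape is an assumption made for illustration only and is not part of Joplin or Turndown.

// The regex is copied from the diff above; everything else is illustration-only.
const pdfUrlRegex = /[\s\S]*?\.pdf$/i;

interface FakeNode {
	tagName: 'embed' | 'object';
	attributes: Record<string, string>;
}

// Mirrors the rule's replacement: embeds use "src", objects use "data", and only
// URLs ending in .pdf become an [embedded_pdf](...) link.
function pdfReplacement(node: FakeNode): string {
	const url = node.tagName === 'embed' ? node.attributes.src : node.attributes.data;
	if (url && pdfUrlRegex.test(url)) return `[embedded_pdf](${url})`;
	return '';
}

console.log(pdfReplacement({ tagName: 'embed', attributes: { src: 'https://example.com/report.pdf' } }));
// -> [embedded_pdf](https://example.com/report.pdf)
console.log(pdfReplacement({ tagName: 'object', attributes: { data: 'https://example.com/report.docx' } }));
// -> '' (empty string: not a PDF)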
@@ -7,6 +7,9 @@ const { escapeHtml } = require('./string-utils.js');
 // https://stackoverflow.com/a/16119722/561309
 const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
 const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi;
+const embedRegex = /<embed([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
+const objectRegex = /<object([\s\S]*?)data=["']([\s\S]*?)["']([\s\S]*?)>/gi;
+const pdfUrlRegex = /[\s\S]*?\.pdf$/i;
 
 const selfClosingElements = [
 	'area',
@@ -61,6 +64,11 @@ class HtmlUtils {
 		return this.extractUrls(imageRegex, html);
 	}
 
+	// Returns the **encoded** URLs, so to be useful they should be decoded again before use.
+	public extractPdfUrls(html: string) {
+		return [...this.extractUrls(embedRegex, html), ...this.extractUrls(objectRegex, html)].filter(url => pdfUrlRegex.test(url));
+	}
+
 	// Returns the **encoded** URLs, so to be useful they should be decoded again before use.
 	public extractAnchorUrls(html: string) {
 		return this.extractUrls(anchorRegex, html);
@@ -87,6 +95,27 @@ class HtmlUtils {
 		});
 	}
 
+	public replaceEmbedUrls(html: string, callback: Function) {
+		if (!html) return '';
+		// We add the link as an <a> element since Joplin disables the <embed> and <object> tags for security reasons.
+		// See: CVE-2020-15930
+		html = html.replace(embedRegex, (_v: string, _before: string, src: string, _after: string) => {
+			const link = callback(src);
+			return `<a href="${link}">${escapeHtml(src)}</a>`;
+		});
+		html = html.replace(objectRegex, (_v: string, _before: string, src: string, _after: string) => {
+			const link = callback(src);
+			return `<a href="${link}">${escapeHtml(src)}</a>`;
+		});
+		return html;
+	}
+
+	public replaceMediaUrls(html: string, callback: Function) {
+		html = this.replaceImageUrls(html, callback);
+		html = this.replaceEmbedUrls(html, callback);
+		return html;
+	}
+
 	// Note that the URLs provided by this function are URL-encoded, which is
 	// usually what you want for web URLs. But if they are file:// URLs and the
 	// file path is going to be used, it will need to be unescaped first. The
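
For reference, here is a standalone sketch of how the new regexes pick PDF URLs out of raw HTML. The helper below only mimics extractPdfUrls for illustration; it is not the HtmlUtils method itself.

// Regexes copied from the diff above.
const embedRegex = /<embed([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
const objectRegex = /<object([\s\S]*?)data=["']([\s\S]*?)["']([\s\S]*?)>/gi;
const pdfUrlRegex = /[\s\S]*?\.pdf$/i;

function extractPdfUrlsSketch(html: string): string[] {
	const urls: string[] = [];
	for (const regex of [embedRegex, objectRegex]) {
		for (const match of html.matchAll(regex)) {
			if (match[2]) urls.push(match[2]); // Capture group 2 is the src/data value.
		}
	}
	// Only keep URLs that end in .pdf (case-insensitive).
	return urls.filter(url => pdfUrlRegex.test(url));
}

console.log(extractPdfUrlsSketch('<div><embed src="https://example.com/a.pdf"/><object data="https://example.com/b.png"></object></div>'));
// -> ['https://example.com/a.pdf']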
@@ -69,7 +69,7 @@ const markdownUtils = {
 	},
 
 	// Returns the **encoded** URLs, so to be useful they should be decoded again before use.
-	extractFileUrls(md: string, onlyImage: boolean = false): Array<string> {
+	extractFileUrls(md: string, onlyType: string = null): Array<string> {
 		const markdownIt = new MarkdownIt();
 		markdownIt.validateLink = validateLinks; // Necessary to support file:/// links
 
@@ -77,10 +77,16 @@ const markdownUtils = {
 		const tokens = markdownIt.parse(md, env);
 		const output: string[] = [];
 
+		let linkType = onlyType;
+		if (linkType === 'pdf') linkType = 'link_open';
+
 		const searchUrls = (tokens: any[]) => {
 			for (let i = 0; i < tokens.length; i++) {
 				const token = tokens[i];
-				if ((onlyImage === true && token.type === 'image') || (onlyImage === false && (token.type === 'image' || token.type === 'link_open'))) {
+				if ((!onlyType && (token.type === 'link_open' || token.type === 'image')) || (!!onlyType && token.type === onlyType) || (onlyType == 'pdf' && token.type === 'link_open')) {
+					// Pdf embeds are a special case: they are represented as 'link_open' tokens but are marked with 'embedded_pdf' as the link name by the parser.
+					// We make sure the link is in the proper pdf format; only then do we add it to the list.
+					if (onlyType === 'pdf' && !(tokens.length > i + 1 && tokens[i + 1].type === 'text' && tokens[i + 1].content === 'embedded_pdf')) continue;
 					for (let j = 0; j < token.attrs.length; j++) {
 						const a = token.attrs[j];
 						if ((a[0] === 'src' || a[0] === 'href') && a.length >= 2 && a[1]) {
@@ -107,7 +113,11 @@ const markdownUtils = {
 	},
 
 	extractImageUrls(md: string) {
-		return markdownUtils.extractFileUrls(md,true);
+		return markdownUtils.extractFileUrls(md, 'image');
+	},
+
+	extractPdfUrls(md: string) {
+		return markdownUtils.extractFileUrls(md, 'pdf');
 	},
 
 	// The match results has 5 items
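
The 'embedded_pdf' check above relies on the token stream markdown-it produces for a link written as [embedded_pdf](url): a link_open token immediately followed by a text token whose content is 'embedded_pdf'. The snippet below is a simplified, standalone illustration of that token shape, not the extractFileUrls implementation.

const MarkdownIt = require('markdown-it');

const md = new MarkdownIt();
const tokens = md.parse('Some text [embedded_pdf](https://example.com/doc.pdf)', {});

for (const block of tokens) {
	if (!block.children) continue; // Inline content lives in token.children.
	block.children.forEach((token: any, i: number, children: any[]) => {
		const next = children[i + 1];
		// A pdf embed shows up as link_open immediately followed by the 'embedded_pdf' text token.
		if (token.type === 'link_open' && next && next.type === 'text' && next.content === 'embedded_pdf') {
			console.log(token.attrGet('href')); // -> https://example.com/doc.pdf
		}
	});
}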
@@ -28,6 +28,17 @@ export class MarkupLanguageUtils {
 		return urls;
 	}
 
+	public extractPdfUrls(language: MarkupLanguage, text: string): string[] {
+		let urls: string[] = [];
+		if (language === MarkupLanguage.Any) {
+			urls = urls.concat(this.lib_(MarkupLanguage.Markdown).extractPdfUrls(text));
+			urls = urls.concat(this.lib_(MarkupLanguage.Html).extractPdfUrls(text));
+		} else {
+			urls = this.lib_(language).extractPdfUrls(text);
+		}
+		return urls;
+	}
+
 	// Create a new MarkupToHtml instance while injecting options specific to Joplin
 	// desktop and mobile applications.
 	public newMarkupToHtml(_plugins: PluginStates = null, options: Options = null) {
@@ -1,5 +1,6 @@
 import { PaginationOrderDir } from '../../models/utils/types';
 import Api, { RequestMethod } from '../../services/rest/Api';
+import { extractMediaUrls } from './routes/notes';
 import shim from '../../shim';
 import { setupDatabaseAndSynchronizer, switchClient, checkThrowAsync, db, msleep, supportDir } from '../../testing/test-utils';
 import Folder from '../../models/Folder';
@@ -9,6 +10,7 @@ import Tag from '../../models/Tag';
 import NoteTag from '../../models/NoteTag';
 import ResourceService from '../../services/ResourceService';
 import SearchEngine from '../../services/searchengine/SearchEngine';
+const { MarkupToHtml } = require('@joplin/renderer');
 import { ResourceEntity } from '../database/types';
 
 const createFolderForPagination = async (num: number, time: number) => {
@@ -452,6 +454,47 @@ describe('services_rest_Api', function() {
 		expect(response.body).toBe('**Bold text**');
 	}));
 
+	it('should extract media urls from body', (() => {
+		const tests = [
+			{
+				language: MarkupToHtml.MARKUP_LANGUAGE_HTML,
+				body: '<div> <img src="https://example.com/img.png" /> <embed src="https://example.com/sample.pdf"/> <object data="https://example.com/file.PDF"></object> </div>',
+				result: ['https://example.com/img.png', 'https://example.com/sample.pdf', 'https://example.com/file.PDF'],
+			},
+			{
+				language: MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN,
+				body: 'test text \n ![img 1](https://example.com/img1.png) [embedded_pdf](https://example.com/sample1.pdf) [embedded_pdf](https://example.com/file.PDF)',
+				result: ['https://example.com/img1.png', 'https://example.com/sample1.pdf', 'https://example.com/file.PDF'],
+			},
+			{
+				language: MarkupToHtml.MARKUP_LANGUAGE_HTML,
+				body: '<div> <embed src="https://example.com/sample"/> <embed /> <object data="https://example.com/file.pdfff"></object> <a href="https://test.com/file.pdf">Link</a> </div>',
+				result: [],
+			},
+		];
+		tests.forEach((test) => {
+			const urls = extractMediaUrls(test.language, test.body);
+			expect(urls).toEqual(test.result);
+		});
+	}));
+
+	it('should create notes with pdf embeds', (async () => {
+		let response = null;
+		const f = await Folder.save({ title: 'pdf test1' });
+
+		response = await api.route(RequestMethod.POST, 'notes', null, JSON.stringify({
+			title: 'testing PDF embeds',
+			parent_id: f.id,
+			body_html: `<div> <embed src="file://${supportDir}/welcome.pdf" type="application/pdf" /> </div>`,
+		}));
+
+		const resources = await Resource.all();
+		expect(resources.length).toBe(1);
+
+		const resource = resources[0];
+		expect(response.body.indexOf(resource.id) >= 0).toBe(true);
+	}));
+
 	it('should handle tokens', (async () => {
 		api = new Api('mytoken');
 
@@ -89,6 +89,7 @@ async function requestNoteToNote(requestNote: any) {
 		output.body = await htmlToMdParser().parse(`<div>${requestNote.body_html}</div>`, {
 			baseUrl: baseUrl,
 			anchorNames: requestNote.anchor_names ? requestNote.anchor_names : [],
+			convertEmbeddedPdfsToLinks: true,
 		});
 		output.markup_language = MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN;
 	}
@@ -143,19 +144,20 @@ async function buildNoteStyleSheet(stylesheets: any[]) {
 	return output;
 }
 
-async function tryToGuessImageExtFromMimeType(response: any, imagePath: string) {
+async function tryToGuessExtFromMimeType(response: any, mediaPath: string) {
 	const mimeType = mimeTypeFromHeaders(response.headers);
-	if (!mimeType) return imagePath;
+	if (!mimeType) return mediaPath;
 
 	const newExt = mimeUtils.toFileExtension(mimeType);
-	if (!newExt) return imagePath;
+	if (!newExt) return mediaPath;
 
-	const newImagePath = `${imagePath}.${newExt}`;
-	await shim.fsDriver().move(imagePath, newImagePath);
-	return newImagePath;
+	const newMediaPath = `${mediaPath}.${newExt}`;
+	await shim.fsDriver().move(mediaPath, newMediaPath);
+	return newMediaPath;
 }
 
-async function downloadImage(url: string /* , allowFileProtocolImages */) {
+async function downloadMediaFile(url: string /* , allowFileProtocolImages */) {
+
 	const tempDir = Setting.value('tempDir');
 
 	// The URL we get to download have been extracted from the Markdown document
@@ -163,6 +165,12 @@ async function downloadImage(url: string /* , allowFileProtocolImages */) {
 
 	const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0;
 
+	// PDFs and other heavy resources are often served as separate files instead of data URLs; it is very unlikely to encounter a PDF as a data URL.
+	if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) {
+		reg.logger().warn(`Resources in data URL format is only supported for images ${url}`);
+		return '';
+	}
+
 	const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
 	let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
 	if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
@@ -170,38 +178,38 @@ async function downloadImage(url: string /* , allowFileProtocolImages */) {
 
 	// Append a UUID because simply checking if the file exists is not enough since
 	// multiple resources can be downloaded at the same time (race condition).
-	let imagePath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
+	let mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
 
 	try {
 		if (isDataUrl) {
-			await shim.imageFromDataUrl(url, imagePath);
+			await shim.imageFromDataUrl(url, mediaPath);
 		} else if (urlUtils.urlProtocol(url).toLowerCase() === 'file:') {
 			// Can't think of any reason to disallow this at this point
 			// if (!allowFileProtocolImages) throw new Error('For security reasons, this URL with file:// protocol cannot be downloaded');
 			const localPath = fileUriToPath(url);
-			await shim.fsDriver().copy(localPath, imagePath);
+			await shim.fsDriver().copy(localPath, mediaPath);
 		} else {
-			const response = await shim.fetchBlob(url, { path: imagePath, maxRetry: 1 });
+			const response = await shim.fetchBlob(url, { path: mediaPath, maxRetry: 1 });
 
 			// If we could not find the file extension from the URL, try to get it
 			// now based on the Content-Type header.
-			if (!fileExt) imagePath = await tryToGuessImageExtFromMimeType(response, imagePath);
+			if (!fileExt) mediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
 		}
-		return imagePath;
+		return mediaPath;
 	} catch (error) {
 		reg.logger().warn(`Cannot download image at ${url}`, error);
 		return '';
 	}
 }
 
-async function downloadImages(urls: string[] /* , allowFileProtocolImages:boolean */) {
+async function downloadMediaFiles(urls: string[] /* , allowFileProtocolImages:boolean */) {
 	const PromisePool = require('es6-promise-pool');
 
 	const output: any = {};
 
 	const downloadOne = async (url: string) => {
-		const imagePath = await downloadImage(url); // , allowFileProtocolImages);
-		if (imagePath) output[url] = { path: imagePath, originalUrl: url };
+		const mediaPath = await downloadMediaFile(url); // , allowFileProtocolImages);
+		if (mediaPath) output[url] = { path: mediaPath, originalUrl: url };
 	};
 
 	let urlIndex = 0;
@@ -245,27 +253,38 @@ async function removeTempFiles(urls: string[]) {
 	}
 }
 
-function replaceImageUrlsByResources(markupLanguage: number, md: string, urls: any, imageSizes: any) {
+function replaceUrlsByResources(markupLanguage: number, md: string, urls: any, imageSizes: any) {
 	const imageSizesIndexes: any = {};
 
 	if (markupLanguage === MarkupToHtml.MARKUP_LANGUAGE_HTML) {
-		return htmlUtils.replaceImageUrls(md, (imageUrl: string) => {
-			const urlInfo: any = urls[imageUrl];
-			if (!urlInfo || !urlInfo.resource) return imageUrl;
+		return htmlUtils.replaceMediaUrls(md, (url: string) => {
+			const urlInfo: any = urls[url];
+			if (!urlInfo || !urlInfo.resource) return url;
 			return Resource.internalUrl(urlInfo.resource);
 		});
 	} else {
 		// eslint-disable-next-line no-useless-escape
-		return md.replace(/(!\[.*?\]\()([^\s\)]+)(.*?\))/g, (_match: any, before: string, imageUrl: string, after: string) => {
-			const urlInfo = urls[imageUrl];
-			if (!urlInfo || !urlInfo.resource) return before + imageUrl + after;
-			if (!(urlInfo.originalUrl in imageSizesIndexes)) imageSizesIndexes[urlInfo.originalUrl] = 0;
+		return md.replace(/(!?\[.*?\]\()([^\s\)]+)(.*?\))/g, (_match: any, before: string, url: string, after: string) => {
+			let type = 'link';
+			if (before.startsWith('[embedded_pdf]')) {
+				type = 'pdf';
+			} else if (before.startsWith('![')) {
+				type = 'image';
+			}
+
+			const urlInfo = urls[url];
+			if (type === 'link' || !urlInfo || !urlInfo.resource) return before + url + after;
 
 			const resourceUrl = Resource.internalUrl(urlInfo.resource);
-			const imageSizesCollection = imageSizes[urlInfo.originalUrl];
+			if (type === 'pdf') {
+				return `[${markdownUtils.escapeLinkUrl(url)}](${resourceUrl}${after}`;
+			}
 
+			if (!(urlInfo.originalUrl in imageSizesIndexes)) imageSizesIndexes[urlInfo.originalUrl] = 0;
+			const imageSizesCollection = imageSizes[urlInfo.originalUrl];
 			if (!imageSizesCollection) {
-				// In some cases, we won't find the image size information for that particular URL. Normally
+				// Either it's not an image or we don't know the size of the image.
+				// In some cases, we won't find the image size information for that particular image URL. Normally
 				// it will only happen when using the "Clip simplified page" feature, which can modify the
 				// image URLs (for example it will select a smaller size resolution). In that case, it's
 				// fine to return the image as-is because it has already good dimensions.
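
Below is a standalone sketch of how the updated Markdown regex splits a link into the before/url/after groups used above, and how the prefix decides the type. It is illustration only; escapeLinkUrl and the resource substitution are left out.

// Regex copied from the diff above (the \) escape inside the character class is
// kept as-is to match the original, hence the eslint note there).
const linkRegex = /(!?\[.*?\]\()([^\s\)]+)(.*?\))/g;

const samples = [
	'[embedded_pdf](https://example.com/doc.pdf)',
	'![photo](https://example.com/photo.png)',
	'[plain link](https://example.com/page)',
];

for (const sample of samples) {
	for (const [, before, url] of sample.matchAll(linkRegex)) {
		// "before" is "[embedded_pdf](" or "![photo](", "url" is the target; group 3 carries the closing ")".
		const type = before.startsWith('[embedded_pdf]') ? 'pdf' : before.startsWith('![') ? 'image' : 'link';
		console.log(type, url);
	}
}
// -> pdf https://example.com/doc.pdf
// -> image https://example.com/photo.png
// -> link https://example.com/page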
@@ -284,6 +303,13 @@ function replaceImageUrlsByResources(markupLanguage: number, md: string, urls: a
 	}
 }
 
+export function extractMediaUrls(markupLanguage: number, text: string): string[] {
+	const urls: string[] = [];
+	urls.push(...ArrayUtils.unique(markupLanguageUtils.extractImageUrls(markupLanguage, text)));
+	urls.push(...ArrayUtils.unique(markupLanguageUtils.extractPdfUrls(markupLanguage, text)));
+	return urls;
+}
+
 // Note must have been saved first
 async function attachImageFromDataUrl(note: any, imageDataUrl: string, cropRect: any) {
 	const tempDir = Setting.value('tempDir');
@@ -328,17 +354,17 @@ export default async function(request: Request, id: string = null, link: string
 
 	let note: any = await requestNoteToNote(requestNote);
 
-	const imageUrls = ArrayUtils.unique(markupLanguageUtils.extractImageUrls(note.markup_language, note.body));
+	const mediaUrls = extractMediaUrls(note.markup_language, note.body);
 
-	reg.logger().info(`Request (${requestId}): Downloading images: ${imageUrls.length}`);
+	reg.logger().info(`Request (${requestId}): Downloading media files: ${mediaUrls.length}`);
 
-	let result = await downloadImages(imageUrls); // , allowFileProtocolImages);
+	let result = await downloadMediaFiles(mediaUrls); // , allowFileProtocolImages);
 
 	reg.logger().info(`Request (${requestId}): Creating resources from paths: ${Object.getOwnPropertyNames(result).length}`);
 
 	result = await createResourcesFromPaths(result);
 	await removeTempFiles(result);
-	note.body = replaceImageUrlsByResources(note.markup_language, note.body, result, imageSizes);
+	note.body = replaceUrlsByResources(note.markup_language, note.body, result, imageSizes);
 
 	reg.logger().info(`Request (${requestId}): Saving note...`);
 