You've already forked joplin
							
							
				mirror of
				https://github.com/laurent22/joplin.git
				synced 2025-10-31 00:07:48 +02:00 
			
		
		
		
	| @@ -32,6 +32,15 @@ | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	function escapeHtml(s) { | ||||
| 		return s | ||||
| 			.replace(/&/g, '&amp;') | ||||
| 			.replace(/</g, '&lt;') | ||||
| 			.replace(/>/g, '&gt;') | ||||
| 			.replace(/"/g, '&quot;') | ||||
| 			.replace(/'/g, '&#039;'); | ||||
| 	} | ||||
|  | ||||
| 	function pageTitle() { | ||||
| 		const titleElements = document.getElementsByTagName('title'); | ||||
| 		if (titleElements.length) return titleElements[0].text.trim(); | ||||
| @@ -204,6 +213,16 @@ | ||||
| 					} | ||||
| 				} | ||||
|  | ||||
| 				if (nodeName === 'embed') { | ||||
| 					const src = absoluteUrl(node.src); | ||||
| 					node.setAttribute('src', src); | ||||
| 				} | ||||
|  | ||||
| 				if (nodeName === 'object') { | ||||
| 					const data = absoluteUrl(node.data); | ||||
| 					node.setAttribute('data', data); | ||||
| 				} | ||||
|  | ||||
| 				cleanUpElement(convertToMarkup, node, imageSizes, imageIndexes); | ||||
| 			} | ||||
| 		} | ||||
| @@ -317,6 +336,9 @@ | ||||
| 	} | ||||
|  | ||||
| 	function readabilityProcess() { | ||||
|  | ||||
| 		if (isPagePdf()) throw new Error('Could not parse PDF document with Readability'); | ||||
|  | ||||
| 		// eslint-disable-next-line no-undef | ||||
| 		const readability = new Readability(documentForReadability()); | ||||
| 		const article = readability.parse(); | ||||
| @@ -329,6 +351,14 @@ | ||||
| 		}; | ||||
| 	} | ||||
|  | ||||
| 	function isPagePdf() { | ||||
| 		return document.contentType == 'application/pdf'; | ||||
| 	} | ||||
|  | ||||
| 	function embedPageUrl() { | ||||
| 		return `<embed src="${escapeHtml(window.location.href)}" type="${escapeHtml(document.contentType)}" />`; | ||||
| 	} | ||||
|  | ||||
| 	async function prepareCommandResponse(command) { | ||||
| 		console.info(`Got command: ${command.name}`); | ||||
| 		const shouldSendToJoplin = !!command.shouldSendToJoplin; | ||||
| @@ -375,6 +405,10 @@ | ||||
|  | ||||
| 		} else if (command.name === 'completePageHtml') { | ||||
|  | ||||
| 			if (isPagePdf()) { | ||||
| 				return clippedContentResponse(pageTitle(), embedPageUrl(), getImageSizes(document), getAnchorNames(document)); | ||||
| 			} | ||||
|  | ||||
| 			hardcodePreStyles(document); | ||||
| 			addSvgClass(document); | ||||
| 			preProcessDocument(document); | ||||
|   | ||||
| @@ -2,17 +2,20 @@ const TurndownService = require('@joplin/turndown'); | ||||
| const turndownPluginGfm = require('@joplin/turndown-plugin-gfm').gfm; | ||||
| import markdownUtils from './markdownUtils'; | ||||
|  | ||||
| const pdfUrlRegex = /[\s\S]*?\.pdf$/i; | ||||
|  | ||||
| export interface ParseOptions { | ||||
| 	anchorNames?: string[]; | ||||
| 	preserveImageTagsWithSize?: boolean; | ||||
| 	baseUrl?: string; | ||||
| 	disableEscapeContent?: boolean; | ||||
| 	convertEmbeddedPdfsToLinks?: boolean; | ||||
| } | ||||
|  | ||||
| export default class HtmlToMd { | ||||
|  | ||||
| 	public parse(html: string, options: ParseOptions = {}) { | ||||
| 		const turndown = new TurndownService({ | ||||
| 		const turndownOpts: any = { | ||||
| 			headingStyle: 'atx', | ||||
| 			anchorNames: options.anchorNames ? options.anchorNames.map(n => n.trim().toLowerCase()) : [], | ||||
| 			codeBlockStyle: 'fenced', | ||||
| @@ -22,10 +25,36 @@ export default class HtmlToMd { | ||||
| 			strongDelimiter: '**', | ||||
| 			br: '', | ||||
| 			disableEscapeContent: 'disableEscapeContent' in options ? options.disableEscapeContent : false, | ||||
| 		}); | ||||
| 		}; | ||||
| 		if (options.convertEmbeddedPdfsToLinks) { | ||||
| 			// Turndown ignores empty <object> tags, so we need to handle this case separately | ||||
| 			// https://github.com/mixmark-io/turndown/issues/293#issuecomment-588984202 | ||||
| 			turndownOpts.blankReplacement = (content: string, node: any) => { | ||||
| 				if (node.matches('object')) { | ||||
| 					return pdfRule.replacement(content, node, {}); | ||||
| 				} | ||||
| 				return '\n\n'; | ||||
| 			}; | ||||
| 		} | ||||
| 		const turndown = new TurndownService(turndownOpts); | ||||
| 		turndown.use(turndownPluginGfm); | ||||
| 		turndown.remove('script'); | ||||
| 		turndown.remove('style'); | ||||
| 		const pdfRule = { | ||||
| 			filter: ['embed', 'object'], | ||||
| 			replacement: function(_content: string, node: any, _options: any) { | ||||
| 				// We are setting embedded_pdf as name so that we can later distinguish them from normal links and create resources for them. | ||||
| 				if (node.matches('embed') && node.getAttribute('src') && pdfUrlRegex.test(node.getAttribute('src'))) { | ||||
| 					return `[embedded_pdf](${node.getAttribute('src')})`; | ||||
| 				} else if (node.matches('object') && node.getAttribute('data') && pdfUrlRegex.test(node.getAttribute('data'))) { | ||||
| 					return `[embedded_pdf](${node.getAttribute('data')})`; | ||||
| 				} | ||||
| 				return ''; | ||||
| 			}, | ||||
| 		}; | ||||
| 		if (options.convertEmbeddedPdfsToLinks) { | ||||
| 			turndown.addRule('pdf', pdfRule); | ||||
| 		} | ||||
| 		let md = turndown.turndown(html); | ||||
| 		if (options.baseUrl) md = markdownUtils.prependBaseUrl(md, options.baseUrl); | ||||
| 		return md; | ||||
|   | ||||
| @@ -7,6 +7,9 @@ const { escapeHtml } = require('./string-utils.js'); | ||||
| // https://stackoverflow.com/a/16119722/561309 | ||||
| const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi; | ||||
| const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi; | ||||
| const embedRegex = /<embed([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi; | ||||
| const objectRegex = /<object([\s\S]*?)data=["']([\s\S]*?)["']([\s\S]*?)>/gi; | ||||
| const pdfUrlRegex = /[\s\S]*?\.pdf$/i; | ||||
|  | ||||
| const selfClosingElements = [ | ||||
| 	'area', | ||||
| @@ -61,6 +64,11 @@ class HtmlUtils { | ||||
| 		return this.extractUrls(imageRegex, html); | ||||
| 	} | ||||
|  | ||||
| 	// Returns the **encoded** URLs, so to be useful they should be decoded again before use. | ||||
| 	public extractPdfUrls(html: string) { | ||||
| 		return [...this.extractUrls(embedRegex, html), ...this.extractUrls(objectRegex, html)].filter(url => pdfUrlRegex.test(url)); | ||||
| 	} | ||||
|  | ||||
| 	// Returns the **encoded** URLs, so to be useful they should be decoded again before use. | ||||
| 	public extractAnchorUrls(html: string) { | ||||
| 		return this.extractUrls(anchorRegex, html); | ||||
| @@ -87,6 +95,27 @@ class HtmlUtils { | ||||
| 		}); | ||||
| 	} | ||||
|  | ||||
| 	public replaceEmbedUrls(html: string, callback: Function) { | ||||
| 		if (!html) return ''; | ||||
| 		// We are adding the link as <a> since joplin disabled <embed>, <object> tags due to security reasons. | ||||
| 		// See: CVE-2020-15930 | ||||
| 		html = html.replace(embedRegex, (_v: string, _before: string, src: string, _after: string) => { | ||||
| 			const link = callback(src); | ||||
| 			return `<a href="${link}">${escapeHtml(src)}</a>`; | ||||
| 		}); | ||||
| 		html = html.replace(objectRegex, (_v: string, _before: string, src: string, _after: string) => { | ||||
| 			const link = callback(src); | ||||
| 			return `<a href="${link}">${escapeHtml(src)}</a>`; | ||||
| 		}); | ||||
| 		return html; | ||||
| 	} | ||||
|  | ||||
| 	public replaceMediaUrls(html: string, callback: Function) { | ||||
| 		html = this.replaceImageUrls(html, callback); | ||||
| 		html = this.replaceEmbedUrls(html, callback); | ||||
| 		return html; | ||||
| 	} | ||||
|  | ||||
| 	// Note that the URLs provided by this function are URL-encoded, which is | ||||
| 	// usually what you want for web URLs. But if they are file:// URLs and the | ||||
| 	// file path is going to be used, it will need to be unescaped first. The | ||||
|   | ||||
| @@ -69,7 +69,7 @@ const markdownUtils = { | ||||
| 	}, | ||||
|  | ||||
| 	// Returns the **encoded** URLs, so to be useful they should be decoded again before use. | ||||
| 	extractFileUrls(md: string, onlyImage: boolean = false): Array<string> { | ||||
| 	extractFileUrls(md: string, onlyType: string = null): Array<string> { | ||||
| 		const markdownIt = new MarkdownIt(); | ||||
| 		markdownIt.validateLink = validateLinks; // Necessary to support file:/// links | ||||
|  | ||||
| @@ -77,10 +77,16 @@ const markdownUtils = { | ||||
| 		const tokens = markdownIt.parse(md, env); | ||||
| 		const output: string[] = []; | ||||
|  | ||||
| 		let linkType = onlyType; | ||||
| 		if (linkType === 'pdf') linkType = 'link_open'; | ||||
|  | ||||
| 		const searchUrls = (tokens: any[]) => { | ||||
| 			for (let i = 0; i < tokens.length; i++) { | ||||
| 				const token = tokens[i]; | ||||
| 				if ((onlyImage === true && token.type === 'image') || (onlyImage === false && (token.type === 'image' || token.type === 'link_open'))) { | ||||
| 				if ((!onlyType && (token.type === 'link_open' || token.type === 'image')) || (!!onlyType && token.type === onlyType) || (onlyType == 'pdf' && token.type === 'link_open')) { | ||||
| 					// Pdf embeds are a special case, they are represented as 'link_open' tokens but are marked with 'embedded_pdf' as link name by the parser | ||||
| 					// We are making sure it's in the proper pdf link format; only then do we add it to the list | ||||
| 					if (onlyType === 'pdf' && !(tokens.length > i + 1 && tokens[i + 1].type === 'text' && tokens[i + 1].content === 'embedded_pdf')) continue; | ||||
| 					for (let j = 0; j < token.attrs.length; j++) { | ||||
| 						const a = token.attrs[j]; | ||||
| 						if ((a[0] === 'src' || a[0] === 'href') && a.length >= 2 && a[1]) { | ||||
| @@ -107,7 +113,11 @@ const markdownUtils = { | ||||
| 	}, | ||||
|  | ||||
| 	extractImageUrls(md: string) { | ||||
| 		return markdownUtils.extractFileUrls(md,true); | ||||
| 		return markdownUtils.extractFileUrls(md, 'image'); | ||||
| 	}, | ||||
|  | ||||
| 	extractPdfUrls(md: string) { | ||||
| 		return markdownUtils.extractFileUrls(md, 'pdf'); | ||||
| 	}, | ||||
|  | ||||
| 	// The match results has 5 items | ||||
|   | ||||
| @@ -28,6 +28,17 @@ export class MarkupLanguageUtils { | ||||
| 		return urls; | ||||
| 	} | ||||
|  | ||||
| 	public extractPdfUrls(language: MarkupLanguage, text: string): string[] { | ||||
| 		let urls: string[] = []; | ||||
| 		if (language === MarkupLanguage.Any) { | ||||
| 			urls = urls.concat(this.lib_(MarkupLanguage.Markdown).extractPdfUrls(text)); | ||||
| 			urls = urls.concat(this.lib_(MarkupLanguage.Html).extractPdfUrls(text)); | ||||
| 		} else { | ||||
| 			urls = this.lib_(language).extractPdfUrls(text); | ||||
| 		} | ||||
| 		return urls; | ||||
| 	} | ||||
|  | ||||
| 	// Create a new MarkupToHtml instance while injecting options specific to Joplin | ||||
| 	// desktop and mobile applications. | ||||
| 	public newMarkupToHtml(_plugins: PluginStates = null, options: Options = null) { | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| import { PaginationOrderDir } from '../../models/utils/types'; | ||||
| import Api, { RequestMethod } from '../../services/rest/Api'; | ||||
| import { extractMediaUrls } from './routes/notes'; | ||||
| import shim from '../../shim'; | ||||
| import { setupDatabaseAndSynchronizer, switchClient, checkThrowAsync, db, msleep, supportDir } from '../../testing/test-utils'; | ||||
| import Folder from '../../models/Folder'; | ||||
| @@ -9,6 +10,7 @@ import Tag from '../../models/Tag'; | ||||
| import NoteTag from '../../models/NoteTag'; | ||||
| import ResourceService from '../../services/ResourceService'; | ||||
| import SearchEngine from '../../services/searchengine/SearchEngine'; | ||||
| const { MarkupToHtml } = require('@joplin/renderer'); | ||||
| import { ResourceEntity } from '../database/types'; | ||||
|  | ||||
| const createFolderForPagination = async (num: number, time: number) => { | ||||
| @@ -452,6 +454,47 @@ describe('services_rest_Api', function() { | ||||
| 		expect(response.body).toBe('**Bold text**'); | ||||
| 	})); | ||||
|  | ||||
| 	it('should extract media urls from body', (() => { | ||||
| 		const tests = [ | ||||
| 			{ | ||||
| 				language: MarkupToHtml.MARKUP_LANGUAGE_HTML, | ||||
| 				body: '<div> <img src="https://example.com/img.png" /> <embed src="https://example.com/sample.pdf"/> <object data="https://example.com/file.PDF"></object> </div>', | ||||
| 				result: ['https://example.com/img.png', 'https://example.com/sample.pdf', 'https://example.com/file.PDF'], | ||||
| 			}, | ||||
| 			{ | ||||
| 				language: MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN, | ||||
| 				body: 'test text \n  [embedded_pdf](https://example.com/sample1.pdf) [embedded_pdf](https://example.com/file.PDF)', | ||||
| 				result: ['https://example.com/img1.png', 'https://example.com/sample1.pdf', 'https://example.com/file.PDF'], | ||||
| 			}, | ||||
| 			{ | ||||
| 				language: MarkupToHtml.MARKUP_LANGUAGE_HTML, | ||||
| 				body: '<div> <embed src="https://example.com/sample"/> <embed /> <object data="https://example.com/file.pdfff"></object> <a href="https://test.com/file.pdf">Link</a> </div>', | ||||
| 				result: [], | ||||
| 			}, | ||||
| 		]; | ||||
| 		tests.forEach((test) => { | ||||
| 			const urls = extractMediaUrls(test.language, test.body); | ||||
| 			expect(urls).toEqual(test.result); | ||||
| 		}); | ||||
| 	})); | ||||
|  | ||||
| 	it('should create notes with pdf embeds', (async () => { | ||||
| 		let response = null; | ||||
| 		const f = await Folder.save({ title: 'pdf test1' }); | ||||
|  | ||||
| 		response = await api.route(RequestMethod.POST, 'notes', null, JSON.stringify({ | ||||
| 			title: 'testing PDF embeds', | ||||
| 			parent_id: f.id, | ||||
| 			body_html: `<div> <embed src="file://${supportDir}/welcome.pdf" type="application/pdf" /> </div>`, | ||||
| 		})); | ||||
|  | ||||
| 		const resources = await Resource.all(); | ||||
| 		expect(resources.length).toBe(1); | ||||
|  | ||||
| 		const resource = resources[0]; | ||||
| 		expect(response.body.indexOf(resource.id) >= 0).toBe(true); | ||||
| 	})); | ||||
|  | ||||
| 	it('should handle tokens', (async () => { | ||||
| 		api = new Api('mytoken'); | ||||
|  | ||||
|   | ||||
| @@ -89,6 +89,7 @@ async function requestNoteToNote(requestNote: any) { | ||||
| 			output.body = await htmlToMdParser().parse(`<div>${requestNote.body_html}</div>`, { | ||||
| 				baseUrl: baseUrl, | ||||
| 				anchorNames: requestNote.anchor_names ? requestNote.anchor_names : [], | ||||
| 				convertEmbeddedPdfsToLinks: true, | ||||
| 			}); | ||||
| 			output.markup_language = MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN; | ||||
| 		} | ||||
| @@ -143,19 +144,20 @@ async function buildNoteStyleSheet(stylesheets: any[]) { | ||||
| 	return output; | ||||
| } | ||||
|  | ||||
| async function tryToGuessImageExtFromMimeType(response: any, imagePath: string) { | ||||
| async function tryToGuessExtFromMimeType(response: any, mediaPath: string) { | ||||
| 	const mimeType = mimeTypeFromHeaders(response.headers); | ||||
| 	if (!mimeType) return imagePath; | ||||
| 	if (!mimeType) return mediaPath; | ||||
|  | ||||
| 	const newExt = mimeUtils.toFileExtension(mimeType); | ||||
| 	if (!newExt) return imagePath; | ||||
| 	if (!newExt) return mediaPath; | ||||
|  | ||||
| 	const newImagePath = `${imagePath}.${newExt}`; | ||||
| 	await shim.fsDriver().move(imagePath, newImagePath); | ||||
| 	return newImagePath; | ||||
| 	const newMediaPath = `${mediaPath}.${newExt}`; | ||||
| 	await shim.fsDriver().move(mediaPath, newMediaPath); | ||||
| 	return newMediaPath; | ||||
| } | ||||
|  | ||||
| async function downloadImage(url: string /* , allowFileProtocolImages */) { | ||||
| async function downloadMediaFile(url: string /* , allowFileProtocolImages */) { | ||||
|  | ||||
| 	const tempDir = Setting.value('tempDir'); | ||||
|  | ||||
| 	// The URL we get to download have been extracted from the Markdown document | ||||
| @@ -163,6 +165,12 @@ async function downloadImage(url: string /* , allowFileProtocolImages */) { | ||||
|  | ||||
| 	const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0; | ||||
|  | ||||
| 	// PDFs and other heavy resources are often served as separate files instead of data urls; it's very unlikely to encounter a pdf as a data url | ||||
| 	if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) { | ||||
| 		reg.logger().warn(`Resources in data URL format is only supported for images ${url}`); | ||||
| 		return ''; | ||||
| 	} | ||||
|  | ||||
| 	const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url); | ||||
| 	let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase()); | ||||
| 	if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it. | ||||
| @@ -170,38 +178,38 @@ async function downloadImage(url: string /* , allowFileProtocolImages */) { | ||||
|  | ||||
| 	// Append a UUID because simply checking if the file exists is not enough since | ||||
| 	// multiple resources can be downloaded at the same time (race condition). | ||||
| 	let imagePath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`; | ||||
| 	let mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`; | ||||
|  | ||||
| 	try { | ||||
| 		if (isDataUrl) { | ||||
| 			await shim.imageFromDataUrl(url, imagePath); | ||||
| 			await shim.imageFromDataUrl(url, mediaPath); | ||||
| 		} else if (urlUtils.urlProtocol(url).toLowerCase() === 'file:') { | ||||
| 			// Can't think of any reason to disallow this at this point | ||||
| 			// if (!allowFileProtocolImages) throw new Error('For security reasons, this URL with file:// protocol cannot be downloaded'); | ||||
| 			const localPath = fileUriToPath(url); | ||||
| 			await shim.fsDriver().copy(localPath, imagePath); | ||||
| 			await shim.fsDriver().copy(localPath, mediaPath); | ||||
| 		} else { | ||||
| 			const response = await shim.fetchBlob(url, { path: imagePath, maxRetry: 1 }); | ||||
| 			const response = await shim.fetchBlob(url, { path: mediaPath, maxRetry: 1 }); | ||||
|  | ||||
| 			// If we could not find the file extension from the URL, try to get it | ||||
| 			// now based on the Content-Type header. | ||||
| 			if (!fileExt) imagePath = await tryToGuessImageExtFromMimeType(response, imagePath); | ||||
| 			if (!fileExt) mediaPath = await tryToGuessExtFromMimeType(response, mediaPath); | ||||
| 		} | ||||
| 		return imagePath; | ||||
| 		return mediaPath; | ||||
| 	} catch (error) { | ||||
| 		reg.logger().warn(`Cannot download image at ${url}`, error); | ||||
| 		return ''; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| async function downloadImages(urls: string[] /* , allowFileProtocolImages:boolean */) { | ||||
| async function downloadMediaFiles(urls: string[] /* , allowFileProtocolImages:boolean */) { | ||||
| 	const PromisePool = require('es6-promise-pool'); | ||||
|  | ||||
| 	const output: any = {}; | ||||
|  | ||||
| 	const downloadOne = async (url: string) => { | ||||
| 		const imagePath = await downloadImage(url); // , allowFileProtocolImages); | ||||
| 		if (imagePath) output[url] = { path: imagePath, originalUrl: url }; | ||||
| 		const mediaPath = await downloadMediaFile(url); // , allowFileProtocolImages); | ||||
| 		if (mediaPath) output[url] = { path: mediaPath, originalUrl: url }; | ||||
| 	}; | ||||
|  | ||||
| 	let urlIndex = 0; | ||||
| @@ -245,27 +253,38 @@ async function removeTempFiles(urls: string[]) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| function replaceImageUrlsByResources(markupLanguage: number, md: string, urls: any, imageSizes: any) { | ||||
| function replaceUrlsByResources(markupLanguage: number, md: string, urls: any, imageSizes: any) { | ||||
| 	const imageSizesIndexes: any = {}; | ||||
|  | ||||
| 	if (markupLanguage === MarkupToHtml.MARKUP_LANGUAGE_HTML) { | ||||
| 		return htmlUtils.replaceImageUrls(md, (imageUrl: string) => { | ||||
| 			const urlInfo: any = urls[imageUrl]; | ||||
| 			if (!urlInfo || !urlInfo.resource) return imageUrl; | ||||
| 		return htmlUtils.replaceMediaUrls(md, (url: string) => { | ||||
| 			const urlInfo: any = urls[url]; | ||||
| 			if (!urlInfo || !urlInfo.resource) return url; | ||||
| 			return Resource.internalUrl(urlInfo.resource); | ||||
| 		}); | ||||
| 	} else { | ||||
| 		// eslint-disable-next-line no-useless-escape | ||||
| 		return md.replace(/(!\[.*?\]\()([^\s\)]+)(.*?\))/g, (_match: any, before: string, imageUrl: string, after: string) => { | ||||
| 			const urlInfo = urls[imageUrl]; | ||||
| 			if (!urlInfo || !urlInfo.resource) return before + imageUrl + after; | ||||
| 			if (!(urlInfo.originalUrl in imageSizesIndexes)) imageSizesIndexes[urlInfo.originalUrl] = 0; | ||||
| 		return md.replace(/(!?\[.*?\]\()([^\s\)]+)(.*?\))/g, (_match: any, before: string, url: string, after: string) => { | ||||
| 			let type = 'link'; | ||||
| 			if (before.startsWith('[embedded_pdf]')) { | ||||
| 				type = 'pdf'; | ||||
| 			} else if (before.startsWith('![')) { | ||||
| 				type = 'image'; | ||||
| 			} | ||||
|  | ||||
| 			const urlInfo = urls[url]; | ||||
| 			if (type === 'link' || !urlInfo || !urlInfo.resource) return before + url + after; | ||||
|  | ||||
| 			const resourceUrl = Resource.internalUrl(urlInfo.resource); | ||||
| 			const imageSizesCollection = imageSizes[urlInfo.originalUrl]; | ||||
| 			if (type === 'pdf') { | ||||
| 				return `[${markdownUtils.escapeLinkUrl(url)}](${resourceUrl}${after}`; | ||||
| 			} | ||||
|  | ||||
| 			if (!(urlInfo.originalUrl in imageSizesIndexes)) imageSizesIndexes[urlInfo.originalUrl] = 0; | ||||
| 			const imageSizesCollection = imageSizes[urlInfo.originalUrl]; | ||||
| 			if (!imageSizesCollection) { | ||||
| 				// In some cases, we won't find the image size information for that particular URL. Normally | ||||
| 				// Either its not an image or we don't know the size of the image | ||||
| 				// In some cases, we won't find the image size information for that particular image URL. Normally | ||||
| 				// it will only happen when using the "Clip simplified page" feature, which can modify the | ||||
| 				// image URLs (for example it will select a smaller size resolution). In that case, it's | ||||
| 				// fine to return the image as-is because it has already good dimensions. | ||||
| @@ -284,6 +303,13 @@ function replaceImageUrlsByResources(markupLanguage: number, md: string, urls: a | ||||
| 	} | ||||
| } | ||||
|  | ||||
| export function extractMediaUrls(markupLanguage: number, text: string): string[] { | ||||
| 	const urls: string[] = []; | ||||
| 	urls.push(...ArrayUtils.unique(markupLanguageUtils.extractImageUrls(markupLanguage, text))); | ||||
| 	urls.push(...ArrayUtils.unique(markupLanguageUtils.extractPdfUrls(markupLanguage, text))); | ||||
| 	return urls; | ||||
| } | ||||
|  | ||||
| // Note must have been saved first | ||||
| async function attachImageFromDataUrl(note: any, imageDataUrl: string, cropRect: any) { | ||||
| 	const tempDir = Setting.value('tempDir'); | ||||
| @@ -328,17 +354,17 @@ export default async function(request: Request, id: string = null, link: string | ||||
|  | ||||
| 		let note: any = await requestNoteToNote(requestNote); | ||||
|  | ||||
| 		const imageUrls = ArrayUtils.unique(markupLanguageUtils.extractImageUrls(note.markup_language, note.body)); | ||||
| 		const mediaUrls = extractMediaUrls(note.markup_language, note.body); | ||||
|  | ||||
| 		reg.logger().info(`Request (${requestId}): Downloading images: ${imageUrls.length}`); | ||||
| 		reg.logger().info(`Request (${requestId}): Downloading media files: ${mediaUrls.length}`); | ||||
|  | ||||
| 		let result = await downloadImages(imageUrls); // , allowFileProtocolImages); | ||||
| 		let result = await downloadMediaFiles(mediaUrls); // , allowFileProtocolImages); | ||||
|  | ||||
| 		reg.logger().info(`Request (${requestId}): Creating resources from paths: ${Object.getOwnPropertyNames(result).length}`); | ||||
|  | ||||
| 		result = await createResourcesFromPaths(result); | ||||
| 		await removeTempFiles(result); | ||||
| 		note.body = replaceImageUrlsByResources(note.markup_language, note.body, result, imageSizes); | ||||
| 		note.body = replaceUrlsByResources(note.markup_language, note.body, result, imageSizes); | ||||
|  | ||||
| 		reg.logger().info(`Request (${requestId}): Saving note...`); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user