import Setting from '../../../models/Setting';
import shim from '../../../shim';
import uuid from '../../../uuid';
import readonlyProperties from '../utils/readonlyProperties';
import defaultSaveOptions from '../utils/defaultSaveOptions';
import defaultAction from '../utils/defaultAction';
import BaseModel, { ModelType } from '../../../BaseModel';
import defaultLoadOptions from '../utils/defaultLoadOptions';
import { RequestMethod, Request } from '../Api';
import markdownUtils from '../../../markdownUtils';
import collectionToPaginatedResults from '../utils/collectionToPaginatedResults';
import Database from '../../../database';
import Folder from '../../../models/Folder';
import Note from '../../../models/Note';
import Tag from '../../../models/Tag';
import Resource from '../../../models/Resource';
import htmlUtils from '../../../htmlUtils';
import markupLanguageUtils from '../../../markupLanguageUtils';
const mimeUtils = require('../../../mime-utils.js').mime;
const md5 = require('md5');
import HtmlToMd from '../../../HtmlToMd';
const urlUtils = require('../../../urlUtils.js');
import * as ArrayUtils from '../../../ArrayUtils';
import Logger from '@joplin/utils/Logger';
const { mimeTypeFromHeaders } = require('../../../net-utils');
const { fileExtension, safeFileExtension, safeFilename, filename } = require('../../../path-utils');
const { MarkupToHtml } = require('@joplin/renderer');
const { ErrorNotFound } = require('../utils/errors');
import { fileUriToPath } from '@joplin/utils/url';
import { NoteEntity } from '../../database/types';

const logger = Logger.create('routes/notes');

let htmlToMdParser_: any = null;

function htmlToMdParser() {
	if (htmlToMdParser_) return htmlToMdParser_;
	htmlToMdParser_ = new HtmlToMd();
	return htmlToMdParser_;
}

type RequestNote = {
	id?: any;
	parent_id?: string;
	title: string;
	body?: string;
	latitude?: number;
	longitude?: number;
	altitude?: number;
	author?: string;
	source_url?: string;
	is_todo?: number;
	todo_due?: number;
	todo_completed?: number;
	user_updated_time?: number;
	user_created_time?: number;
	markup_language?: number;
	body_html: string;
	base_url?: string;
	convert_to: string;
	anchor_names?: any[];
	image_sizes?: object;
	stylesheets: any;
};
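
// A minimal RequestNote payload, shown for illustration only (not executed).
// The field values are assumptions; only the required fields of the type above
// plus base_url are filled in.
//
//   const exampleRequest: RequestNote = {
//     title: 'Clipped page',
//     body_html: '<p>Hello <b>world</b></p>',
//     convert_to: 'markdown',
//     stylesheets: [],
//     base_url: 'https://example.com/',
//   };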

type FetchOptions = {
	timeout?: number;
	maxRedirects?: number;
};

async function requestNoteToNote(requestNote: RequestNote): Promise<NoteEntity> {
	const output: any = {
		title: requestNote.title ? requestNote.title : '',
		body: requestNote.body ? requestNote.body : '',
	};

	if (requestNote.id) output.id = requestNote.id;

	const baseUrl = requestNote.base_url ? requestNote.base_url : '';

	if (requestNote.body_html) {
		if (requestNote.convert_to === 'html') {
			const style = await buildNoteStyleSheet(requestNote.stylesheets);
			const minify = require('html-minifier').minify;

			const minifyOptions = {
				// Remove all spaces and, especially, newlines from tag attributes, as that would
				// break the rendering.
				customAttrCollapse: /.*/,
				// Need to remove all whitespace because whitespace at the beginning of a line
				// means a code block in Markdown.
				collapseWhitespace: true,
				minifyCSS: true,
				maxLineLength: 300,
			};

			const uglifycss = require('uglifycss');
			const styleString = uglifycss.processString(style.join('\n'), {
				// Need to set a max length because Ace Editor takes forever
				// to display notes with long lines.
				maxLineLen: 200,
			});

			const styleTag = style.length ? `<style>${styleString}</style>` + '\n' : '';
			let minifiedHtml = '';
			try {
				minifiedHtml = minify(requestNote.body_html, minifyOptions);
			} catch (error) {
				console.warn('Could not minify HTML - using non-minified HTML instead', error);
				minifiedHtml = requestNote.body_html;
			}
			output.body = styleTag + minifiedHtml;
			output.body = htmlUtils.prependBaseUrl(output.body, baseUrl);
			output.markup_language = MarkupToHtml.MARKUP_LANGUAGE_HTML;
		} else {
			// Convert to Markdown
			// Parsing will not work if the HTML is not wrapped in a top-level tag, which is not guaranteed
			// when getting the content from elsewhere. So wrap it here - it won't change the final
			// rendering, but it makes sure everything gets parsed.
			output.body = await htmlToMdParser().parse(`<div>${requestNote.body_html}</div>`, {
				baseUrl: baseUrl,
				anchorNames: requestNote.anchor_names ? requestNote.anchor_names : [],
				convertEmbeddedPdfsToLinks: true,
			});
			output.markup_language = MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN;
		}
	}

	if (requestNote.parent_id) {
		output.parent_id = requestNote.parent_id;
	} else {
		const folder = await Folder.defaultFolder();
		if (!folder) throw new Error('Cannot find folder for note');
		output.parent_id = folder.id;
	}

	if ('source_url' in requestNote) output.source_url = requestNote.source_url;
	if ('author' in requestNote) output.author = requestNote.author;
	if ('user_updated_time' in requestNote) output.user_updated_time = Database.formatValue(Database.TYPE_INT, requestNote.user_updated_time);
	if ('user_created_time' in requestNote) output.user_created_time = Database.formatValue(Database.TYPE_INT, requestNote.user_created_time);
	if ('is_todo' in requestNote) output.is_todo = Database.formatValue(Database.TYPE_INT, requestNote.is_todo);
	if ('todo_due' in requestNote) output.todo_due = Database.formatValue(Database.TYPE_INT, requestNote.todo_due);
	if ('todo_completed' in requestNote) output.todo_completed = Database.formatValue(Database.TYPE_INT, requestNote.todo_completed);
	if ('markup_language' in requestNote) output.markup_language = Database.formatValue(Database.TYPE_INT, requestNote.markup_language);
	if ('longitude' in requestNote) output.longitude = requestNote.longitude;
	if ('latitude' in requestNote) output.latitude = requestNote.latitude;
	if ('altitude' in requestNote) output.altitude = requestNote.altitude;

	if (!output.markup_language) output.markup_language = MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN;

	return output;
}
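
// Illustrative sketch (not executed) of how a body_html payload maps to a note
// entity; the input values are assumptions.
//
//   const note = await requestNoteToNote({
//     title: 'Example', body_html: '<p>Hi</p>', convert_to: '', stylesheets: [],
//   });
//   // note.body is now Markdown and note.markup_language is
//   // MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN, since convert_to is not 'html'.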

async function buildNoteStyleSheet(stylesheets: any[]) {
	if (!stylesheets) return [];

	const output = [];

	for (const stylesheet of stylesheets) {
		if (stylesheet.type === 'text') {
			output.push(stylesheet.value);
		} else if (stylesheet.type === 'url') {
			try {
				const tempPath = `${Setting.value('tempDir')}/${md5(`${Math.random()}_${Date.now()}`)}.css`;
				await shim.fetchBlob(stylesheet.value, { path: tempPath, maxRetry: 1 });
				const text = await shim.fsDriver().readFile(tempPath);
				output.push(text);
				await shim.fsDriver().remove(tempPath);
			} catch (error) {
				logger.warn(`Cannot download stylesheet at ${stylesheet.value}`, error);
			}
		} else {
			throw new Error(`Invalid stylesheet type: ${stylesheet.type}`);
		}
	}

	return output;
}
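
// The expected shape of the "stylesheets" input, inferred from the loop above
// (values are illustrative):
//
//   const styles = await buildNoteStyleSheet([
//     { type: 'text', value: 'p { color: red; }' },
//     { type: 'url', value: 'https://example.com/style.css' },
//   ]);
//   // styles is an array of CSS strings; URL entries that fail to download are skipped.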

async function tryToGuessExtFromMimeType(response: any, mediaPath: string) {
	const mimeType = mimeTypeFromHeaders(response.headers);
	if (!mimeType) return mediaPath;

	const newExt = mimeUtils.toFileExtension(mimeType);
	if (!newExt) return mediaPath;

	const newMediaPath = `${mediaPath}.${newExt}`;
	await shim.fsDriver().move(mediaPath, newMediaPath);
	return newMediaPath;
}


const getFileExtension = (url: string, isDataUrl: boolean) => {
	let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
	if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
	if (fileExt) fileExt = `.${fileExt}`;

	return fileExt;
};
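
// Example results, assuming the path and MIME helpers behave as used above
// (illustrative only):
//
//   getFileExtension('https://example.com/photo.JPG', false); // '.jpg'
//   getFileExtension('https://example.com/page', false);      // '' (no known extension)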

const generateMediaPath = (url: string, isDataUrl: boolean, fileExt: string) => {
	const tempDir = Setting.value('tempDir');
	const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
	// Append a UUID because simply checking if the file exists is not enough since
	// multiple resources can be downloaded at the same time (race condition).
	const mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
	return mediaPath;
};

const isValidUrl = (url: string, isDataUrl: boolean, urlProtocol?: string, allowedProtocols?: string[]) => {
	if (!urlProtocol) return false;

	// PDFs and other heavy resources are often served as separate files instead of data URLs,
	// so it is very unlikely to encounter a PDF as a data URL.
	if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) {
		logger.warn(`Resources in data URL format are only supported for images ${url}`);
		return false;
	}

	const defaultAllowedProtocols = ['http:', 'https:', 'data:'];
	const allowed = allowedProtocols ?? defaultAllowedProtocols;
	const isAllowedProtocol = allowed.includes(urlProtocol);

	return isAllowedProtocol;
};
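
// Behaviour sketch based on the checks above (illustrative only):
//
//   isValidUrl('https://example.com/a.png', false, 'https:');        // true
//   isValidUrl('data:image/png;base64,...', true, 'data:');          // true
//   isValidUrl('data:application/pdf;base64,...', true, 'data:');    // false (non-image data URL)
//   isValidUrl('ftp://example.com/a.png', false, 'ftp:');            // false (protocol not allowed)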

export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
	logger.info('Downloading media file', url);

	// The URL we get to download has been extracted from the Markdown document
	url = markdownUtils.unescapeLinkUrl(url);

	const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0;
	const urlProtocol = urlUtils.urlProtocol(url)?.toLowerCase();

	if (!isValidUrl(url, isDataUrl, urlProtocol, allowedProtocols)) {
		return '';
	}

	const fileExt = getFileExtension(url, isDataUrl);
	const mediaPath = generateMediaPath(url, isDataUrl, fileExt);
	let newMediaPath = undefined;

	try {
		if (isDataUrl) {
			await shim.imageFromDataUrl(url, mediaPath);
		} else if (urlProtocol === 'file:') {
			const localPath = fileUriToPath(url);
			await shim.fsDriver().copy(localPath, mediaPath);
		} else {
			const response = await shim.fetchBlob(url, { path: mediaPath, maxRetry: 1, ...fetchOptions });

			if (!fileExt) {
				// If we could not find the file extension from the URL, try to get it
				// now based on the Content-Type header.
				newMediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
			}
		}
		return newMediaPath ?? mediaPath;
	} catch (error) {
		logger.warn(`Cannot download image at ${url}`, error);
		return '';
	}
}
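
// Usage sketch (illustrative; the URL and options are assumptions): downloads the
// remote file into the temp directory and returns the local path, or '' on failure.
//
//   const localPath = await downloadMediaFile('https://example.com/image.png', { timeout: 10000 }, ['http:', 'https:']);
//   if (localPath) { /* attach or convert the file, then clean it up */ }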

async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
	const PromisePool = require('es6-promise-pool');

	const output: any = {};

	const downloadOne = async (url: string) => {
		const mediaPath = await downloadMediaFile(url, fetchOptions, allowedProtocols);
		if (mediaPath) output[url] = { path: mediaPath, originalUrl: url };
	};

	let urlIndex = 0;
	const promiseProducer = () => {
		if (urlIndex >= urls.length) return null;

		const url = urls[urlIndex++];
		return downloadOne(url);
	};

	const concurrency = 10;
	const pool = new PromisePool(promiseProducer, concurrency);
	await pool.start();

	return output;
}
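
// The returned object maps each original URL to its downloaded temp file, e.g.
// (illustrative values):
//
//   {
//     'https://example.com/a.png': { path: '/tmp/a_<uuid>.png', originalUrl: 'https://example.com/a.png' },
//   }
//
// URLs that failed to download or were rejected by isValidUrl() are simply absent.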

async function createResourcesFromPaths(urls: string[]) {
	// Despite the type annotation, "urls" is the map returned by downloadMediaFiles(),
	// keyed by original URL.
	for (const url in urls) {
		if (!urls.hasOwnProperty(url)) continue;
		const urlInfo: any = urls[url];
		try {
			const resource = await shim.createResourceFromPath(urlInfo.path);
			urlInfo.resource = resource;
		} catch (error) {
			logger.warn(`Cannot create resource for ${url}`, error);
		}
	}
	return urls;
}

async function removeTempFiles(urls: string[]) {
	for (const url in urls) {
		if (!urls.hasOwnProperty(url)) continue;
		const urlInfo: any = urls[url];
		try {
			await shim.fsDriver().remove(urlInfo.path);
		} catch (error) {
			logger.warn(`Cannot remove ${urlInfo.path}`, error);
		}
	}
}

function replaceUrlsByResources(markupLanguage: number, md: string, urls: any, imageSizes: any) {
	const imageSizesIndexes: any = {};

	if (markupLanguage === MarkupToHtml.MARKUP_LANGUAGE_HTML) {
		return htmlUtils.replaceMediaUrls(md, (url: string) => {
			const urlInfo: any = urls[url];
			if (!urlInfo || !urlInfo.resource) return url;
			return Resource.internalUrl(urlInfo.resource);
		});
	} else {
		// Proper Regex:
		//
		// /(!\[.*?\]\()([^\s\)]+)(.*?\))/g
		//
		// Broken regex when [embedded_pdf] support was added, and fixed with
		// the `before.startsWith('[![')` hack. But ideally that function should be
		// unit tested to prevent it from being broken again.
		//
		// /(!?\[.*?\]\()([^\s\)]+)(.*?\))/g
		//
		// eslint-disable-next-line no-useless-escape
		return md.replace(/(!?\[.*?\]\()([^\s\)]+)(.*?\))/g, (_match: any, before: string, url: string, after: string) => {
			let type = 'link';
			if (before.startsWith('[embedded_pdf]')) {
				type = 'pdf';
			} else if (before.startsWith('![') || before.startsWith('[![')) {
				type = 'image';
			}

			const urlInfo = urls[url];
			if (type === 'link' || !urlInfo || !urlInfo.resource) return before + url + after;

			const resourceUrl = Resource.internalUrl(urlInfo.resource);
			if (type === 'pdf') {
				return `[${markdownUtils.escapeLinkUrl(url)}](${resourceUrl}${after}`;
			}

			if (!(urlInfo.originalUrl in imageSizesIndexes)) imageSizesIndexes[urlInfo.originalUrl] = 0;
			const imageSizesCollection = imageSizes[urlInfo.originalUrl];
			if (!imageSizesCollection) {
				// Either it's not an image, or we don't know the size of the image.
				// In some cases, we won't find the image size information for that particular image URL. Normally
				// it will only happen when using the "Clip simplified page" feature, which can modify the
				// image URLs (for example it will select a smaller size resolution). In that case, it's
				// fine to return the image as-is because it already has good dimensions.
				return before + resourceUrl + after;
			}

			const imageSize = imageSizesCollection[imageSizesIndexes[urlInfo.originalUrl]];
			imageSizesIndexes[urlInfo.originalUrl]++;

			if (imageSize && (imageSize.naturalWidth !== imageSize.width || imageSize.naturalHeight !== imageSize.height)) {
				return `<img width="${imageSize.width}" height="${imageSize.height}" src="${resourceUrl}"/>`;
			} else {
				return before + resourceUrl + after;
			}
		});
	}
}
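
// Replacement sketch for the Markdown branch (illustrative; the ':/<resource id>'
// form is the internal resource URL as returned by Resource.internalUrl()):
//
//   Input:  ![alt](https://example.com/a.png)
//   Output: ![alt](:/<resource id>)   // or an <img> tag when the clipped size
//                                     // differs from the natural size
//
// Plain links (type 'link') and URLs without a downloaded resource are left untouched.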

export function extractMediaUrls(markupLanguage: number, text: string): string[] {
	const urls: string[] = [];
	urls.push(...ArrayUtils.unique(markupLanguageUtils.extractImageUrls(markupLanguage, text)));
	urls.push(...ArrayUtils.unique(markupLanguageUtils.extractPdfUrls(markupLanguage, text)));
	return urls;
}
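
// Illustrative example, assuming extractImageUrls/extractPdfUrls pick up the image
// and embedded-PDF links in the Markdown below (each list is deduplicated separately):
//
//   extractMediaUrls(MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN,
//     '![a](https://example.com/a.png)\n[embedded_pdf](https://example.com/b.pdf)');
//   // => ['https://example.com/a.png', 'https://example.com/b.pdf']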

// Note must have been saved first
async function attachImageFromDataUrl(note: any, imageDataUrl: string, cropRect: any) {
	const tempDir = Setting.value('tempDir');
	const mime = mimeUtils.fromDataUrl(imageDataUrl);
	let ext = mimeUtils.toFileExtension(mime) || '';
	if (ext) ext = `.${ext}`;
	const tempFilePath = `${tempDir}/${md5(`${Math.random()}_${Date.now()}`)}${ext}`;
	const imageConvOptions: any = {};
	if (cropRect) imageConvOptions.cropRect = cropRect;
	await shim.imageFromDataUrl(imageDataUrl, tempFilePath, imageConvOptions);
	return await shim.attachFileToNote(note, tempFilePath);
}

export const extractNoteFromHTML = async (
	requestNote: RequestNote,
	requestId: number,
	imageSizes: any,
	fetchOptions?: FetchOptions,
	allowedProtocols?: string[],
) => {
	const note = await requestNoteToNote(requestNote);

	const mediaUrls = extractMediaUrls(note.markup_language, note.body);

	logger.info(`Request (${requestId}): Downloading media files: ${mediaUrls.length}`);

	const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions, allowedProtocols);

	logger.info(`Request (${requestId}): Creating resources from paths: ${Object.getOwnPropertyNames(mediaFiles).length}`);

	const resources = await createResourcesFromPaths(mediaFiles);
	await removeTempFiles(resources);
	note.body = replaceUrlsByResources(note.markup_language, note.body, resources, imageSizes);

	logger.info(`Request (${requestId}): Saving note...`);

	const saveOptions = defaultSaveOptions('POST', note.id);
	saveOptions.autoTimestamp = false; // No auto-timestamp because the user may have provided their own
	const timestamp = Date.now();
	note.updated_time = timestamp;
	note.created_time = timestamp;
	if (!('user_updated_time' in note)) note.user_updated_time = timestamp;
	if (!('user_created_time' in note)) note.user_created_time = timestamp;

	return { note, saveOptions, resources };
};
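
// Sketch of the POST pipeline handled below (illustrative; the payload is an
// assumption). The returned note is not yet saved, so callers pass it to Note.save().
//
//   const { note, saveOptions } = await extractNoteFromHTML(
//     { title: 'Clip', body_html: '<p>Hi</p>', convert_to: '', stylesheets: [] },
//     Date.now(), {},
//   );
//   const saved = await Note.save(note, saveOptions);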

export default async function(request: Request, id: string = null, link: string = null) {
	if (request.method === 'GET') {
		if (link && link === 'tags') {
			return collectionToPaginatedResults(ModelType.Tag, await Tag.tagsByNoteId(id), request);
		} else if (link && link === 'resources') {
			const note = await Note.load(id);
			if (!note) throw new ErrorNotFound();
			const resourceIds = await Note.linkedResourceIds(note.body);
			const output = [];
			const loadOptions = defaultLoadOptions(request, BaseModel.TYPE_RESOURCE);
			for (const resourceId of resourceIds) {
				output.push(await Resource.load(resourceId, loadOptions));
			}
			return collectionToPaginatedResults(ModelType.Resource, output, request);
		} else if (link) {
			throw new ErrorNotFound();
		}

		return defaultAction(BaseModel.TYPE_NOTE, request, id, link);
	}

	if (request.method === RequestMethod.POST) {
		const requestId = Date.now();
		const requestNote = JSON.parse(request.body);

		// const allowFileProtocolImages = urlUtils.urlProtocol(requestNote.base_url).toLowerCase() === 'file:';

		const imageSizes = requestNote.image_sizes ? requestNote.image_sizes : {};

		logger.info('Images:', imageSizes);

		const allowedProtocolsForDownloadMediaFiles = ['http:', 'https:', 'file:', 'data:'];
		const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes, undefined, allowedProtocolsForDownloadMediaFiles);

		let note = await Note.save(extracted.note, extracted.saveOptions);

		if (requestNote.tags) {
			const tagTitles = requestNote.tags.split(',');
			await Tag.setNoteTagsByTitles(note.id, tagTitles);
		}

		if (requestNote.image_data_url) {
			note = await attachImageFromDataUrl(note, requestNote.image_data_url, requestNote.crop_rect);
		}

		logger.info(`Request (${requestId}): Created note ${note.id}`);

		return note;
	}

	if (request.method === 'PUT') {
		const note = await Note.load(id);

		if (!note) throw new ErrorNotFound();

		const saveOptions = {
			...defaultSaveOptions('PUT', note.id),
			autoTimestamp: false, // No auto-timestamp because the user may have provided their own
			userSideValidation: true,
		};

		const timestamp = Date.now();

		const newProps = request.bodyJson(readonlyProperties('PUT'));
		if (!('user_updated_time' in newProps)) newProps.user_updated_time = timestamp;

		let newNote = {
			...note,
			...newProps,
			updated_time: timestamp,
		};

		newNote = await Note.save(newNote, saveOptions);

		const requestNote = JSON.parse(request.body);
		if (requestNote.tags || requestNote.tags === '') {
			const tagTitles = requestNote.tags.split(',');
			await Tag.setNoteTagsByTitles(id, tagTitles);
		}

		return newNote;
	}

	return defaultAction(BaseModel.TYPE_NOTE, request, id, link);
}
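
// Callers typically reach this handler through the Web Clipper / data API, e.g.
// (a sketch assuming the default Clipper port and a placeholder token):
//
//   curl --data '{ "title": "My note", "body": "Some text" }' \
//     'http://localhost:41184/notes?token=YOUR_TOKEN'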