mirror of
https://github.com/laurent22/joplin.git
synced 2024-12-24 10:27:10 +02:00
API: Increase protection of files downloaded via the REST API (#9676)
This commit is contained in:
parent
a863f92490
commit
d4d400217b
@ -1,7 +1,12 @@
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
import shim from '../../../shim';
|
||||
import uuid from '../../../uuid';
|
||||
import { downloadMediaFile } from './notes';
|
||||
import Setting from '../../../models/Setting';
|
||||
import { readFile, readdir, remove, writeFile } from 'fs-extra';
|
||||
const md5 = require('md5');
|
||||
|
||||
const imagePath = `${__dirname}/../../../images/SideMenuHeader.png`;
|
||||
const jpgBase64Content = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/wAALCAAFAAUBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EAB8QAAEEAQUBAAAAAAAAAAAAAAQBAgUGAwAREiExM//aAAgBAQAAPwBJarVpGHm7KWbapCSwyZ6FDjkLyYE1W/LHyV2zfOk2TrzX/9k=';
|
||||
|
||||
describe('routes/notes', () => {
|
||||
|
||||
@ -21,44 +26,50 @@ describe('routes/notes', () => {
|
||||
'https://joplinapp.org/valid/image_url.png',
|
||||
'http://joplinapp.org/valid/image_url.png',
|
||||
])('should try to download and return a local path to a valid URL', async (url) => {
|
||||
const fetchBlobSpy = jest.fn();
|
||||
jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy);
|
||||
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
|
||||
const fetchBlobSpy = jest.fn(async (_url, options) => {
|
||||
await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64'));
|
||||
});
|
||||
const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy);
|
||||
|
||||
const response = await downloadMediaFile(url);
|
||||
|
||||
expect(response.endsWith('mocked_uuid_value.png')).toBe(true);
|
||||
expect(fetchBlobSpy).toBeCalledTimes(1);
|
||||
const files = await readdir(Setting.value('tempDir'));
|
||||
|
||||
expect(files.length).toBe(1);
|
||||
expect(fetchBlobSpy).toHaveBeenCalledTimes(1);
|
||||
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
|
||||
await remove(response);
|
||||
spy.mockRestore();
|
||||
});
|
||||
|
||||
test('should get file from local drive if protocol allows it', async () => {
|
||||
const url = 'file://valid/image.png';
|
||||
const url = `file:///${imagePath}`;
|
||||
const originalFileContent = await readFile(imagePath);
|
||||
|
||||
const fsDriverCopySpy = jest.fn();
|
||||
jest.spyOn(shim, 'fsDriver').mockImplementation(() => {
|
||||
return {
|
||||
copy: fsDriverCopySpy,
|
||||
} as any;
|
||||
});
|
||||
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
|
||||
const response = await downloadMediaFile(url, null, ['file:']);
|
||||
|
||||
const response = await downloadMediaFile(url);
|
||||
const files = await readdir(Setting.value('tempDir'));
|
||||
expect(files.length).toBe(1);
|
||||
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
|
||||
|
||||
expect(response.endsWith('mocked_uuid_value.png')).toBe(true);
|
||||
expect(fsDriverCopySpy).toBeCalledTimes(1);
|
||||
const responseFileContent = await readFile(response);
|
||||
expect(md5(responseFileContent)).toBe(md5(originalFileContent));
|
||||
await remove(response);
|
||||
});
|
||||
|
||||
test('should be able to handle URLs with data', async () => {
|
||||
const url = 'data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSKudfOulrSOp3WOyDZu6QdvCchPGolfO0o/XBs/fNwfjZ0frl3/zy7////wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAkAABAALAAAAAAQABAAAAVVICSOZGlCQAosJ6mu7fiyZeKqNKToQGDsM8hBADgUXoGAiqhSvp5QAnQKGIgUhwFUYLCVDFCrKUE1lBavAViFIDlTImbKC5Gm2hB0SlBCBMQiB0UjIQA7';
|
||||
|
||||
const imageFromDataUrlSpy = jest.fn();
|
||||
jest.spyOn(shim, 'imageFromDataUrl').mockImplementation(imageFromDataUrlSpy);
|
||||
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
|
||||
const originalFileContent = Buffer.from(url.split('data:image/gif;base64,')[1], 'base64');
|
||||
|
||||
const response = await downloadMediaFile(url);
|
||||
|
||||
expect(response.endsWith('mocked_uuid_value.gif')).toBe(true);
|
||||
expect(imageFromDataUrlSpy).toBeCalledTimes(1);
|
||||
const files = await readdir(Setting.value('tempDir'));
|
||||
expect(files.length).toBe(1);
|
||||
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
|
||||
|
||||
const responseFileContent = await readFile(response);
|
||||
expect(md5(responseFileContent)).toBe(md5(originalFileContent));
|
||||
await remove(response);
|
||||
});
|
||||
|
||||
test('should not process URLs with data that is not image type', async () => {
|
||||
@ -68,6 +79,8 @@ describe('routes/notes', () => {
|
||||
const response = await downloadMediaFile(url);
|
||||
Logger.globalLogger.enabled = true;
|
||||
|
||||
const files = await readdir(Setting.value('tempDir'));
|
||||
expect(files.length).toBe(0);
|
||||
expect(response).toBe('');
|
||||
});
|
||||
|
||||
@ -76,6 +89,42 @@ describe('routes/notes', () => {
|
||||
|
||||
const response = await downloadMediaFile(url);
|
||||
|
||||
const files = await readdir(Setting.value('tempDir'));
|
||||
expect(files.length).toBe(0);
|
||||
expect(response).toBe('');
|
||||
});
|
||||
|
||||
test('should not copy content from invalid protocols', async () => {
|
||||
const url = 'file:///home/user/file.db';
|
||||
|
||||
const allowedProtocols: string[] = [];
|
||||
const mediaFilePath = await downloadMediaFile(url, null, allowedProtocols);
|
||||
|
||||
const files = await readdir(Setting.value('tempDir'));
|
||||
expect(files.length).toBe(0);
|
||||
expect(mediaFilePath).toBe('');
|
||||
});
|
||||
|
||||
test.each([
|
||||
'https://joplinapp.org/valid/image_url',
|
||||
'https://joplinapp.org/valid/image_url.invalid_url',
|
||||
])('should correct the file extension in filename from files without or invalid ones', async (url) => {
|
||||
const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(async (_url, options) => {
|
||||
await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64'));
|
||||
return {
|
||||
headers: {
|
||||
'content-type': 'image/jpg',
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const response = await downloadMediaFile(url);
|
||||
|
||||
const files = await readdir(Setting.value('tempDir'));
|
||||
expect(files.length).toBe(1);
|
||||
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
|
||||
|
||||
await remove(response);
|
||||
spy.mockRestore();
|
||||
});
|
||||
});
|
||||
|
@ -189,67 +189,86 @@ async function tryToGuessExtFromMimeType(response: any, mediaPath: string) {
|
||||
return newMediaPath;
|
||||
}
|
||||
|
||||
export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions) {
|
||||
logger.info('Downloading media file', url);
|
||||
|
||||
const getFileExtension = (url: string, isDataUrl: boolean) => {
|
||||
let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
|
||||
if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
|
||||
if (fileExt) fileExt = `.${fileExt}`;
|
||||
|
||||
return fileExt;
|
||||
};
|
||||
|
||||
const generateMediaPath = (url: string, isDataUrl: boolean, fileExt: string) => {
|
||||
const tempDir = Setting.value('tempDir');
|
||||
const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
|
||||
// Append a UUID because simply checking if the file exists is not enough since
|
||||
// multiple resources can be downloaded at the same time (race condition).
|
||||
const mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
|
||||
return mediaPath;
|
||||
};
|
||||
|
||||
const isValidUrl = (url: string, isDataUrl: boolean, urlProtocol?: string, allowedProtocols?: string[]) => {
|
||||
if (!urlProtocol) return false;
|
||||
|
||||
// PDFs and other heavy resources are often served as separate files instead of data URLs; it's very unlikely to encounter a PDF as a data URL
|
||||
if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) {
|
||||
logger.warn(`Resources in data URL format is only supported for images ${url}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
const defaultAllowedProtocols = ['http:', 'https:', 'data:'];
|
||||
const allowed = allowedProtocols ?? defaultAllowedProtocols;
|
||||
const isAllowedProtocol = allowed.includes(urlProtocol);
|
||||
|
||||
return isAllowedProtocol;
|
||||
};
|
||||
|
||||
export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
|
||||
logger.info('Downloading media file', url);
|
||||
|
||||
// The URL we get to download has been extracted from the Markdown document
|
||||
url = markdownUtils.unescapeLinkUrl(url);
|
||||
|
||||
const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0;
|
||||
|
||||
// PDFs and other heavy resources are often served as separate files instead of data URLs; it's very unlikely to encounter a PDF as a data URL
|
||||
if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) {
|
||||
logger.warn(`Resources in data URL format is only supported for images ${url}`);
|
||||
return '';
|
||||
}
|
||||
|
||||
const invalidProtocols = ['cid:'];
|
||||
const urlProtocol = urlUtils.urlProtocol(url)?.toLowerCase();
|
||||
|
||||
if (!urlProtocol || invalidProtocols.includes(urlProtocol)) {
|
||||
if (!isValidUrl(url, isDataUrl, urlProtocol, allowedProtocols)) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
|
||||
let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
|
||||
if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
|
||||
if (fileExt) fileExt = `.${fileExt}`;
|
||||
|
||||
// Append a UUID because simply checking if the file exists is not enough since
|
||||
// multiple resources can be downloaded at the same time (race condition).
|
||||
let mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
|
||||
const fileExt = getFileExtension(url, isDataUrl);
|
||||
const mediaPath = generateMediaPath(url, isDataUrl, fileExt);
|
||||
let newMediaPath = undefined;
|
||||
|
||||
try {
|
||||
if (isDataUrl) {
|
||||
await shim.imageFromDataUrl(url, mediaPath);
|
||||
} else if (urlProtocol === 'file:') {
|
||||
// Can't think of any reason to disallow this at this point
|
||||
// if (!allowFileProtocolImages) throw new Error('For security reasons, this URL with file:// protocol cannot be downloaded');
|
||||
const localPath = fileUriToPath(url);
|
||||
await shim.fsDriver().copy(localPath, mediaPath);
|
||||
} else {
|
||||
const response = await shim.fetchBlob(url, { path: mediaPath, maxRetry: 1, ...fetchOptions });
|
||||
|
||||
// If we could not find the file extension from the URL, try to get it
|
||||
// now based on the Content-Type header.
|
||||
if (!fileExt) mediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
|
||||
if (!fileExt) {
|
||||
// If we could not find the file extension from the URL, try to get it
|
||||
// now based on the Content-Type header.
|
||||
newMediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
|
||||
}
|
||||
}
|
||||
return mediaPath;
|
||||
return newMediaPath ?? mediaPath;
|
||||
} catch (error) {
|
||||
logger.warn(`Cannot download image at ${url}`, error);
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions) {
|
||||
async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
|
||||
const PromisePool = require('es6-promise-pool');
|
||||
|
||||
const output: any = {};
|
||||
|
||||
const downloadOne = async (url: string) => {
|
||||
const mediaPath = await downloadMediaFile(url, fetchOptions); // , allowFileProtocolImages);
|
||||
const mediaPath = await downloadMediaFile(url, fetchOptions, allowedProtocols);
|
||||
if (mediaPath) output[url] = { path: mediaPath, originalUrl: url };
|
||||
};
|
||||
|
||||
@ -374,14 +393,20 @@ async function attachImageFromDataUrl(note: any, imageDataUrl: string, cropRect:
|
||||
return await shim.attachFileToNote(note, tempFilePath);
|
||||
}
|
||||
|
||||
export const extractNoteFromHTML = async (requestNote: RequestNote, requestId: number, imageSizes: any, fetchOptions?: FetchOptions) => {
|
||||
export const extractNoteFromHTML = async (
|
||||
requestNote: RequestNote,
|
||||
requestId: number,
|
||||
imageSizes: any,
|
||||
fetchOptions?: FetchOptions,
|
||||
allowedProtocols?: string[],
|
||||
) => {
|
||||
const note = await requestNoteToNote(requestNote);
|
||||
|
||||
const mediaUrls = extractMediaUrls(note.markup_language, note.body);
|
||||
|
||||
logger.info(`Request (${requestId}): Downloading media files: ${mediaUrls.length}`);
|
||||
|
||||
const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions); // , allowFileProtocolImages);
|
||||
const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions, allowedProtocols);
|
||||
|
||||
logger.info(`Request (${requestId}): Creating resources from paths: ${Object.getOwnPropertyNames(mediaFiles).length}`);
|
||||
|
||||
@ -433,7 +458,8 @@ export default async function(request: Request, id: string = null, link: string
|
||||
|
||||
logger.info('Images:', imageSizes);
|
||||
|
||||
const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes);
|
||||
const allowedProtocolsForDownloadMediaFiles = ['http:', 'https:', 'file:', 'data:'];
|
||||
const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes, undefined, allowedProtocolsForDownloadMediaFiles);
|
||||
|
||||
let note = await Note.save(extracted.note, extracted.saveOptions);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user