1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-24 10:27:10 +02:00

API: Increase protection of files downloaded via the REST API (#9676)

This commit is contained in:
pedr 2024-01-22 11:46:18 -03:00 committed by GitHub
parent a863f92490
commit d4d400217b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 128 additions and 53 deletions

View File

@ -1,7 +1,12 @@
import Logger from '@joplin/utils/Logger';
import shim from '../../../shim';
import uuid from '../../../uuid';
import { downloadMediaFile } from './notes';
import Setting from '../../../models/Setting';
import { readFile, readdir, remove, writeFile } from 'fs-extra';
const md5 = require('md5');
const imagePath = `${__dirname}/../../../images/SideMenuHeader.png`;
const jpgBase64Content = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/wAALCAAFAAUBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EAB8QAAEEAQUBAAAAAAAAAAAAAAQBAgUGAwAREiExM//aAAgBAQAAPwBJarVpGHm7KWbapCSwyZ6FDjkLyYE1W/LHyV2zfOk2TrzX/9k=';
describe('routes/notes', () => {
@ -21,44 +26,50 @@ describe('routes/notes', () => {
'https://joplinapp.org/valid/image_url.png',
'http://joplinapp.org/valid/image_url.png',
])('should try to download and return a local path to a valid URL', async (url) => {
const fetchBlobSpy = jest.fn();
jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy);
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
const fetchBlobSpy = jest.fn(async (_url, options) => {
await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64'));
});
const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy);
const response = await downloadMediaFile(url);
expect(response.endsWith('mocked_uuid_value.png')).toBe(true);
expect(fetchBlobSpy).toBeCalledTimes(1);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(1);
expect(fetchBlobSpy).toHaveBeenCalledTimes(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
await remove(response);
spy.mockRestore();
});
test('should get file from local drive if protocol allows it', async () => {
const url = 'file://valid/image.png';
const url = `file:///${imagePath}`;
const originalFileContent = await readFile(imagePath);
const fsDriverCopySpy = jest.fn();
jest.spyOn(shim, 'fsDriver').mockImplementation(() => {
return {
copy: fsDriverCopySpy,
} as any;
});
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
const response = await downloadMediaFile(url, null, ['file:']);
const response = await downloadMediaFile(url);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
expect(response.endsWith('mocked_uuid_value.png')).toBe(true);
expect(fsDriverCopySpy).toBeCalledTimes(1);
const responseFileContent = await readFile(response);
expect(md5(responseFileContent)).toBe(md5(originalFileContent));
await remove(response);
});
test('should be able to handle URLs with data', async () => {
const url = 'data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSKudfOulrSOp3WOyDZu6QdvCchPGolfO0o/XBs/fNwfjZ0frl3/zy7////wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAkAABAALAAAAAAQABAAAAVVICSOZGlCQAosJ6mu7fiyZeKqNKToQGDsM8hBADgUXoGAiqhSvp5QAnQKGIgUhwFUYLCVDFCrKUE1lBavAViFIDlTImbKC5Gm2hB0SlBCBMQiB0UjIQA7';
const imageFromDataUrlSpy = jest.fn();
jest.spyOn(shim, 'imageFromDataUrl').mockImplementation(imageFromDataUrlSpy);
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
const originalFileContent = Buffer.from(url.split('data:image/gif;base64,')[1], 'base64');
const response = await downloadMediaFile(url);
expect(response.endsWith('mocked_uuid_value.gif')).toBe(true);
expect(imageFromDataUrlSpy).toBeCalledTimes(1);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
const responseFileContent = await readFile(response);
expect(md5(responseFileContent)).toBe(md5(originalFileContent));
await remove(response);
});
test('should not process URLs with data that is not image type', async () => {
@ -68,6 +79,8 @@ describe('routes/notes', () => {
const response = await downloadMediaFile(url);
Logger.globalLogger.enabled = true;
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(0);
expect(response).toBe('');
});
@ -76,6 +89,42 @@ describe('routes/notes', () => {
const response = await downloadMediaFile(url);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(0);
expect(response).toBe('');
});
test('should not copy content from invalid protocols', async () => {
const url = 'file:///home/user/file.db';
const allowedProtocols: string[] = [];
const mediaFilePath = await downloadMediaFile(url, null, allowedProtocols);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(0);
expect(mediaFilePath).toBe('');
});
test.each([
'https://joplinapp.org/valid/image_url',
'https://joplinapp.org/valid/image_url.invalid_url',
])('should correct the file extension in filename from files without or invalid ones', async (url) => {
const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(async (_url, options) => {
await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64'));
return {
headers: {
'content-type': 'image/jpg',
},
};
});
const response = await downloadMediaFile(url);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
await remove(response);
spy.mockRestore();
});
});

View File

@ -189,67 +189,86 @@ async function tryToGuessExtFromMimeType(response: any, mediaPath: string) {
return newMediaPath;
}
export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions) {
logger.info('Downloading media file', url);
const getFileExtension = (url: string, isDataUrl: boolean) => {
let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
if (fileExt) fileExt = `.${fileExt}`;
return fileExt;
};
const generateMediaPath = (url: string, isDataUrl: boolean, fileExt: string) => {
const tempDir = Setting.value('tempDir');
const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
// Append a UUID because simply checking if the file exists is not enough since
// multiple resources can be downloaded at the same time (race condition).
const mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
return mediaPath;
};
const isValidUrl = (url: string, isDataUrl: boolean, urlProtocol?: string, allowedProtocols?: string[]) => {
if (!urlProtocol) return false;
// PDFs and other heavy resoucres are often served as seperate files insted of data urls, its very unlikely to encounter a pdf as a data url
if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) {
logger.warn(`Resources in data URL format is only supported for images ${url}`);
return false;
}
const defaultAllowedProtocols = ['http:', 'https:', 'data:'];
const allowed = allowedProtocols ?? defaultAllowedProtocols;
const isAllowedProtocol = allowed.includes(urlProtocol);
return isAllowedProtocol;
};
export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
logger.info('Downloading media file', url);
// The URL we get to download have been extracted from the Markdown document
url = markdownUtils.unescapeLinkUrl(url);
const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0;
// PDFs and other heavy resoucres are often served as seperate files insted of data urls, its very unlikely to encounter a pdf as a data url
if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) {
logger.warn(`Resources in data URL format is only supported for images ${url}`);
return '';
}
const invalidProtocols = ['cid:'];
const urlProtocol = urlUtils.urlProtocol(url)?.toLowerCase();
if (!urlProtocol || invalidProtocols.includes(urlProtocol)) {
if (!isValidUrl(url, isDataUrl, urlProtocol, allowedProtocols)) {
return '';
}
const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
if (fileExt) fileExt = `.${fileExt}`;
// Append a UUID because simply checking if the file exists is not enough since
// multiple resources can be downloaded at the same time (race condition).
let mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
const fileExt = getFileExtension(url, isDataUrl);
const mediaPath = generateMediaPath(url, isDataUrl, fileExt);
let newMediaPath = undefined;
try {
if (isDataUrl) {
await shim.imageFromDataUrl(url, mediaPath);
} else if (urlProtocol === 'file:') {
// Can't think of any reason to disallow this at this point
// if (!allowFileProtocolImages) throw new Error('For security reasons, this URL with file:// protocol cannot be downloaded');
const localPath = fileUriToPath(url);
await shim.fsDriver().copy(localPath, mediaPath);
} else {
const response = await shim.fetchBlob(url, { path: mediaPath, maxRetry: 1, ...fetchOptions });
// If we could not find the file extension from the URL, try to get it
// now based on the Content-Type header.
if (!fileExt) mediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
if (!fileExt) {
// If we could not find the file extension from the URL, try to get it
// now based on the Content-Type header.
newMediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
}
}
return mediaPath;
return newMediaPath ?? mediaPath;
} catch (error) {
logger.warn(`Cannot download image at ${url}`, error);
return '';
}
}
async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions) {
async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
const PromisePool = require('es6-promise-pool');
const output: any = {};
const downloadOne = async (url: string) => {
const mediaPath = await downloadMediaFile(url, fetchOptions); // , allowFileProtocolImages);
const mediaPath = await downloadMediaFile(url, fetchOptions, allowedProtocols);
if (mediaPath) output[url] = { path: mediaPath, originalUrl: url };
};
@ -374,14 +393,20 @@ async function attachImageFromDataUrl(note: any, imageDataUrl: string, cropRect:
return await shim.attachFileToNote(note, tempFilePath);
}
export const extractNoteFromHTML = async (requestNote: RequestNote, requestId: number, imageSizes: any, fetchOptions?: FetchOptions) => {
export const extractNoteFromHTML = async (
requestNote: RequestNote,
requestId: number,
imageSizes: any,
fetchOptions?: FetchOptions,
allowedProtocols?: string[],
) => {
const note = await requestNoteToNote(requestNote);
const mediaUrls = extractMediaUrls(note.markup_language, note.body);
logger.info(`Request (${requestId}): Downloading media files: ${mediaUrls.length}`);
const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions); // , allowFileProtocolImages);
const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions, allowedProtocols);
logger.info(`Request (${requestId}): Creating resources from paths: ${Object.getOwnPropertyNames(mediaFiles).length}`);
@ -433,7 +458,8 @@ export default async function(request: Request, id: string = null, link: string
logger.info('Images:', imageSizes);
const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes);
const allowedProtocolsForDownloadMediaFiles = ['http:', 'https:', 'file:', 'data:'];
const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes, undefined, allowedProtocolsForDownloadMediaFiles);
let note = await Note.save(extracted.note, extracted.saveOptions);