1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-02 12:47:41 +02:00

Api: Add capability of limiting downloads (#9788)

This commit is contained in:
pedr 2024-03-09 07:45:21 -03:00 committed by GitHub
parent 4d8fcff6d5
commit 17a8ce5010
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 152 additions and 22 deletions

View File

@ -723,6 +723,7 @@ packages/lib/database.js
packages/lib/debug/DebugService.js
packages/lib/determineBaseAppDirs.js
packages/lib/dom.js
packages/lib/downloadController.js
packages/lib/errorUtils.js
packages/lib/errors.js
packages/lib/eventManager.js

1
.gitignore vendored
View File

@ -703,6 +703,7 @@ packages/lib/database.js
packages/lib/debug/DebugService.js
packages/lib/determineBaseAppDirs.js
packages/lib/dom.js
packages/lib/downloadController.js
packages/lib/errorUtils.js
packages/lib/errors.js
packages/lib/eventManager.js

View File

@ -0,0 +1,95 @@
import Logger from '@joplin/utils/Logger';
import JoplinError from './JoplinError';
import { ErrorCode } from './errors';
import { bytesToHuman } from '@joplin/utils/bytes';
const logger = Logger.create('downloadController');
/**
 * Contract for an object that tracks, and may limit, what a batch of media
 * downloads is allowed to fetch.
 */
export interface DownloadController {
// Running total of bytes received; the implementation in this file adds each chunk's length to it.
totalBytes: number;
// Number of image downloads that have been started.
imagesCount: number;
// Upper bound on how many images may be downloaded.
maxImagesCount: number;
// Number of image links present in the content being processed.
imageCountExpected: number;
// Logs download statistics.
// NOTE(review): LimitedDownloadController declares printStats() without this parameter and reads
// imageCountExpected instead - confirm whether the argument is still needed.
printStats(imagesCountExpected: number): void;
// Returns a callback that accounts for one received chunk of the given request.
handleChunk(request: any): (chunk: any)=> void;
// User-facing description of the limit that was hit.
limitMessage(): string;
}
/**
 * Download controller that caps both the total number of bytes and the number
 * of images fetched on behalf of one owner.
 *
 * The limits are enforced by the `totalBytes` and `imagesCount` setters, which
 * throw a `JoplinError` with `ErrorCode.DownloadLimiter`. Both setters check
 * the value stored *before* the assignment, so the write that crosses a limit
 * is still accepted and it is the following write that throws.
 */
export class LimitedDownloadController implements DownloadController {
	private totalBytes_ = 0;
	// Incremented when a download is started, not when it finishes, so if the
	// byte limit interrupts transfers this can be higher than the number of
	// images that were actually downloaded.
	private imagesCount_ = 0;
	// How many image links the content has
	private imageCountExpected_ = 0;

	private readonly maxTotalBytes: number;
	public readonly maxImagesCount: number;
	private readonly ownerId: string;

	/**
	 * @param ownerId Identifier included in the statistics log line.
	 * @param maxTotalBytes Byte total at which further chunks are rejected.
	 * @param maxImagesCount Maximum number of images that may be downloaded.
	 */
	public constructor(ownerId: string, maxTotalBytes: number, maxImagesCount: number) {
		this.ownerId = ownerId;
		this.maxTotalBytes = maxTotalBytes;
		this.maxImagesCount = maxImagesCount;
	}

	// True once the bytes received so far have reached the configured maximum.
	private hasExceededMaximumSize() {
		return this.totalBytes_ >= this.maxTotalBytes;
	}

	// True once more downloads have been started than the configured maximum.
	private hasExceededMaximumImages() {
		return this.imagesCount_ > this.maxImagesCount;
	}

	/**
	 * @throws JoplinError (`ErrorCode.DownloadLimiter`) when the total stored
	 * before this assignment has already reached `maxTotalBytes`.
	 */
	public set totalBytes(value: number) {
		if (this.hasExceededMaximumSize()) {
			throw new JoplinError(`Total bytes stored (${this.totalBytes_}) has exceeded the amount established (${this.maxTotalBytes})`, ErrorCode.DownloadLimiter);
		}
		this.totalBytes_ = value;
	}

	public get totalBytes() {
		return this.totalBytes_;
	}

	/**
	 * @throws JoplinError (`ErrorCode.DownloadLimiter`) when the count stored
	 * before this assignment is already above `maxImagesCount`.
	 */
	public set imagesCount(value: number) {
		if (this.hasExceededMaximumImages()) {
			throw new JoplinError(`Total images to be stored (${this.imagesCount_}) has exceeded the amount established (${this.maxImagesCount})`, ErrorCode.DownloadLimiter);
		}
		this.imagesCount_ = value;
	}

	public get imagesCount() {
		return this.imagesCount_;
	}

	public set imageCountExpected(value: number) {
		this.imageCountExpected_ = value;
	}

	public get imageCountExpected() {
		return this.imageCountExpected_;
	}

	/**
	 * Builds a `data` listener for `request`'s response: each chunk's length is
	 * added to `totalBytes`, and if that is rejected the request is destroyed
	 * with the limit error so the transfer stops.
	 */
	public handleChunk(request: any) {
		return (chunk: any) => {
			try {
				this.totalBytes += chunk.length;
			} catch (error) {
				request.destroy(error);
			}
		};
	}

	/**
	 * Logs one line of statistics, but only when a limit has been reached.
	 *
	 * This previously depended on a flag that was never set, so nothing was
	 * ever logged. The state is now derived from the counters themselves,
	 * using the same conditions as `limitMessage()`.
	 */
	public printStats() {
		if (!this.hasExceededMaximumSize() && !this.hasExceededMaximumImages()) return;

		const owner = `Owner id: ${this.ownerId}`;
		const totalBytes = `Total bytes stored: ${this.totalBytes}. Maximum: ${this.maxTotalBytes}`;
		const totalImages = `Images initiated for download: ${this.imagesCount_}. Maximum: ${this.maxImagesCount}. Expected: ${this.imageCountExpected}`;
		logger.info(`${owner} - ${totalBytes} - ${totalImages}`);
	}

	/**
	 * @returns A user-facing message naming the limit that was reached (the
	 * image count takes precedence over the size), or an empty string when no
	 * limit has been reached.
	 */
	public limitMessage() {
		if (this.hasExceededMaximumImages()) {
			return `The maximum image count of ${this.maxImagesCount} has been exceeded. Image count in your content: ${this.imageCountExpected}`;
		}
		if (this.hasExceededMaximumSize()) {
			return `The maximum content size ${bytesToHuman(this.maxTotalBytes)} has been exceeded. Content size: (${bytesToHuman(this.totalBytes)})`;
		}
		return '';
	}
}

View File

@ -5,4 +5,5 @@ export enum ErrorCode {
NotFound = 'notFound',
UnsupportedMimeType = 'unsupportedMimeType',
MustUpgradeApp = 'mustUpgradeApp',
DownloadLimiter = 'downloadLimiter',
}

View File

@ -54,7 +54,6 @@
"color": "3.2.1",
"compare-versions": "6.1.0",
"diff-match-patch": "1.0.5",
"es6-promise-pool": "2.5.0",
"fast-deep-equal": "3.1.3",
"fast-xml-parser": "3.21.1",
"follow-redirects": "1.15.5",

View File

@ -28,6 +28,7 @@ const { MarkupToHtml } = require('@joplin/renderer');
const { ErrorNotFound } = require('../utils/errors');
import { fileUriToPath } from '@joplin/utils/url';
import { NoteEntity } from '../../database/types';
import { DownloadController } from '../../../downloadController';
const logger = Logger.create('routes/notes');
@ -66,6 +67,7 @@ type RequestNote = {
// Options accepted by the media download helpers in this file; downloadMediaFiles
// passes them on to downloadMediaFile.
type FetchOptions = {
// NOTE(review): unit not visible here (presumably milliseconds) - confirm.
timeout?: number;
maxRedirects?: number;
// When set, downloadMediaFiles uses it to count and cap the images it downloads.
downloadController?: DownloadController;
};
async function requestNoteToNote(requestNote: RequestNote): Promise<NoteEntity> {
@ -263,26 +265,31 @@ export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions
}
// Downloads the media files referenced by `urls` and returns an object keyed by URL whose
// values are `{ path, originalUrl }` for every download that produced a path.
// NOTE(review): this listing appears to come from a diff view without +/- markers, so lines
// from a promise-pool based implementation and from a Promise.all based implementation seem
// to be interleaved - confirm the real statement order against the repository.
async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
const PromisePool = require('es6-promise-pool');
const output: any = {};
// The controller is optional; without one no counting or capping takes place.
const downloadController = fetchOptions?.downloadController ?? null;
const downloadOne = async (url: string) => {
// Counted before the download starts, not once it has completed.
if (downloadController) downloadController.imagesCount += 1;
const mediaPath = await downloadMediaFile(url, fetchOptions, allowedProtocols);
if (mediaPath) output[url] = { path: mediaPath, originalUrl: url };
};
let urlIndex = 0;
const promiseProducer = () => {
if (urlIndex >= urls.length) return null;
// With no controller the cap is infinite, so the slice below keeps every URL.
const maximumImageDownloadsAllowed = downloadController ? downloadController.maxImagesCount : Number.POSITIVE_INFINITY;
const urlsAllowedByController = urls.slice(0, maximumImageDownloadsAllowed);
logger.info(`Media files allowed to be downloaded: ${maximumImageDownloadsAllowed}`);
const url = urls[urlIndex++];
return downloadOne(url);
};
const promises = [];
for (const url of urlsAllowedByController) {
promises.push(downloadOne(url));
}
const concurrency = 10;
const pool = new PromisePool(promiseProducer, concurrency);
await pool.start();
await Promise.all(promises);
if (downloadController) {
// Records how many URLs the content contained, whether or not all of them were downloaded.
downloadController.imageCountExpected = urls.length;
downloadController.printStats(urls.length);
}
return output;
}
@ -459,7 +466,13 @@ export default async function(request: Request, id: string = null, link: string
logger.info('Images:', imageSizes);
const allowedProtocolsForDownloadMediaFiles = ['http:', 'https:', 'file:', 'data:'];
const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes, undefined, allowedProtocolsForDownloadMediaFiles);
const extracted = await extractNoteFromHTML(
requestNote,
requestId,
imageSizes,
undefined,
allowedProtocolsForDownloadMediaFiles,
);
let note = await Note.save(extracted.note, extracted.saveOptions);

View File

@ -9,6 +9,7 @@ import * as fs from 'fs-extra';
import * as pdfJsNamespace from 'pdfjs-dist';
import { writeFile } from 'fs/promises';
import { ResourceEntity } from './services/database/types';
import { DownloadController } from './downloadController';
import { TextItem } from 'pdfjs-dist/types/src/display/api';
import replaceUnsupportedCharacters from './utils/replaceUnsupportedCharacters';
@ -25,6 +26,15 @@ const dgram = require('dgram');
const proxySettings: any = {};
// Options accepted by shim.fetchBlob.
type FetchBlobOptions = {
// Target file path; fetchBlob throws when it is missing.
path?: string;
// fetchBlob sets this to 'GET' when it is not provided.
method?: string;
maxRedirects?: number;
// NOTE(review): unit not visible here (presumably milliseconds) - confirm.
timeout?: number;
// fetchBlob falls back to an empty object when no headers are given.
headers?: any;
// When set, its handleChunk() callback is attached to the response's 'data' event.
downloadController?: DownloadController;
};
function fileExists(filePath: string) {
try {
return fs.statSync(filePath).isFile();
@ -493,7 +503,7 @@ function shimInit(options: ShimInitOptions = null) {
}, options);
};
shim.fetchBlob = async function(url: any, options) {
shim.fetchBlob = async function(url: any, options: FetchBlobOptions) {
if (!options || !options.path) throw new Error('fetchBlob: target file path is missing');
if (!options.method) options.method = 'GET';
// if (!('maxRetry' in options)) options.maxRetry = 5;
@ -510,6 +520,7 @@ function shimInit(options: ShimInitOptions = null) {
const http = url.protocol.toLowerCase() === 'http:' ? require('follow-redirects').http : require('follow-redirects').https;
const headers = options.headers ? options.headers : {};
const filePath = options.path;
const downloadController = options.downloadController;
function makeResponse(response: any) {
return {
@ -571,6 +582,11 @@ function shimInit(options: ShimInitOptions = null) {
});
const request = http.request(requestOptions, (response: any) => {
if (downloadController) {
response.on('data', downloadController.handleChunk(request));
}
response.pipe(file);
const isGzipped = response.headers['content-encoding'] === 'gzip';

12
packages/utils/bytes.ts Normal file
View File

@ -0,0 +1,12 @@
/**
 * Formats a byte count as a string with one decimal place and a unit, e.g.
 * `1536` becomes `"1.5 KB"`.
 *
 * Units go from Bytes to GB in steps of 1024; anything larger stays in GB.
 *
 * @param bytes Size in bytes.
 * @returns The formatted size.
 */
// eslint-disable-next-line import/prefer-default-export
export const bytesToHuman = (bytes: number) => {
	const units = ['Bytes', 'KB', 'MB', 'GB'];
	const lastUnitIndex = units.length - 1;

	let value = bytes;
	let unitIndex = 0;

	// Compare the value as it will be displayed: a value just below the next
	// unit (e.g. 1048575 bytes) rounds up to "1024.0" and must move to the
	// next unit instead of being printed as "1024.0 KB".
	while (unitIndex < lastUnitIndex && Number(value.toFixed(1)) >= 1024) {
		value /= 1024;
		unitIndex++;
	}

	return `${value.toFixed(1)} ${units[unitIndex]}`;
};

View File

@ -6866,7 +6866,6 @@ __metadata:
color: 3.2.1
compare-versions: 6.1.0
diff-match-patch: 1.0.5
es6-promise-pool: 2.5.0
fast-deep-equal: 3.1.3
fast-xml-parser: 3.21.1
follow-redirects: 1.15.5
@ -20008,13 +20007,6 @@ __metadata:
languageName: node
linkType: hard
"es6-promise-pool@npm:2.5.0":
version: 2.5.0
resolution: "es6-promise-pool@npm:2.5.0"
checksum: e472ec5959b022b28e678446674c78dd2d198dd50c537ef59916d32d2423fe4518c43f132d81f2e98249b8b8450c95f77b8d9aecc1fb15e8dcd224c5b98f0cce
languageName: node
linkType: hard
"es6-promise@npm:^4.0.3, es6-promise@npm:^4.1.1":
version: 4.2.8
resolution: "es6-promise@npm:4.2.8"