import Logger from '@joplin/utils/Logger';
import JoplinError from './JoplinError';
import { ErrorCode } from './errors';
import { bytesToHuman } from '@joplin/utils/bytes';

const logger = Logger.create('downloadController');

/**
 * Contract for an object that keeps track of, and may cap, what a download
 * session transfers.
 */
export interface DownloadController {
	/** Running total of bytes received so far. */
	totalBytes: number;
	/** Number of image downloads that have been started. */
	imagesCount: number;
	/** Upper bound on the number of images that may be downloaded. */
	maxImagesCount: number;
	/** Number of image links found in the content being processed. */
	imageCountExpected: number;
	/** Reports the collected statistics. */
	printStats(imagesCountExpected: number): void;
	/** Returns a listener that accounts for each chunk received for `request`. */
	handleChunk(request: any): (chunk: any)=> void;
	/** Describes the limit that has been hit, or returns an empty string. */
	limitMessage(): string;
}

/**
 * Download controller that enforces a maximum total size and a maximum number
 * of images on behalf of one owner.
 *
 * Both setters validate the value that is ALREADY stored before accepting the
 * new one. The assignment that first crosses a limit is therefore accepted,
 * and it is the following assignment that throws.
 */
export class LimitedDownloadController implements DownloadController {
	private totalBytes_ = 0;
	// Incremented when a download is started rather than when it completes, so
	// once a limit has been hit this can be higher than the number of images
	// that were actually downloaded.
	private imagesCount_ = 0;
	// How many image links the content has.
	private imageCountExpected_ = 0;

	private readonly maxTotalBytes: number;
	public readonly maxImagesCount: number;
	private readonly ownerId: string;

	/**
	 * @param ownerId Identifier written to the log by `printStats`.
	 * @param maxTotalBytes Byte count at which further chunks are rejected.
	 * @param maxImagesCount Image count above which further downloads are rejected.
	 */
	public constructor(ownerId: string, maxTotalBytes: number, maxImagesCount: number) {
		this.ownerId = ownerId;
		this.maxTotalBytes = maxTotalBytes;
		this.maxImagesCount = maxImagesCount;
	}

	// Derived from the counters instead of being stored in a flag: the flag
	// this replaces was never set, which silenced `printStats` entirely. The
	// two conditions are exactly those under which the setters throw.
	private get isLimitExceeded_(): boolean {
		return this.imagesCount_ > this.maxImagesCount || this.totalBytes_ >= this.maxTotalBytes;
	}

	/**
	 * @throws JoplinError with code `ErrorCode.DownloadLimiter` when the stored
	 * total has already reached `maxTotalBytes`.
	 */
	public set totalBytes(value: number) {
		if (this.totalBytes_ >= this.maxTotalBytes) {
			throw new JoplinError(`Total bytes stored (${this.totalBytes_}) has exceeded the amount established (${this.maxTotalBytes})`, ErrorCode.DownloadLimiter);
		}
		this.totalBytes_ = value;
	}

	public get totalBytes() {
		return this.totalBytes_;
	}

	/**
	 * @throws JoplinError with code `ErrorCode.DownloadLimiter` when the stored
	 * count is already above `maxImagesCount`.
	 */
	public set imagesCount(value: number) {
		if (this.imagesCount_ > this.maxImagesCount) {
			throw new JoplinError(`Total images to be stored (${this.imagesCount_}) has exceeded the amount established (${this.maxImagesCount})`, ErrorCode.DownloadLimiter);
		}
		this.imagesCount_ = value;
	}

	public get imagesCount() {
		return this.imagesCount_;
	}

	public set imageCountExpected(value: number) {
		this.imageCountExpected_ = value;
	}

	public get imageCountExpected() {
		return this.imageCountExpected_;
	}

	/**
	 * Builds a listener that adds each chunk's `length` to the byte total.
	 * When the total is rejected, the error is passed to `request.destroy`
	 * instead of being thrown out of the listener.
	 */
	public handleChunk(request: any) {
		return (chunk: any) => {
			try {
				this.totalBytes += chunk.length;
			} catch (error) {
				request.destroy(error);
			}
		};
	}

	/**
	 * Logs the owner id, byte total and image counters, but only when a limit
	 * has been exceeded.
	 *
	 * @param imagesCountExpected Optional. When provided it is recorded as the
	 * expected image count before the statistics are logged.
	 */
	public printStats(imagesCountExpected?: number) {
		if (imagesCountExpected !== undefined) this.imageCountExpected_ = imagesCountExpected;
		if (!this.isLimitExceeded_) return;

		const owner = `Owner id: ${this.ownerId}`;
		const totalBytes = `Total bytes stored: ${this.totalBytes}. Maximum: ${this.maxTotalBytes}`;
		const totalImages = `Images initiated for download: ${this.imagesCount_}. Maximum: ${this.maxImagesCount}. Expected: ${this.imageCountExpected}`;
		logger.info(`${owner} - ${totalBytes} - ${totalImages}`);
	}

	/**
	 * Returns a message describing which limit has been exceeded. The image
	 * count is checked first; the result is an empty string when neither limit
	 * has been hit.
	 */
	public limitMessage() {
		if (this.imagesCount_ > this.maxImagesCount) {
			return `The maximum image count of ${this.maxImagesCount} has been exceeded. Image count in your content: ${this.imageCountExpected}`;
		}
		if (this.totalBytes >= this.maxTotalBytes) {
			return `The maximum content size ${bytesToHuman(this.maxTotalBytes)} has been exceeded. Content size: (${bytesToHuman(this.totalBytes)})`;
		}
		return '';
	}
}
/**
 * Downloads the given media URLs and returns an object keyed by URL whose
 * values are `{ path, originalUrl }`. URLs for which `downloadMediaFile`
 * yields no path are left out of the result.
 *
 * When `fetchOptions.downloadController` is set:
 * - only the first `maxImagesCount` URLs are downloaded;
 * - its `imagesCount` is incremented as each download starts;
 * - once all downloads have settled it is given the total number of URLs and
 *   asked to print its statistics.
 *
 * At most 10 downloads are in flight at any time.
 */
async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
	const output: any = {};

	const downloadController = fetchOptions?.downloadController ?? null;

	const downloadOne = async (url: string) => {
		if (downloadController) downloadController.imagesCount += 1;
		const mediaPath = await downloadMediaFile(url, fetchOptions, allowedProtocols);
		if (mediaPath) output[url] = { path: mediaPath, originalUrl: url };
	};

	const maximumImageDownloadsAllowed = downloadController ? downloadController.maxImagesCount : Number.POSITIVE_INFINITY;
	const urlsAllowedByController = urls.slice(0, maximumImageDownloadsAllowed);
	logger.info(`Media files allowed to be downloaded: ${maximumImageDownloadsAllowed}`);

	// Without a controller the slice above keeps every URL, so starting all
	// downloads at once would open an unbounded number of connections. A small
	// set of workers pulls from a shared cursor instead.
	const concurrency = 10;
	let nextUrlIndex = 0;
	const worker = async () => {
		while (nextUrlIndex < urlsAllowedByController.length) {
			// Read and advance the cursor in one synchronous step so that two
			// workers can never pick the same URL.
			const url = urlsAllowedByController[nextUrlIndex++];
			await downloadOne(url);
		}
	};

	const workerCount = Math.min(concurrency, urlsAllowedByController.length);
	await Promise.all(Array.from({ length: workerCount }, () => worker()));

	if (downloadController) {
		downloadController.imageCountExpected = urls.length;
		downloadController.printStats(urls.length);
	}

	return output;
}
undefined, allowedProtocolsForDownloadMediaFiles); + const extracted = await extractNoteFromHTML( + requestNote, + requestId, + imageSizes, + undefined, + allowedProtocolsForDownloadMediaFiles, + ); let note = await Note.save(extracted.note, extracted.saveOptions); diff --git a/packages/lib/shim-init-node.ts b/packages/lib/shim-init-node.ts index 87a70ff44..2304a5667 100644 --- a/packages/lib/shim-init-node.ts +++ b/packages/lib/shim-init-node.ts @@ -9,6 +9,7 @@ import * as fs from 'fs-extra'; import * as pdfJsNamespace from 'pdfjs-dist'; import { writeFile } from 'fs/promises'; import { ResourceEntity } from './services/database/types'; +import { DownloadController } from './downloadController'; import { TextItem } from 'pdfjs-dist/types/src/display/api'; import replaceUnsupportedCharacters from './utils/replaceUnsupportedCharacters'; @@ -25,6 +26,15 @@ const dgram = require('dgram'); const proxySettings: any = {}; +type FetchBlobOptions = { + path?: string; + method?: string; + maxRedirects?: number; + timeout?: number; + headers?: any; + downloadController?: DownloadController; +}; + function fileExists(filePath: string) { try { return fs.statSync(filePath).isFile(); @@ -493,7 +503,7 @@ function shimInit(options: ShimInitOptions = null) { }, options); }; - shim.fetchBlob = async function(url: any, options) { + shim.fetchBlob = async function(url: any, options: FetchBlobOptions) { if (!options || !options.path) throw new Error('fetchBlob: target file path is missing'); if (!options.method) options.method = 'GET'; // if (!('maxRetry' in options)) options.maxRetry = 5; @@ -510,6 +520,7 @@ function shimInit(options: ShimInitOptions = null) { const http = url.protocol.toLowerCase() === 'http:' ? require('follow-redirects').http : require('follow-redirects').https; const headers = options.headers ? 
/**
 * Formats a byte count with one decimal place and the largest fitting unit
 * among Bytes, KB, MB and GB, using 1024 as the step between units.
 *
 * The unit is chosen from the value as it will be displayed, so a count just
 * under a boundary is promoted to the next unit instead of being shown as
 * "1024.0" of the smaller one. Negative counts are scaled by magnitude.
 * Values beyond the largest unit stay expressed in GB.
 *
 * @param bytes Size in bytes.
 * @returns The formatted size, e.g. `"1.5 KB"`.
 */
// eslint-disable-next-line import/prefer-default-export
export const bytesToHuman = (bytes: number): string => {
	const units = ['Bytes', 'KB', 'MB', 'GB'];
	let value = bytes;
	let unitIndex = 0;

	// Compare the rounded (displayed) magnitude: 1023.96 rounds to "1024.0"
	// and must therefore move up a unit.
	while (unitIndex < units.length - 1 && Math.abs(Number(value.toFixed(1))) >= 1024) {
		value /= 1024;
		unitIndex++;
	}

	return `${value.toFixed(1)} ${units[unitIndex]}`;
};