
Server: Allow specifying a max content size when importing content to new storage

Laurent Cozic 2021-11-23 16:06:56 +00:00
parent 82defbdd7b
commit 080c3cc7dc
3 changed files with 107 additions and 27 deletions
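
This change threads a new `maxContentSize` option from the server's `StorageCommand` down to `ItemModel.importContentToStorage()`: any item whose `content_size` exceeds the limit is skipped and logged instead of being copied to the target storage. A minimal sketch of the resulting call, using only names that appear in this diff (the `models`, `toStorageConfig` and `logger` setup is assumed):

	import Logger from '@joplin/lib/Logger';

	const logger = new Logger();

	// Values shown are the defaults introduced by this commit.
	await models.item().importContentToStorage(toStorageConfig, {
		batchSize: 1000, // items fetched per iteration of the import loop
		maxContentSize: 200000000, // ~200 MB; larger items are skipped and logged
		logger,
	});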

StorageCommand.ts

@@ -15,6 +15,7 @@ interface Argv {
 	command: ArgvCommand;
 	connection: string;
 	batchSize?: number;
+	maxContentSize?: number;
 }
 
 export default class StorageCommand extends BaseCommand {
@@ -45,6 +46,10 @@ export default class StorageCommand extends BaseCommand {
 			type: 'number',
 			description: 'Item batch size',
 		},
+		'max-content-size': {
+			type: 'number',
+			description: 'Max content size',
+		},
 		'connection': {
 			description: 'storage connection string',
 			type: 'string',
@@ -59,13 +64,16 @@ export default class StorageCommand extends BaseCommand {
 			const toStorageConfig = parseStorageConnectionString(argv.connection);
 			const batchSize = argv.batchSize || 1000;
+			const maxContentSize = argv.maxContentSize || 200000000;
 
 			logger.info('Importing to storage:', toStorageConfig);
 			logger.info(`Batch size: ${batchSize}`);
+			logger.info(`Max content size: ${maxContentSize}`);
 
 			await runContext.models.item().importContentToStorage(toStorageConfig, {
-				batchSize: batchSize || 1000,
-				logger: logger as Logger,
+				batchSize,
+				maxContentSize,
+				logger,
 			});
 		},
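
Two details of the command handler above: the dashed flag `max-content-size` arrives as `argv.maxContentSize` (matching the optional field added to the `Argv` interface, presumably via the argument parser's camel-case expansion), and the fallbacks use `||`, which treats 0 as falsy. Passing `--max-content-size 0` would therefore silently revert to the ~200 MB default rather than blocking all imports; a nullish check would preserve an explicit zero (a sketch, not part of the commit):

	// `??` only falls back on null/undefined, so an explicit 0 survives.
	const maxContentSize = argv.maxContentSize ?? 200000000;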

ItemModel.test.ts

@@ -301,9 +301,7 @@ describe('ItemModel', function() {
 		await expectNotThrow(async () => models.item().loadWithContent(item.id));
 	});
 
-	test('should allow importing content to item storage', async function() {
-		const { user: user1 } = await createUserAndSession(1);
-
+	const setupImportContentTest = async () => {
 		const tempDir1 = await tempDir('storage1');
 		const tempDir2 = await tempDir('storage2');
@@ -312,20 +310,52 @@ describe('ItemModel', function() {
 			path: tempDir1,
 		};
 
-		const models = newModelFactory(db(), {
+		const toStorageConfig = {
+			type: StorageDriverType.Filesystem,
+			path: tempDir2,
+		};
+
+		const fromModels = newModelFactory(db(), {
 			...config(),
 			storageDriver: fromStorageConfig,
 		});
 
-		await models.item().saveFromRawContent(user1, {
+		const toModels = newModelFactory(db(), {
+			...config(),
+			storageDriver: toStorageConfig,
+		});
+
+		const fromDriver = await loadStorageDriver(fromStorageConfig, db());
+		const toDriver = await loadStorageDriver(toStorageConfig, db());
+
+		return {
+			fromStorageConfig,
+			toStorageConfig,
+			fromModels,
+			toModels,
+			fromDriver,
+			toDriver,
+		};
+	};
+
+	test('should allow importing content to item storage', async function() {
+		const { user: user1 } = await createUserAndSession(1);
+
+		const {
+			toStorageConfig,
+			fromModels,
+			fromDriver,
+			toDriver,
+		} = await setupImportContentTest();
+
+		await fromModels.item().saveFromRawContent(user1, {
 			body: Buffer.from(JSON.stringify({ 'version': 1 })),
 			name: 'info.json',
 		});
 
-		const itemBefore = (await models.item().all())[0];
+		const itemBefore = (await fromModels.item().all())[0];
 
-		const fromDriver = await loadStorageDriver(fromStorageConfig, db());
-		const fromContent = await fromDriver.read(itemBefore.id, { models });
+		const fromContent = await fromDriver.read(itemBefore.id, { models: fromModels });
 
 		expect(fromContent.toString()).toBe('{"version":1}');
@@ -333,11 +363,6 @@ describe('ItemModel', function() {
 		await msleep(2);
 
-		const toStorageConfig = {
-			type: StorageDriverType.Filesystem,
-			path: tempDir2,
-		};
-
-		const toModels = newModelFactory(db(), {
-			...config(),
-			storageDriver: toStorageConfig,
-		});
@@ -350,24 +375,55 @@ describe('ItemModel', function() {
 		const itemBefore2 = result['info2.json'].item;
 
-		await models.item().importContentToStorage(toStorageConfig);
+		await fromModels.item().importContentToStorage(toStorageConfig);
 
-		const itemAfter = (await models.item().all()).find(it => it.id === itemBefore.id);
+		const itemAfter = (await fromModels.item().all()).find(it => it.id === itemBefore.id);
 		expect(itemAfter.content_storage_id).toBe(2);
 		expect(itemAfter.updated_time).toBe(itemBefore.updated_time);
 
 		// Just check the second item has not been processed since it was
 		// already on the right storage
-		const itemAfter2 = (await models.item().all()).find(it => it.id === itemBefore2.id);
+		const itemAfter2 = (await fromModels.item().all()).find(it => it.id === itemBefore2.id);
 		expect(itemAfter2.content_storage_id).toBe(2);
 		expect(itemAfter2.updated_time).toBe(itemBefore2.updated_time);
 
-		const toDriver = await loadStorageDriver(toStorageConfig, db());
-		const toContent = await toDriver.read(itemAfter.id, { models });
+		const toContent = await toDriver.read(itemAfter.id, { models: fromModels });
 
 		expect(toContent.toString()).toBe(fromContent.toString());
 	});
 
+	test('should skip large items when importing content to item storage', async function() {
+		const { user: user1 } = await createUserAndSession(1);
+
+		const {
+			toStorageConfig,
+			fromModels,
+			fromDriver,
+			toDriver,
+		} = await setupImportContentTest();
+
+		const result = await fromModels.item().saveFromRawContent(user1, {
+			body: Buffer.from(JSON.stringify({ 'version': 1 })),
+			name: 'info.json',
+		});
+
+		const itemId = result['info.json'].item.id;
+
+		expect(await fromDriver.exists(itemId, { models: fromModels })).toBe(true);
+
+		await fromModels.item().importContentToStorage(toStorageConfig, {
+			maxContentSize: 1,
+		});
+
+		expect(await toDriver.exists(itemId, { models: fromModels })).toBe(false);
+
+		await fromModels.item().importContentToStorage(toStorageConfig, {
+			maxContentSize: 999999,
+		});
+
+		expect(await toDriver.exists(itemId, { models: fromModels })).toBe(true);
+	});
 
 	// test('should stop importing item if it has been deleted', async function() {
 	// 	const { user: user1 } = await createUserAndSession(1);
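
The new test also shows that the skip list is local to a single `importContentToStorage()` call: a run with `maxContentSize: 1` leaves the ~13-byte item on the source storage, and a later run with a higher limit moves it, so the import is safely re-runnable after raising the limit. Condensed from the assertions above:

	// First pass skips the item; a second pass with a higher limit moves it.
	await fromModels.item().importContentToStorage(toStorageConfig, { maxContentSize: 1 });
	await fromModels.item().importContentToStorage(toStorageConfig, { maxContentSize: 999999 });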

ItemModel.ts

@@ -13,7 +13,7 @@ import { Config, StorageDriverConfig, StorageDriverMode } from '../utils/types';
 import { NewModelFactoryHandler } from './factory';
 import loadStorageDriver from './items/storage/loadStorageDriver';
 import { msleep } from '../utils/time';
-import Logger from '@joplin/lib/Logger';
+import Logger, { LoggerWrapper } from '@joplin/lib/Logger';
 import prettyBytes = require('pretty-bytes');
 
 const mimeUtils = require('@joplin/lib/mime-utils.js').mime;
@@ -23,7 +23,8 @@ const extractNameRegex = /^root:\/(.*):$/;
 
 export interface ImportContentToStorageOptions {
 	batchSize?: number;
-	logger?: Logger;
+	maxContentSize?: number;
+	logger?: Logger | LoggerWrapper;
 }
 
 export interface SaveFromRawContentItem {
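
Widening the option to `Logger | LoggerWrapper` lets callers pass either a full `Logger` instance or the wrapper type that `@joplin/lib/Logger` also exports. The import code below only ever calls `info()` and `warn()`, so the shape the option effectively depends on is small (an assumed structural sketch, for illustration only):

	// Minimal logging surface used by importContentToStorage().
	interface MinimalImportLogger {
		info(...args: any[]): void;
		warn(...args: any[]): void;
	}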
@@ -282,7 +283,7 @@ export default class ItemModel extends BaseModel<Item> {
 		}
 	}
 
-	private async atomicMoveContent(item: Item, toDriver: StorageDriverBase, drivers: Record<number, StorageDriverBase>, logger: Logger) {
+	private async atomicMoveContent(item: Item, toDriver: StorageDriverBase, drivers: Record<number, StorageDriverBase>, logger: Logger | LoggerWrapper) {
 		for (let i = 0; i < 10; i++) {
 			let fromDriver: StorageDriverBase = drivers[item.content_storage_id];
@@ -338,6 +339,7 @@ export default class ItemModel extends BaseModel<Item> {
 	public async importContentToStorage(toStorageConfig: StorageDriverConfig | StorageDriverBase, options: ImportContentToStorageOptions = null) {
 		options = {
 			batchSize: 1000,
+			maxContentSize: 200000000,
 			logger: new Logger(),
 			...options,
 		};
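
The defaulting relies on object-spread semantics: spreading `null` yields nothing, which is why `options = null` works as a parameter default, while any own property on the caller's object, even one explicitly set to `undefined`, overrides its default. A self-contained sketch of both behaviours:

	const defaults = { batchSize: 1000, maxContentSize: 200000000 };
	const a = { ...defaults, ...null }; // keeps both defaults
	const b = { ...defaults, maxContentSize: undefined }; // maxContentSize is now undefined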
@@ -350,23 +352,37 @@ export default class ItemModel extends BaseModel<Item> {
 			.where('content_storage_id', '!=', toStorageDriver.storageId)
 			.first())['total'];
 
+		const skippedItemIds: Uuid[] = [];
+
 		let totalDone = 0;
 
 		while (true) {
-			const items: Item[] = await this
+			const query = this
 				.db(this.tableName)
-				.select(['id', 'content_storage_id', 'updated_time'])
-				.where('content_storage_id', '!=', toStorageDriver.storageId)
-				.limit(options.batchSize);
+				.select(['id', 'content_storage_id', 'content_size', 'updated_time'])
+				.where('content_storage_id', '!=', toStorageDriver.storageId);
+
+			if (skippedItemIds.length) void query.whereNotIn('id', skippedItemIds);
+
+			void query.limit(options.batchSize);
+
+			const items: Item[] = await query;
 
 			options.logger.info(`Processing items ${totalDone} / ${itemCount}`);
 
 			if (!items.length) {
 				options.logger.info(`All items have been processed. Total: ${totalDone}`);
+				options.logger.info(`Skipped items: ${skippedItemIds.join(', ')}`);
 				return;
 			}
 
 			for (const item of items) {
+				if (item.content_size > options.maxContentSize) {
+					options.logger.warn(`Skipped item "${item.id}" (Size: ${prettyBytes(item.content_size)}) because it is over the size limit (${prettyBytes(options.maxContentSize)})`);
+					skippedItemIds.push(item.id);
+					continue;
+				}
+
 				try {
 					await this.atomicMoveContent(item, toStorageDriver, fromDrivers, options.logger);
 				} catch (error) {
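
The `skippedItemIds` bookkeeping is also what keeps `while (true)` terminating: a skipped item still satisfies the `content_storage_id != target` condition on the next query, so without the `whereNotIn` exclusion the same oversized items would be re-fetched forever. Each iteration either moves an item (so it stops matching the WHERE clause) or appends it to `skippedItemIds` (so it is excluded from the next batch), and the candidate set strictly shrinks. A condensed view of that invariant, with names from the diff and the knex plumbing simplified:

	// Every fetched item either moves or joins skippedItemIds, so each
	// iteration shrinks the set matched by the query and the loop ends.
	while (true) {
		const query = this.db('items')
			.select(['id', 'content_size'])
			.where('content_storage_id', '!=', toStorageDriver.storageId);
		if (skippedItemIds.length) void query.whereNotIn('id', skippedItemIds);
		void query.limit(options.batchSize);
		const items: Item[] = await query;
		if (!items.length) return;
		// ...move or skip each item as shown above...
	}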