1
0
mirror of https://github.com/immich-app/immich.git synced 2024-12-24 10:37:28 +02:00

feat(server): lower library scan memory usage (#7939)

* use trie

* update tests

* formatting

* pr feedback

* linting
This commit is contained in:
Mert 2024-03-14 01:52:30 -04:00 committed by GitHub
parent 63d252b603
commit d67cc00e4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 143 additions and 62 deletions

View File

@ -47,6 +47,7 @@
"js-yaml": "^4.1.0",
"lodash": "^4.17.21",
"luxon": "^3.4.2",
"mnemonist": "^0.39.8",
"nest-commander": "^3.11.1",
"nestjs-otel": "^5.1.5",
"node-addon-api": "^7.0.0",
@ -10426,6 +10427,14 @@
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==",
"dev": true
},
"node_modules/mnemonist": {
"version": "0.39.8",
"resolved": "https://registry.npmjs.org/mnemonist/-/mnemonist-0.39.8.tgz",
"integrity": "sha512-vyWo2K3fjrUw8YeeZ1zF0fy6Mu59RHokURlld8ymdUPjMlD9EC9ov1/YPqTgqRvUN9nTr3Gqfz29LYAmu0PHPQ==",
"dependencies": {
"obliterator": "^2.0.1"
}
},
"node_modules/mock-fs": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/mock-fs/-/mock-fs-5.2.0.tgz",
@ -10819,6 +10828,11 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/obliterator": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/obliterator/-/obliterator-2.0.4.tgz",
"integrity": "sha512-lgHwxlxV1qIg1Eap7LgIeoBWIMFibOjbrYPIPJZcI1mmGAI2m3lNYpK12Y+GBdPQ0U1hRwSord7GIaawz962qQ=="
},
"node_modules/obuf": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz",
@ -22037,6 +22051,14 @@
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==",
"dev": true
},
"mnemonist": {
"version": "0.39.8",
"resolved": "https://registry.npmjs.org/mnemonist/-/mnemonist-0.39.8.tgz",
"integrity": "sha512-vyWo2K3fjrUw8YeeZ1zF0fy6Mu59RHokURlld8ymdUPjMlD9EC9ov1/YPqTgqRvUN9nTr3Gqfz29LYAmu0PHPQ==",
"requires": {
"obliterator": "^2.0.1"
}
},
"mock-fs": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/mock-fs/-/mock-fs-5.2.0.tgz",
@ -22349,6 +22371,11 @@
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.3.tgz",
"integrity": "sha512-geUvdk7c+eizMNUDkRpW1wJwgfOiOeHbxBR/hLXK1aT6zmVSO0jsQcs7fj6MGw89jC/cjGfLcNOrtMYtGqm81g=="
},
"obliterator": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/obliterator/-/obliterator-2.0.4.tgz",
"integrity": "sha512-lgHwxlxV1qIg1Eap7LgIeoBWIMFibOjbrYPIPJZcI1mmGAI2m3lNYpK12Y+GBdPQ0U1hRwSord7GIaawz962qQ=="
},
"obuf": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz",

View File

@ -71,6 +71,7 @@
"js-yaml": "^4.1.0",
"lodash": "^4.17.21",
"luxon": "^3.4.2",
"mnemonist": "^0.39.8",
"nest-commander": "^3.11.1",
"nestjs-otel": "^5.1.5",
"node-addon-api": "^7.0.0",

View File

@ -155,7 +155,10 @@ describe(LibraryService.name, () => {
};
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
// eslint-disable-next-line @typescript-eslint/require-await
storageMock.walk.mockImplementation(async function* generator() {
yield '/data/user1/photo.jpg';
});
assetMock.getLibraryAssetPaths.mockResolvedValue({ items: [], hasNextPage: false });
await sut.handleQueueAssetRefresh(mockLibraryJob);
@ -181,7 +184,10 @@ describe(LibraryService.name, () => {
};
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
// eslint-disable-next-line @typescript-eslint/require-await
storageMock.walk.mockImplementation(async function* generator() {
yield '/data/user1/photo.jpg';
});
assetMock.getLibraryAssetPaths.mockResolvedValue({ items: [], hasNextPage: false });
await sut.handleQueueAssetRefresh(mockLibraryJob);
@ -231,12 +237,11 @@ describe(LibraryService.name, () => {
};
libraryMock.get.mockResolvedValue(libraryStub.externalLibraryWithImportPaths1);
storageMock.crawl.mockResolvedValue([]);
assetMock.getLibraryAssetPaths.mockResolvedValue({ items: [], hasNextPage: false });
await sut.handleQueueAssetRefresh(mockLibraryJob);
expect(storageMock.crawl).toHaveBeenCalledWith({
expect(storageMock.walk).toHaveBeenCalledWith({
pathsToCrawl: [libraryStub.externalLibraryWithImportPaths1.importPaths[1]],
exclusionPatterns: [],
});
@ -250,7 +255,6 @@ describe(LibraryService.name, () => {
};
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue([]);
assetMock.getLibraryAssetPaths.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
@ -271,7 +275,10 @@ describe(LibraryService.name, () => {
};
libraryMock.get.mockResolvedValue(libraryStub.externalLibrary1);
storageMock.crawl.mockResolvedValue([assetStub.offline.originalPath]);
// eslint-disable-next-line @typescript-eslint/require-await
storageMock.walk.mockImplementation(async function* generator() {
yield assetStub.offline.originalPath;
});
assetMock.getLibraryAssetPaths.mockResolvedValue({
items: [assetStub.offline],
hasNextPage: false,

View File

@ -1,6 +1,7 @@
import { AssetType, LibraryType } from '@app/infra/entities';
import { AssetType, LibraryEntity, LibraryType } from '@app/infra/entities';
import { ImmichLogger } from '@app/infra/logger';
import { BadRequestException, Inject, Injectable } from '@nestjs/common';
import { Trie } from 'mnemonist';
import { R_OK } from 'node:constants';
import { EventEmitter } from 'node:events';
import { Stats } from 'node:fs';
@ -11,7 +12,6 @@ import { AuthDto } from '../auth';
import { mimeTypes } from '../domain.constant';
import { handlePromiseError, usePagination, validateCronExpression } from '../domain.util';
import { IBaseJob, IEntityJob, ILibraryFileJob, ILibraryRefreshJob, JOBS_ASSET_PAGINATION_SIZE, JobName } from '../job';
import {
DatabaseLock,
IAccessRepository,
@ -39,6 +39,8 @@ import {
mapLibrary,
} from './library.dto';
const LIBRARY_SCAN_BATCH_SIZE = 5000;
@Injectable()
export class LibraryService extends EventEmitter {
readonly logger = new ImmichLogger(LibraryService.name);
@ -626,6 +628,69 @@ export class LibraryService extends EventEmitter {
this.logger.verbose(`Refreshing library: ${job.id}`);
const crawledAssetPaths = await this.getPathTrie(library);
this.logger.debug(`Found ${crawledAssetPaths.size} asset(s) when crawling import paths ${library.importPaths}`);
const assetIdsToMarkOffline = [];
const assetIdsToMarkOnline = [];
const pagination = usePagination(LIBRARY_SCAN_BATCH_SIZE, (pagination) =>
this.assetRepository.getLibraryAssetPaths(pagination, library.id),
);
const shouldScanAll = job.refreshAllFiles || job.refreshModifiedFiles;
for await (const page of pagination) {
for (const asset of page) {
const isOffline = !crawledAssetPaths.has(asset.originalPath);
if (isOffline && !asset.isOffline) {
assetIdsToMarkOffline.push(asset.id);
}
if (!isOffline && asset.isOffline) {
assetIdsToMarkOnline.push(asset.id);
}
if (!shouldScanAll) {
crawledAssetPaths.delete(asset.originalPath);
}
}
}
if (assetIdsToMarkOffline.length > 0) {
this.logger.debug(`Found ${assetIdsToMarkOffline.length} offline asset(s) previously marked as online`);
await this.assetRepository.updateAll(assetIdsToMarkOffline, { isOffline: true });
}
if (assetIdsToMarkOnline.length > 0) {
this.logger.debug(`Found ${assetIdsToMarkOnline.length} online asset(s) previously marked as offline`);
await this.assetRepository.updateAll(assetIdsToMarkOnline, { isOffline: false });
}
if (crawledAssetPaths.size > 0) {
if (!shouldScanAll) {
this.logger.debug(`Will import ${crawledAssetPaths.size} new asset(s)`);
}
const batch = [];
for (const assetPath of crawledAssetPaths) {
batch.push(assetPath);
if (batch.length >= LIBRARY_SCAN_BATCH_SIZE) {
await this.scanAssets(job.id, batch, library.ownerId, job.refreshAllFiles ?? false);
batch.length = 0;
}
}
if (batch.length > 0) {
await this.scanAssets(job.id, batch, library.ownerId, job.refreshAllFiles ?? false);
}
}
await this.repository.update({ id: job.id, refreshedAt: new Date() });
return true;
}
private async getPathTrie(library: LibraryEntity): Promise<Trie<string>> {
const pathValidation = await Promise.all(
library.importPaths.map(async (importPath) => await this.validateImportPath(importPath)),
);
@ -640,61 +705,17 @@ export class LibraryService extends EventEmitter {
.filter((validation) => validation.isValid)
.map((validation) => validation.importPath);
let rawPaths = await this.storageRepository.crawl({
const generator = this.storageRepository.walk({
pathsToCrawl: validImportPaths,
exclusionPatterns: library.exclusionPatterns,
});
const crawledAssetPaths = new Set<string>(rawPaths);
const shouldScanAll = job.refreshAllFiles || job.refreshModifiedFiles;
let pathsToScan: string[] = shouldScanAll ? rawPaths : [];
rawPaths = [];
this.logger.debug(`Found ${crawledAssetPaths.size} asset(s) when crawling import paths ${library.importPaths}`);
const assetIdsToMarkOffline = [];
const assetIdsToMarkOnline = [];
const pagination = usePagination(5000, (pagination) =>
this.assetRepository.getLibraryAssetPaths(pagination, library.id),
);
for await (const page of pagination) {
for (const asset of page) {
const isOffline = !crawledAssetPaths.has(asset.originalPath);
if (isOffline && !asset.isOffline) {
assetIdsToMarkOffline.push(asset.id);
}
if (!isOffline && asset.isOffline) {
assetIdsToMarkOnline.push(asset.id);
}
crawledAssetPaths.delete(asset.originalPath);
}
const trie = new Trie<string>();
for await (const filePath of generator) {
trie.add(filePath);
}
if (assetIdsToMarkOffline.length > 0) {
this.logger.debug(`Found ${assetIdsToMarkOffline.length} offline asset(s) previously marked as online`);
await this.assetRepository.updateAll(assetIdsToMarkOffline, { isOffline: true });
}
if (assetIdsToMarkOnline.length > 0) {
this.logger.debug(`Found ${assetIdsToMarkOnline.length} online asset(s) previously marked as offline`);
await this.assetRepository.updateAll(assetIdsToMarkOnline, { isOffline: false });
}
if (!shouldScanAll) {
pathsToScan = [...crawledAssetPaths];
this.logger.debug(`Will import ${pathsToScan.length} new asset(s)`);
}
if (pathsToScan.length > 0) {
await this.scanAssets(job.id, pathsToScan, library.ownerId, job.refreshAllFiles ?? false);
}
await this.repository.update({ id: job.id, refreshedAt: new Date() });
return true;
return trie;
}
private async findOrFail(id: string) {

View File

@ -53,6 +53,7 @@ export interface IStorageRepository {
readdir(folder: string): Promise<string[]>;
stat(filepath: string): Promise<Stats>;
crawl(crawlOptions: CrawlOptionsDto): Promise<string[]>;
walk(crawlOptions: CrawlOptionsDto): AsyncGenerator<string>;
copyFile(source: string, target: string): Promise<void>;
rename(source: string, target: string): Promise<void>;
watch(paths: string[], options: WatchOptions, events: Partial<WatchEvents>): () => Promise<void>;

View File

@ -11,7 +11,7 @@ import {
import { ImmichLogger } from '@app/infra/logger';
import archiver from 'archiver';
import chokidar, { WatchOptions } from 'chokidar';
import { glob } from 'fast-glob';
import { glob, globStream } from 'fast-glob';
import { constants, createReadStream, existsSync, mkdirSync } from 'node:fs';
import fs from 'node:fs/promises';
import path from 'node:path';
@ -141,10 +141,7 @@ export class FilesystemProvider implements IStorageRepository {
return Promise.resolve([]);
}
const base = pathsToCrawl.length === 1 ? pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`;
const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`;
return glob(`${base}/**/${extensions}`, {
return glob(this.asGlob(pathsToCrawl), {
absolute: true,
caseSensitiveMatch: false,
onlyFiles: true,
@ -153,6 +150,26 @@ export class FilesystemProvider implements IStorageRepository {
});
}
async *walk(crawlOptions: CrawlOptionsDto): AsyncGenerator<string> {
const { pathsToCrawl, exclusionPatterns, includeHidden } = crawlOptions;
if (pathsToCrawl.length === 0) {
async function* emptyGenerator() {}
return emptyGenerator();
}
const stream = globStream(this.asGlob(pathsToCrawl), {
absolute: true,
caseSensitiveMatch: false,
onlyFiles: true,
dot: includeHidden,
ignore: exclusionPatterns,
});
for await (const value of stream) {
yield value as string;
}
}
watch(paths: string[], options: WatchOptions, events: Partial<WatchEvents>) {
const watcher = chokidar.watch(paths, options);
@ -164,4 +181,10 @@ export class FilesystemProvider implements IStorageRepository {
return () => watcher.close();
}
private asGlob(pathsToCrawl: string[]): string {
const base = pathsToCrawl.length === 1 ? pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`;
const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`;
return `${base}/**/${extensions}`;
}
}

View File

@ -56,6 +56,7 @@ export const newStorageRepositoryMock = (reset = true): jest.Mocked<IStorageRepo
readdir: jest.fn(),
stat: jest.fn(),
crawl: jest.fn(),
walk: jest.fn().mockImplementation(async function* () {}),
rename: jest.fn(),
copyFile: jest.fn(),
utimes: jest.fn(),