From db744f500b83dfc3b58a173047125622711c9d90 Mon Sep 17 00:00:00 2001 From: Jason Rasmussen Date: Fri, 22 Mar 2024 10:30:24 -0400 Subject: [PATCH] refactor(cli): crawl service (#8190) --- cli/src/commands/asset.ts | 140 ++++++++---------- cli/src/services/crawl.service.ts | 70 --------- .../crawl.service.spec.ts => utils.spec.ts} | 30 ++-- cli/src/utils.ts | 64 +++++++- 4 files changed, 147 insertions(+), 157 deletions(-) delete mode 100644 cli/src/services/crawl.service.ts rename cli/src/{services/crawl.service.spec.ts => utils.spec.ts} (94%) diff --git a/cli/src/commands/asset.ts b/cli/src/commands/asset.ts index e7a924aaf2..f3b0073b91 100644 --- a/cli/src/commands/asset.ts +++ b/cli/src/commands/asset.ts @@ -12,11 +12,10 @@ import cliProgress from 'cli-progress'; import { chunk, zip } from 'lodash-es'; import { createHash } from 'node:crypto'; import fs, { createReadStream } from 'node:fs'; -import { access, constants, stat, unlink } from 'node:fs/promises'; +import { access, constants, lstat, stat, unlink } from 'node:fs/promises'; import os from 'node:os'; import path, { basename } from 'node:path'; -import { CrawlService } from 'src/services/crawl.service'; -import { BaseOptions, authenticate } from 'src/utils'; +import { BaseOptions, authenticate, crawl } from 'src/utils'; const zipDefined = zip as (a: T[], b: U[]) => [T, U][]; @@ -115,7 +114,7 @@ class Asset { return unlink(this.path); } - public async hash(): Promise { + async hash(): Promise { const sha1 = (filePath: string) => { const hash = createHash('sha1'); return new Promise((resolve, reject) => { @@ -134,40 +133,60 @@ class Asset { } } -class UploadOptionsDto { - recursive? = false; - exclusionPatterns?: string[] = []; - dryRun? = false; - skipHash? = false; - delete? = false; - album? = false; - albumName? = ''; - includeHidden? = false; - concurrency? = 4; +interface UploadOptionsDto { + recursive?: boolean; + exclusionPatterns?: string[]; + dryRun?: boolean; + skipHash?: boolean; + delete?: boolean; + album?: boolean; + albumName?: string; + includeHidden?: boolean; + concurrency: number; } -export const upload = (paths: string[], baseOptions: BaseOptions, uploadOptions: UploadOptionsDto) => - new UploadCommand().run(paths, baseOptions, uploadOptions); +export const upload = async (paths: string[], baseOptions: BaseOptions, uploadOptions: UploadOptionsDto) => { + await authenticate(baseOptions); + + console.log('Crawling for assets...'); + + const inputFiles: string[] = []; + for (const pathArgument of paths) { + const fileStat = await lstat(pathArgument); + if (fileStat.isFile()) { + inputFiles.push(pathArgument); + } + } + + const { image, video } = await getSupportedMediaTypes(); + const files = await crawl({ + pathsToCrawl: paths, + recursive: uploadOptions.recursive, + exclusionPatterns: uploadOptions.exclusionPatterns, + includeHidden: uploadOptions.includeHidden, + extensions: [...image, ...video], + }); + + files.push(...inputFiles); + + if (files.length === 0) { + console.log('No assets found, exiting'); + return; + } + + return new UploadCommand().run(files, uploadOptions); +}; // TODO refactor this class UploadCommand { - public async run(paths: string[], baseOptions: BaseOptions, options: UploadOptionsDto): Promise { - await authenticate(baseOptions); - - console.log('Crawling for assets...'); - const files = await this.getFiles(paths, options); - - if (files.length === 0) { - console.log('No assets found, exiting'); - return; - } - + async run(files: string[], options: UploadOptionsDto): Promise { + const { concurrency, dryRun } = options; const assetsToCheck = files.map((path) => new Asset(path)); - const { newAssets, duplicateAssets } = await this.checkAssets(assetsToCheck, options.concurrency ?? 4); + const { newAssets, duplicateAssets } = await this.checkAssets(assetsToCheck, concurrency); const totalSizeUploaded = await this.upload(newAssets, options); - const messageStart = options.dryRun ? 'Would have' : 'Successfully'; + const messageStart = dryRun ? 'Would have' : 'Successfully'; if (newAssets.length === 0) { console.log('All assets were already uploaded, nothing to do.'); } else { @@ -189,7 +208,7 @@ class UploadCommand { return; } - if (options.dryRun) { + if (dryRun) { console.log(`Would now have deleted assets, but skipped due to dry run`); return; } @@ -199,7 +218,7 @@ class UploadCommand { await this.deleteAssets(newAssets, options); } - public async checkAssets( + async checkAssets( assetsToCheck: Asset[], concurrency: number, ): Promise<{ newAssets: Asset[]; duplicateAssets: Asset[]; rejectedAssets: Asset[] }> { @@ -237,7 +256,7 @@ class UploadCommand { return { newAssets, duplicateAssets, rejectedAssets }; } - public async upload(assetsToUpload: Asset[], options: UploadOptionsDto): Promise { + async upload(assetsToUpload: Asset[], { dryRun, concurrency }: UploadOptionsDto): Promise { let totalSize = 0; // Compute total size first @@ -245,7 +264,7 @@ class UploadCommand { totalSize += asset.fileSize ?? 0; } - if (options.dryRun) { + if (dryRun) { return totalSize; } @@ -260,7 +279,7 @@ class UploadCommand { let totalSizeUploaded = 0; try { - for (const assets of chunk(assetsToUpload, options.concurrency)) { + for (const assets of chunk(assetsToUpload, concurrency)) { const ids = await this.uploadAssets(assets); for (const [asset, id] of zipDefined(assets, ids)) { asset.id = id; @@ -279,42 +298,21 @@ class UploadCommand { return totalSizeUploaded; } - public async getFiles(paths: string[], options: UploadOptionsDto): Promise { - const inputFiles: string[] = []; - for (const pathArgument of paths) { - const fileStat = await fs.promises.lstat(pathArgument); - if (fileStat.isFile()) { - inputFiles.push(pathArgument); - } - } - - const files: string[] = await this.crawl(paths, options); - files.push(...inputFiles); - return files; - } - - public async getAlbums(): Promise> { - const existingAlbums = await getAllAlbums({}); - - const albumMapping = new Map(); - for (const album of existingAlbums) { - albumMapping.set(album.albumName, album.id); - } - - return albumMapping; - } - - public async updateAlbums( + async updateAlbums( assets: Asset[], options: UploadOptionsDto, ): Promise<{ createdAlbumCount: number; updatedAssetCount: number }> { + const { dryRun, concurrency } = options; + if (options.albumName) { for (const asset of assets) { asset.albumName = options.albumName; } } - const existingAlbums = await this.getAlbums(); + const albums = await getAllAlbums({}); + const existingAlbums = new Map(albums.map((album) => [album.albumName, album.id])); + const assetsToUpdate = assets.filter( (asset): asset is Asset & { albumName: string; id: string } => !!(asset.albumName && asset.id), ); @@ -328,7 +326,7 @@ class UploadCommand { const newAlbums = [...newAlbumsSet]; - if (options.dryRun) { + if (dryRun) { return { createdAlbumCount: newAlbums.length, updatedAssetCount: assetsToUpdate.length }; } @@ -341,7 +339,7 @@ class UploadCommand { albumCreationProgress.start(newAlbums.length, 0); try { - for (const albumNames of chunk(newAlbums, options.concurrency)) { + for (const albumNames of chunk(newAlbums, concurrency)) { const newAlbumIds = await Promise.all( albumNames.map((albumName: string) => createAlbum({ createAlbumDto: { albumName } }).then((r) => r.id)), ); @@ -377,7 +375,7 @@ class UploadCommand { try { for (const [albumId, assets] of albumToAssets.entries()) { - for (const assetBatch of chunk(assets, Math.min(1000 * (options.concurrency ?? 4), 65_000))) { + for (const assetBatch of chunk(assets, Math.min(1000 * concurrency, 65_000))) { await addAssetsToAlbum({ id: albumId, bulkIdsDto: { ids: assetBatch } }); albumUpdateProgress.increment(assetBatch.length); } @@ -389,7 +387,7 @@ class UploadCommand { return { createdAlbumCount: newAlbums.length, updatedAssetCount: assetsToUpdate.length }; } - public async deleteAssets(assets: Asset[], options: UploadOptionsDto): Promise { + async deleteAssets(assets: Asset[], options: UploadOptionsDto): Promise { const deletionProgress = new cliProgress.SingleBar( { format: 'Deleting local assets | {bar} | {percentage}% | ETA: {eta}s | {value}/{total} assets', @@ -444,18 +442,6 @@ class UploadCommand { return results.map((response) => response.id); } - private async crawl(paths: string[], options: UploadOptionsDto): Promise { - const formatResponse = await getSupportedMediaTypes(); - const crawlService = new CrawlService(formatResponse.image, formatResponse.video); - - return crawlService.crawl({ - pathsToCrawl: paths, - recursive: options.recursive, - exclusionPatterns: options.exclusionPatterns, - includeHidden: options.includeHidden, - }); - } - private async uploadAsset(data: FormData): Promise<{ id: string }> { const { baseUrl, headers } = defaults; diff --git a/cli/src/services/crawl.service.ts b/cli/src/services/crawl.service.ts deleted file mode 100644 index 3ad0fcf3b8..0000000000 --- a/cli/src/services/crawl.service.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { glob } from 'glob'; -import * as fs from 'node:fs'; - -export class CrawlOptions { - pathsToCrawl!: string[]; - recursive? = false; - includeHidden? = false; - exclusionPatterns?: string[]; -} - -export class CrawlService { - private readonly extensions!: string[]; - - constructor(image: string[], video: string[]) { - this.extensions = [...image, ...video].map((extension) => extension.replace('.', '')); - } - - async crawl(options: CrawlOptions): Promise { - const { recursive, pathsToCrawl, exclusionPatterns, includeHidden } = options; - - if (!pathsToCrawl) { - return []; - } - - const patterns: string[] = []; - const crawledFiles: string[] = []; - - for await (const currentPath of pathsToCrawl) { - try { - const stats = await fs.promises.stat(currentPath); - if (stats.isFile() || stats.isSymbolicLink()) { - crawledFiles.push(currentPath); - } else { - patterns.push(currentPath); - } - } catch (error: any) { - if (error.code === 'ENOENT') { - patterns.push(currentPath); - } else { - throw error; - } - } - } - - let searchPattern: string; - if (patterns.length === 1) { - searchPattern = patterns[0]; - } else if (patterns.length === 0) { - return crawledFiles; - } else { - searchPattern = '{' + patterns.join(',') + '}'; - } - - if (recursive) { - searchPattern = searchPattern + '/**/'; - } - - searchPattern = `${searchPattern}/*.{${this.extensions.join(',')}}`; - - const globbedFiles = await glob(searchPattern, { - absolute: true, - nocase: true, - nodir: true, - dot: includeHidden, - ignore: exclusionPatterns, - }); - - return [...crawledFiles, ...globbedFiles].sort(); - } -} diff --git a/cli/src/services/crawl.service.spec.ts b/cli/src/utils.spec.ts similarity index 94% rename from cli/src/services/crawl.service.spec.ts rename to cli/src/utils.spec.ts index 93879f21e0..9c25702799 100644 --- a/cli/src/services/crawl.service.spec.ts +++ b/cli/src/utils.spec.ts @@ -1,14 +1,31 @@ import mockfs from 'mock-fs'; -import { CrawlOptions, CrawlService } from './crawl.service'; +import { CrawlOptions, crawl } from 'src/utils'; interface Test { test: string; - options: CrawlOptions; + options: Omit; files: Record; } const cwd = process.cwd(); +const extensions = [ + '.jpg', + '.jpeg', + '.png', + '.heif', + '.heic', + '.tif', + '.nef', + '.webp', + '.tiff', + '.dng', + '.gif', + '.mov', + '.mp4', + '.webm', +]; + const tests: Test[] = [ { test: 'should return empty when crawling an empty path list', @@ -251,12 +268,7 @@ const tests: Test[] = [ }, ]; -describe(CrawlService.name, () => { - const sut = new CrawlService( - ['.jpg', '.jpeg', '.png', '.heif', '.heic', '.tif', '.nef', '.webp', '.tiff', '.dng', '.gif'], - ['.mov', '.mp4', '.webm'], - ); - +describe('crawl', () => { afterEach(() => { mockfs.restore(); }); @@ -266,7 +278,7 @@ describe(CrawlService.name, () => { it(test, async () => { mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, '']))); - const actual = await sut.crawl(options); + const actual = await crawl({ ...options, extensions }); const expected = Object.entries(files) .filter((entry) => entry[1]) .map(([file]) => file); diff --git a/cli/src/utils.ts b/cli/src/utils.ts index f99a0e66a8..5afa74acfd 100644 --- a/cli/src/utils.ts +++ b/cli/src/utils.ts @@ -1,5 +1,6 @@ import { defaults, getMyUserInfo, isHttpError } from '@immich/sdk'; -import { readFile, writeFile } from 'node:fs/promises'; +import { glob } from 'glob'; +import { readFile, stat, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; import yaml from 'yaml'; @@ -87,3 +88,64 @@ export const withError = async (promise: Promise): Promise<[Error, undefin return [error, undefined]; } }; + +export interface CrawlOptions { + pathsToCrawl: string[]; + recursive?: boolean; + includeHidden?: boolean; + exclusionPatterns?: string[]; + extensions: string[]; +} +export const crawl = async (options: CrawlOptions): Promise => { + const { extensions: extensionsWithPeriod, recursive, pathsToCrawl, exclusionPatterns, includeHidden } = options; + const extensions = extensionsWithPeriod.map((extension) => extension.replace('.', '')); + + if (!pathsToCrawl) { + return []; + } + + const patterns: string[] = []; + const crawledFiles: string[] = []; + + for await (const currentPath of pathsToCrawl) { + try { + const stats = await stat(currentPath); + if (stats.isFile() || stats.isSymbolicLink()) { + crawledFiles.push(currentPath); + } else { + patterns.push(currentPath); + } + } catch (error: any) { + if (error.code === 'ENOENT') { + patterns.push(currentPath); + } else { + throw error; + } + } + } + + let searchPattern: string; + if (patterns.length === 1) { + searchPattern = patterns[0]; + } else if (patterns.length === 0) { + return crawledFiles; + } else { + searchPattern = '{' + patterns.join(',') + '}'; + } + + if (recursive) { + searchPattern = searchPattern + '/**/'; + } + + searchPattern = `${searchPattern}/*.{${extensions.join(',')}}`; + + const globbedFiles = await glob(searchPattern, { + absolute: true, + nocase: true, + nodir: true, + dot: includeHidden, + ignore: exclusionPatterns, + }); + + return [...crawledFiles, ...globbedFiles].sort(); +};