1
0
mirror of https://github.com/immich-app/immich.git synced 2025-01-11 06:10:28 +02:00

refactor(cli): crawl service (#8190)

This commit is contained in:
Jason Rasmussen 2024-03-22 10:30:24 -04:00 committed by GitHub
parent a56cf35d8c
commit db744f500b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 147 additions and 157 deletions

View File

@ -12,11 +12,10 @@ import cliProgress from 'cli-progress';
import { chunk, zip } from 'lodash-es';
import { createHash } from 'node:crypto';
import fs, { createReadStream } from 'node:fs';
import { access, constants, stat, unlink } from 'node:fs/promises';
import { access, constants, lstat, stat, unlink } from 'node:fs/promises';
import os from 'node:os';
import path, { basename } from 'node:path';
import { CrawlService } from 'src/services/crawl.service';
import { BaseOptions, authenticate } from 'src/utils';
import { BaseOptions, authenticate, crawl } from 'src/utils';
const zipDefined = zip as <T, U>(a: T[], b: U[]) => [T, U][];
@ -115,7 +114,7 @@ class Asset {
return unlink(this.path);
}
public async hash(): Promise<string> {
async hash(): Promise<string> {
const sha1 = (filePath: string) => {
const hash = createHash('sha1');
return new Promise<string>((resolve, reject) => {
@ -134,40 +133,60 @@ class Asset {
}
}
class UploadOptionsDto {
recursive? = false;
exclusionPatterns?: string[] = [];
dryRun? = false;
skipHash? = false;
delete? = false;
album? = false;
albumName? = '';
includeHidden? = false;
concurrency? = 4;
/**
 * Options accepted by the `upload` command and handed on to `UploadCommand`.
 */
interface UploadOptionsDto {
/** Forwarded to `crawl` as `recursive`. */
recursive?: boolean;
/** Forwarded to `crawl` as `exclusionPatterns`. */
exclusionPatterns?: string[];
/** When set, sizes and counts are still computed, but uploading, album changes and local deletion are skipped. */
dryRun?: boolean;
/** NOTE(review): not read in the visible code; presumably skips the hash-based duplicate check. Confirm. */
skipHash?: boolean;
/** NOTE(review): presumably enables deleting local files after upload; the guarding condition is not visible here. Confirm. */
delete?: boolean;
/** NOTE(review): not read in the visible code; presumably enables album creation from the asset location. Confirm. */
album?: boolean;
/** When non-empty, every asset is assigned this album name before albums are updated. */
albumName?: string;
/** Forwarded to `crawl` as `includeHidden`. */
includeHidden?: boolean;
/** Required. Used as the chunk size for upload and album-creation batches and as a multiplier for album asset batches. */
concurrency: number;
}
export const upload = (paths: string[], baseOptions: BaseOptions, uploadOptions: UploadOptionsDto) =>
new UploadCommand().run(paths, baseOptions, uploadOptions);
/**
 * Entry point of the `upload` command.
 *
 * Authenticates, resolves the path arguments to a list of media files via `crawl`
 * (restricted to the extensions the server reports as supported) and hands the
 * result to `UploadCommand`.
 *
 * @param paths - Files, directories or glob patterns given on the command line.
 * @param baseOptions - Connection/authentication options passed to `authenticate`.
 * @param uploadOptions - Upload behaviour; the crawl-related fields are forwarded to `crawl`.
 * @returns Resolves once the upload command has finished, or immediately when no files were found.
 */
export const upload = async (paths: string[], baseOptions: BaseOptions, uploadOptions: UploadOptionsDto) => {
  await authenticate(baseOptions);

  console.log('Crawling for assets...');

  const { image, video } = await getSupportedMediaTypes();

  // `crawl` already returns every path argument that is an existing file and treats
  // missing paths as glob patterns. A separate lstat pass over `paths` would therefore
  // add each explicit file a second time and would throw ENOENT for pattern arguments
  // before `crawl` gets a chance to expand them.
  const files = await crawl({
    pathsToCrawl: paths,
    recursive: uploadOptions.recursive,
    exclusionPatterns: uploadOptions.exclusionPatterns,
    includeHidden: uploadOptions.includeHidden,
    extensions: [...image, ...video],
  });

  if (files.length === 0) {
    console.log('No assets found, exiting');
    return;
  }

  return new UploadCommand().run(files, uploadOptions);
};
// TODO refactor this
class UploadCommand {
public async run(paths: string[], baseOptions: BaseOptions, options: UploadOptionsDto): Promise<void> {
await authenticate(baseOptions);
console.log('Crawling for assets...');
const files = await this.getFiles(paths, options);
if (files.length === 0) {
console.log('No assets found, exiting');
return;
}
async run(files: string[], options: UploadOptionsDto): Promise<void> {
const { concurrency, dryRun } = options;
const assetsToCheck = files.map((path) => new Asset(path));
const { newAssets, duplicateAssets } = await this.checkAssets(assetsToCheck, options.concurrency ?? 4);
const { newAssets, duplicateAssets } = await this.checkAssets(assetsToCheck, concurrency);
const totalSizeUploaded = await this.upload(newAssets, options);
const messageStart = options.dryRun ? 'Would have' : 'Successfully';
const messageStart = dryRun ? 'Would have' : 'Successfully';
if (newAssets.length === 0) {
console.log('All assets were already uploaded, nothing to do.');
} else {
@ -189,7 +208,7 @@ class UploadCommand {
return;
}
if (options.dryRun) {
if (dryRun) {
console.log(`Would now have deleted assets, but skipped due to dry run`);
return;
}
@ -199,7 +218,7 @@ class UploadCommand {
await this.deleteAssets(newAssets, options);
}
public async checkAssets(
async checkAssets(
assetsToCheck: Asset[],
concurrency: number,
): Promise<{ newAssets: Asset[]; duplicateAssets: Asset[]; rejectedAssets: Asset[] }> {
@ -237,7 +256,7 @@ class UploadCommand {
return { newAssets, duplicateAssets, rejectedAssets };
}
public async upload(assetsToUpload: Asset[], options: UploadOptionsDto): Promise<number> {
async upload(assetsToUpload: Asset[], { dryRun, concurrency }: UploadOptionsDto): Promise<number> {
let totalSize = 0;
// Compute total size first
@ -245,7 +264,7 @@ class UploadCommand {
totalSize += asset.fileSize ?? 0;
}
if (options.dryRun) {
if (dryRun) {
return totalSize;
}
@ -260,7 +279,7 @@ class UploadCommand {
let totalSizeUploaded = 0;
try {
for (const assets of chunk(assetsToUpload, options.concurrency)) {
for (const assets of chunk(assetsToUpload, concurrency)) {
const ids = await this.uploadAssets(assets);
for (const [asset, id] of zipDefined(assets, ids)) {
asset.id = id;
@ -279,42 +298,21 @@ class UploadCommand {
return totalSizeUploaded;
}
public async getFiles(paths: string[], options: UploadOptionsDto): Promise<string[]> {
const inputFiles: string[] = [];
for (const pathArgument of paths) {
const fileStat = await fs.promises.lstat(pathArgument);
if (fileStat.isFile()) {
inputFiles.push(pathArgument);
}
}
const files: string[] = await this.crawl(paths, options);
files.push(...inputFiles);
return files;
}
public async getAlbums(): Promise<Map<string, string>> {
const existingAlbums = await getAllAlbums({});
const albumMapping = new Map<string, string>();
for (const album of existingAlbums) {
albumMapping.set(album.albumName, album.id);
}
return albumMapping;
}
public async updateAlbums(
async updateAlbums(
assets: Asset[],
options: UploadOptionsDto,
): Promise<{ createdAlbumCount: number; updatedAssetCount: number }> {
const { dryRun, concurrency } = options;
if (options.albumName) {
for (const asset of assets) {
asset.albumName = options.albumName;
}
}
const existingAlbums = await this.getAlbums();
const albums = await getAllAlbums({});
const existingAlbums = new Map(albums.map((album) => [album.albumName, album.id]));
const assetsToUpdate = assets.filter(
(asset): asset is Asset & { albumName: string; id: string } => !!(asset.albumName && asset.id),
);
@ -328,7 +326,7 @@ class UploadCommand {
const newAlbums = [...newAlbumsSet];
if (options.dryRun) {
if (dryRun) {
return { createdAlbumCount: newAlbums.length, updatedAssetCount: assetsToUpdate.length };
}
@ -341,7 +339,7 @@ class UploadCommand {
albumCreationProgress.start(newAlbums.length, 0);
try {
for (const albumNames of chunk(newAlbums, options.concurrency)) {
for (const albumNames of chunk(newAlbums, concurrency)) {
const newAlbumIds = await Promise.all(
albumNames.map((albumName: string) => createAlbum({ createAlbumDto: { albumName } }).then((r) => r.id)),
);
@ -377,7 +375,7 @@ class UploadCommand {
try {
for (const [albumId, assets] of albumToAssets.entries()) {
for (const assetBatch of chunk(assets, Math.min(1000 * (options.concurrency ?? 4), 65_000))) {
for (const assetBatch of chunk(assets, Math.min(1000 * concurrency, 65_000))) {
await addAssetsToAlbum({ id: albumId, bulkIdsDto: { ids: assetBatch } });
albumUpdateProgress.increment(assetBatch.length);
}
@ -389,7 +387,7 @@ class UploadCommand {
return { createdAlbumCount: newAlbums.length, updatedAssetCount: assetsToUpdate.length };
}
public async deleteAssets(assets: Asset[], options: UploadOptionsDto): Promise<void> {
async deleteAssets(assets: Asset[], options: UploadOptionsDto): Promise<void> {
const deletionProgress = new cliProgress.SingleBar(
{
format: 'Deleting local assets | {bar} | {percentage}% | ETA: {eta}s | {value}/{total} assets',
@ -444,18 +442,6 @@ class UploadCommand {
return results.map((response) => response.id);
}
private async crawl(paths: string[], options: UploadOptionsDto): Promise<string[]> {
const formatResponse = await getSupportedMediaTypes();
const crawlService = new CrawlService(formatResponse.image, formatResponse.video);
return crawlService.crawl({
pathsToCrawl: paths,
recursive: options.recursive,
exclusionPatterns: options.exclusionPatterns,
includeHidden: options.includeHidden,
});
}
private async uploadAsset(data: FormData): Promise<{ id: string }> {
const { baseUrl, headers } = defaults;

View File

@ -1,70 +0,0 @@
import { glob } from 'glob';
import * as fs from 'node:fs';
/**
 * Options accepted by `CrawlService.crawl`.
 */
export class CrawlOptions {
/** Paths to scan: existing files are returned as-is, every other entry is used as a glob pattern. */
pathsToCrawl!: string[];
/** When set, sub-directories of each pattern are searched as well. */
recursive? = false;
/** Passed to glob as its `dot` option. */
includeHidden? = false;
/** Passed to glob as its `ignore` option. */
exclusionPatterns?: string[];
}
/**
 * Finds media files on disk for a fixed set of file extensions.
 */
export class CrawlService {
  /** Extensions without their leading dot, e.g. `jpg`. */
  private readonly extensions!: string[];

  /**
   * @param image - Supported image extensions, each with a leading dot.
   * @param video - Supported video extensions, each with a leading dot.
   */
  constructor(image: string[], video: string[]) {
    this.extensions = [...image, ...video].map((extension) => extension.replace('.', ''));
  }

  /**
   * Wraps glob alternatives in a brace group (`{a,b}`).
   *
   * A single alternative is returned bare: brace expansion only applies to
   * comma-separated lists, so `{jpg}` would be matched literally.
   */
  private static toBraceGroup(alternatives: string[]): string {
    return alternatives.length === 1 ? alternatives[0] : `{${alternatives.join(',')}}`;
  }

  /**
   * Resolves the given paths to a list of files.
   *
   * Paths that are existing files are returned unchanged. All other paths
   * (directories and paths that do not exist) are treated as glob patterns and
   * searched for files with one of the configured extensions.
   *
   * @param options - Paths to crawl plus glob behaviour flags.
   * @returns The explicit files alone when there is nothing to glob; otherwise the
   *   explicit files and the glob matches (absolute paths), sorted together.
   * @throws Any `stat` error other than `ENOENT`.
   */
  async crawl(options: CrawlOptions): Promise<string[]> {
    const { recursive, pathsToCrawl, exclusionPatterns, includeHidden } = options;

    if (!pathsToCrawl) {
      return [];
    }

    const patterns: string[] = [];
    const crawledFiles: string[] = [];

    for (const currentPath of pathsToCrawl) {
      try {
        // stat() follows symbolic links, so a link to a file already reports isFile().
        const stats = await fs.promises.stat(currentPath);
        if (stats.isFile()) {
          crawledFiles.push(currentPath);
        } else {
          patterns.push(currentPath);
        }
      } catch (error: unknown) {
        // A path that does not exist is assumed to be a glob pattern.
        if ((error as { code?: unknown } | null | undefined)?.code !== 'ENOENT') {
          throw error;
        }
        patterns.push(currentPath);
      }
    }

    if (patterns.length === 0) {
      return crawledFiles;
    }

    let searchPattern = CrawlService.toBraceGroup(patterns);
    if (recursive) {
      searchPattern = searchPattern + '/**/';
    }
    searchPattern = `${searchPattern}/*.${CrawlService.toBraceGroup(this.extensions)}`;

    const globbedFiles = await glob(searchPattern, {
      absolute: true,
      nocase: true,
      nodir: true,
      dot: includeHidden,
      ignore: exclusionPatterns,
    });

    return [...crawledFiles, ...globbedFiles].sort();
  }
}

View File

@ -1,14 +1,31 @@
import mockfs from 'mock-fs';
import { CrawlOptions, CrawlService } from './crawl.service';
import { CrawlOptions, crawl } from 'src/utils';
interface Test {
test: string;
options: CrawlOptions;
options: Omit<CrawlOptions, 'extensions'>;
files: Record<string, boolean>;
}
const cwd = process.cwd();
// Media file extensions (leading dot included) passed to `crawl` as `extensions` for each test case.
const extensions = [
'.jpg',
'.jpeg',
'.png',
'.heif',
'.heic',
'.tif',
'.nef',
'.webp',
'.tiff',
'.dng',
'.gif',
'.mov',
'.mp4',
'.webm',
];
const tests: Test[] = [
{
test: 'should return empty when crawling an empty path list',
@ -251,12 +268,7 @@ const tests: Test[] = [
},
];
describe(CrawlService.name, () => {
const sut = new CrawlService(
['.jpg', '.jpeg', '.png', '.heif', '.heic', '.tif', '.nef', '.webp', '.tiff', '.dng', '.gif'],
['.mov', '.mp4', '.webm'],
);
describe('crawl', () => {
afterEach(() => {
mockfs.restore();
});
@ -266,7 +278,7 @@ describe(CrawlService.name, () => {
it(test, async () => {
mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));
const actual = await sut.crawl(options);
const actual = await crawl({ ...options, extensions });
const expected = Object.entries(files)
.filter((entry) => entry[1])
.map(([file]) => file);

View File

@ -1,5 +1,6 @@
import { defaults, getMyUserInfo, isHttpError } from '@immich/sdk';
import { readFile, writeFile } from 'node:fs/promises';
import { glob } from 'glob';
import { readFile, stat, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import yaml from 'yaml';
@ -87,3 +88,64 @@ export const withError = async <T>(promise: Promise<T>): Promise<[Error, undefin
return [error, undefined];
}
};
/**
 * Options accepted by `crawl`.
 */
export interface CrawlOptions {
/** Paths to scan: existing files are returned as-is, every other entry is used as a glob pattern. */
pathsToCrawl: string[];
/** When set, sub-directories of each pattern are searched as well. */
recursive?: boolean;
/** Passed to glob as its `dot` option. */
includeHidden?: boolean;
/** Passed to glob as its `ignore` option. */
exclusionPatterns?: string[];
/** File extensions to match; the first `.` of each entry is removed before the glob pattern is built. */
extensions: string[];
}
/**
 * Wraps glob alternatives in a brace group (`{a,b}`).
 *
 * A single alternative is returned bare: brace expansion only applies to
 * comma-separated lists, so `{jpg}` would be matched literally.
 */
const toBraceGroup = (alternatives: string[]): string =>
  alternatives.length === 1 ? alternatives[0] : `{${alternatives.join(',')}}`;

/**
 * Resolves the given paths to a list of files.
 *
 * Paths that are existing files are returned unchanged. All other paths
 * (directories and paths that do not exist) are treated as glob patterns and
 * searched for files with one of the requested extensions.
 *
 * @param options - Paths to crawl, extensions to match and glob behaviour flags.
 * @returns The explicit files alone when there is nothing to glob; otherwise the
 *   explicit files and the glob matches (absolute paths), sorted together.
 * @throws Any `stat` error other than `ENOENT`.
 */
export const crawl = async (options: CrawlOptions): Promise<string[]> => {
  const { extensions: extensionsWithPeriod, recursive, pathsToCrawl, exclusionPatterns, includeHidden } = options;
  const extensions = extensionsWithPeriod.map((extension) => extension.replace('.', ''));

  if (!pathsToCrawl) {
    return [];
  }

  const patterns: string[] = [];
  const crawledFiles: string[] = [];

  for (const currentPath of pathsToCrawl) {
    try {
      // stat() follows symbolic links, so a link to a file already reports isFile().
      const stats = await stat(currentPath);
      if (stats.isFile()) {
        crawledFiles.push(currentPath);
      } else {
        patterns.push(currentPath);
      }
    } catch (error: unknown) {
      // A path that does not exist is assumed to be a glob pattern.
      if ((error as { code?: unknown } | null | undefined)?.code !== 'ENOENT') {
        throw error;
      }
      patterns.push(currentPath);
    }
  }

  if (patterns.length === 0) {
    return crawledFiles;
  }

  let searchPattern = toBraceGroup(patterns);
  if (recursive) {
    searchPattern = searchPattern + '/**/';
  }
  searchPattern = `${searchPattern}/*.${toBraceGroup(extensions)}`;

  const globbedFiles = await glob(searchPattern, {
    absolute: true,
    nocase: true,
    nodir: true,
    dot: includeHidden,
    ignore: exclusionPatterns,
  });

  return [...crawledFiles, ...globbedFiles].sort();
};