1
0
mirror of https://github.com/immich-app/immich.git synced 2025-08-10 23:22:22 +02:00

feat(server): near-duplicate detection (#8228)

* duplicate detection job, entity, config

* queueing

* job panel, update api

* use embedding in db instead of fetching

* disable concurrency

* only queue visible assets

* handle multiple duplicateIds

* update concurrent queue check

* add provider

* add web placeholder, server endpoint, migration, various fixes

* update sql

* select embedding by default

* rename variable

* simplify

* remove separate entity, handle re-running with different threshold, set default back to 0.02

* fix tests

* add tests

* add index to entity

* formatting

* update asset mock

* fix `upsertJobStatus` signature

* update sql

* formatting

* default to 0.03

* optimize clustering

* use asset's `duplicateId` if present

* update sql

* update tests

* expose admin setting

* refactor

* formatting

* skip if ml is disabled

* debug trash e2e

* remove from web

* remove from sidebar

* test if ml is disabled

* update sql

* separate duplicate detection from clip in config, disable by default for now

* fix doc

* lower minimum `maxDistance`

* update api

* Add and Use Duplicate Detection Feature Flag (#9364)

* Add Duplicate Detection Flag

* Use Duplicate Detection Flag

* Attempt Fixes for Failing Checks

* lower minimum `maxDistance`

* fix tests

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>

* chore: fixes and additions after rebase

* chore: update api (remove new Role enum)

* fix: left join smart search so getAll works without machine learning

* test: trash e2e go back to checking length of assets is zero

* chore: regen api after rebase

* test: fix tests after rebase

* redundant join

---------

Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com>
Co-authored-by: Zack Pollard <zackpollard@ymail.com>
Co-authored-by: Zack Pollard <zack@futo.org>
This commit is contained in:
Mert
2024-05-16 13:08:37 -04:00
committed by GitHub
parent 673e97e71d
commit 64636c0618
61 changed files with 1254 additions and 61 deletions

View File

@@ -50,6 +50,7 @@ export const assetStub = {
isExternal: false,
libraryId: 'library-id',
library: libraryStub.uploadLibrary1,
duplicateId: null,
}),
noWebpPath: Object.freeze<AssetEntity>({
@@ -89,6 +90,7 @@ export const assetStub = {
fileSizeInByte: 123_000,
} as ExifEntity,
deletedAt: null,
duplicateId: null,
}),
noThumbhash: Object.freeze<AssetEntity>({
@@ -125,6 +127,7 @@ export const assetStub = {
faces: [],
sidecarPath: null,
deletedAt: null,
duplicateId: null,
}),
primaryImage: Object.freeze<AssetEntity>({
@@ -171,6 +174,7 @@ export const assetStub = {
{ id: 'stack-child-asset-1' } as AssetEntity,
{ id: 'stack-child-asset-2' } as AssetEntity,
]),
duplicateId: null,
}),
image: Object.freeze<AssetEntity>({
@@ -212,6 +216,7 @@ export const assetStub = {
exifImageHeight: 3840,
exifImageWidth: 2160,
} as ExifEntity,
duplicateId: null,
}),
external: Object.freeze<AssetEntity>({
@@ -251,6 +256,7 @@ export const assetStub = {
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
duplicateId: null,
}),
offline: Object.freeze<AssetEntity>({
@@ -290,6 +296,7 @@ export const assetStub = {
fileSizeInByte: 5000,
} as ExifEntity,
deletedAt: null,
duplicateId: null,
}),
externalOffline: Object.freeze<AssetEntity>({
@@ -329,6 +336,7 @@ export const assetStub = {
fileSizeInByte: 5000,
} as ExifEntity,
deletedAt: null,
duplicateId: null,
}),
image1: Object.freeze<AssetEntity>({
@@ -368,6 +376,7 @@ export const assetStub = {
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
duplicateId: null,
}),
imageFrom2015: Object.freeze<AssetEntity>({
@@ -407,6 +416,7 @@ export const assetStub = {
fileSizeInByte: 5000,
} as ExifEntity,
deletedAt: null,
duplicateId: null,
}),
video: Object.freeze<AssetEntity>({
@@ -446,6 +456,7 @@ export const assetStub = {
fileSizeInByte: 100_000,
} as ExifEntity,
deletedAt: null,
duplicateId: null,
}),
livePhotoMotionAsset: Object.freeze({
@@ -541,6 +552,7 @@ export const assetStub = {
country: 'test-country',
} as ExifEntity,
deletedAt: null,
duplicateId: null,
}),
sidecar: Object.freeze<AssetEntity>({
id: 'asset-id',
@@ -576,6 +588,7 @@ export const assetStub = {
faces: [],
sidecarPath: '/original/path.ext.xmp',
deletedAt: null,
duplicateId: null,
}),
sidecarWithoutExt: Object.freeze<AssetEntity>({
id: 'asset-id',
@@ -611,6 +624,7 @@ export const assetStub = {
faces: [],
sidecarPath: '/original/path.xmp',
deletedAt: null,
duplicateId: null,
}),
readOnly: Object.freeze<AssetEntity>({
@@ -647,6 +661,7 @@ export const assetStub = {
faces: [],
sidecarPath: '/original/path.ext.xmp',
deletedAt: null,
duplicateId: null,
}),
hasEncodedVideo: Object.freeze<AssetEntity>({
@@ -686,6 +701,7 @@ export const assetStub = {
fileSizeInByte: 100_000,
} as ExifEntity,
deletedAt: null,
duplicateId: null,
}),
missingFileExtension: Object.freeze<AssetEntity>({
id: 'asset-id',
@@ -724,6 +740,7 @@ export const assetStub = {
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
duplicateId: null,
}),
hasFileExtension: Object.freeze<AssetEntity>({
id: 'asset-id',
@@ -762,6 +779,7 @@ export const assetStub = {
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
duplicateId: null,
}),
imageDng: Object.freeze<AssetEntity>({
id: 'asset-id',
@@ -802,5 +820,92 @@ export const assetStub = {
profileDescription: 'Adobe RGB',
bitsPerSample: 14,
} as ExifEntity,
duplicateId: null,
}),
/**
 * Image asset fixture that carries a `smartSearch` record with a 512-element
 * embedding and has no duplicate group assigned (`duplicateId` is `null`).
 *
 * `Object.freeze` is shallow: the top-level properties are read-only, but
 * nested objects such as `smartSearch` and its `embedding` array stay mutable.
 *
 * NOTE(review): presumably used by the duplicate-detection tests as the
 * "asset with an embedding but no duplicate yet" case; verify against callers.
 */
hasEmbedding: Object.freeze<AssetEntity>({
id: 'asset-id-embedding',
deviceAssetId: 'device-asset-id',
fileModifiedAt: new Date('2023-02-23T05:06:29.716Z'),
fileCreatedAt: new Date('2023-02-23T05:06:29.716Z'),
owner: userStub.user1,
ownerId: 'user-id',
deviceId: 'device-id',
originalPath: '/original/path.jpg',
previewPath: '/uploads/user-id/thumbs/path.jpg',
checksum: Buffer.from('file hash', 'utf8'),
type: AssetType.IMAGE,
thumbnailPath: '/uploads/user-id/webp/path.ext',
thumbhash: Buffer.from('blablabla', 'base64'),
encodedVideoPath: null,
createdAt: new Date('2023-02-23T05:06:29.716Z'),
updatedAt: new Date('2023-02-23T05:06:29.716Z'),
localDateTime: new Date('2023-02-23T05:06:29.716Z'),
isFavorite: true,
isArchived: false,
duration: null,
isVisible: true,
isExternal: false,
livePhotoVideo: null,
livePhotoVideoId: null,
isOffline: false,
libraryId: 'library-id',
library: libraryStub.uploadLibrary1,
tags: [],
sharedLinks: [],
originalFileName: 'asset-id.jpg',
faces: [],
deletedAt: null,
sidecarPath: null,
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
// Not part of any duplicate group.
duplicateId: null,
smartSearch: {
// NOTE(review): this is 'asset-id' while the asset's own `id` above is
// 'asset-id-embedding'; confirm whether the mismatch is intentional.
assetId: 'asset-id',
// 512 values drawn from Math.random when this literal is evaluated, so the
// vector differs between test runs; compare against this same array
// reference rather than against hard-coded numbers.
embedding: Array.from({ length: 512 }, Math.random),
},
}),
/**
 * Image asset fixture that already belongs to a duplicate group
 * (`duplicateId` is `'duplicate-id'`) and carries a `smartSearch` record with
 * a 512-element embedding.
 *
 * Apart from `id` and `duplicateId`, the literal fields match those of the
 * `hasEmbedding` fixture; the embedding itself is generated independently.
 *
 * `Object.freeze` is shallow: the top-level properties are read-only, but
 * nested objects such as `smartSearch` and its `embedding` array stay mutable.
 */
hasDupe: Object.freeze<AssetEntity>({
id: 'asset-id-dupe',
deviceAssetId: 'device-asset-id',
fileModifiedAt: new Date('2023-02-23T05:06:29.716Z'),
fileCreatedAt: new Date('2023-02-23T05:06:29.716Z'),
owner: userStub.user1,
ownerId: 'user-id',
deviceId: 'device-id',
originalPath: '/original/path.jpg',
previewPath: '/uploads/user-id/thumbs/path.jpg',
checksum: Buffer.from('file hash', 'utf8'),
type: AssetType.IMAGE,
thumbnailPath: '/uploads/user-id/webp/path.ext',
thumbhash: Buffer.from('blablabla', 'base64'),
encodedVideoPath: null,
createdAt: new Date('2023-02-23T05:06:29.716Z'),
updatedAt: new Date('2023-02-23T05:06:29.716Z'),
localDateTime: new Date('2023-02-23T05:06:29.716Z'),
isFavorite: true,
isArchived: false,
duration: null,
isVisible: true,
isExternal: false,
livePhotoVideo: null,
livePhotoVideoId: null,
isOffline: false,
libraryId: 'library-id',
library: libraryStub.uploadLibrary1,
tags: [],
sharedLinks: [],
originalFileName: 'asset-id.jpg',
faces: [],
deletedAt: null,
sidecarPath: null,
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
// Already assigned to a duplicate group.
duplicateId: 'duplicate-id',
smartSearch: {
// NOTE(review): this is 'asset-id' while the asset's own `id` above is
// 'asset-id-dupe'; confirm whether the mismatch is intentional.
assetId: 'asset-id',
// 512 values drawn from Math.random when this literal is evaluated, so the
// vector differs between test runs and from any other fixture's embedding.
embedding: Array.from({ length: 512 }, Math.random),
},
}),
};