1
0
mirror of https://github.com/immich-app/immich.git synced 2024-12-24 10:37:28 +02:00

feat(server): Fix exif data parsing (#1326)

* Trying to get exifdata working with different lib.

* Got the new library working.

* Addressing PR comments.

* Removed not used vars and proper place for the eslint disable.

* Fix time-utils to use the exiftool-vendored lib.

Fixed also one test, as that would be valid.

* Using filename for timestamp as well if possible.

* Add new tests for time-utils.

* Remember to gracefully terminate the exiftool instance when not needed.

* eslint ignore...

* Apparently Dockerfile changes were not pushed.

* feat(dockerfile): Tweak the Server Dockerfile

* feat(server): getTimestampFromFilename should return string or undefined.

* feat(server): If we don't have exifData or timestamp from filename, raise an error.

* Apparently test was already right, but my local system disagrees.

* More utilities for parsing and fix the timestampFromFilename.

It was returning an incorrect date because the regex is not well suited to this: files named `IMG_0115.HEIC` get parsed incorrectly by it.

* feat(server/docker): Install perl as it seems to be required.

* feat(server): remember to include exposureTime and focalLength in new exif data.

* feat(server): Remove the parsing from filename as requested.

* feat(server): Import exiftool differently in time-utils.

* feat(server): Error handling when there is no exifData.

* feat(server): Fixes for the error handling when there is no exifData.

* feat(server): Remember to include modifyDate despite no exif.

* feat(server): Remember to include model of Camera.

* feat(server): Fixing up Exiftool usage.

Including proper logging for it, which had to be done in a wrapped fashion because it expects all the logging levels, which the NestJS logger doesn't implement.

* feat(server): Do not use a wrapper for ExifTool logging.

* fix merge conflicts in metadata-extractor
This commit is contained in:
Skyler Mäntysaari 2023-01-17 17:29:49 +02:00 committed by GitHub
parent 693adf8488
commit dff10e89fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 164 additions and 111 deletions

View File

@ -2,7 +2,7 @@ FROM node:16-alpine3.14 as builder
WORKDIR /usr/src/app
RUN apk add --update-cache build-base python3 libheif vips-dev ffmpeg
RUN apk add --update-cache build-base python3 libheif vips-dev ffmpeg exiftool perl
COPY package.json package-lock.json ./
@ -21,7 +21,7 @@ FROM node:16-alpine3.14
WORKDIR /usr/src/app
RUN apk add --no-cache libheif vips ffmpeg
RUN apk add --no-cache libheif vips ffmpeg exiftool perl
COPY --from=prod /usr/src/app/node_modules ./node_modules
COPY --from=prod /usr/src/app/dist ./dist

View File

@ -1,8 +1,8 @@
import { AssetEntity, ExifEntity } from '@app/infra';
import {
IExifExtractionProcessor,
IVideoLengthExtractionProcessor,
IReverseGeocodingProcessor,
IVideoLengthExtractionProcessor,
QueueName,
JobName,
} from '@app/job';
@ -11,16 +11,15 @@ import { Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { InjectRepository } from '@nestjs/typeorm';
import { Job } from 'bull';
import exifr from 'exifr';
import ffmpeg from 'fluent-ffmpeg';
import path from 'path';
import sharp from 'sharp';
import { Repository } from 'typeorm/repository/Repository';
import geocoder, { InitOptions } from 'local-reverse-geocoder';
import { getName } from 'i18n-iso-countries';
import { find } from 'geo-tz';
import * as luxon from 'luxon';
import fs from 'node:fs';
import { ExifDateTime, ExifTool } from 'exiftool-vendored';
import { timeUtils } from '@app/common';
function geocoderInit(init: InitOptions) {
return new Promise<void>(function (resolve) {
@ -75,7 +74,6 @@ export type GeoData = {
export class MetadataExtractionProcessor {
private logger = new Logger(MetadataExtractionProcessor.name);
private isGeocodeInitialized = false;
constructor(
@InjectRepository(AssetEntity)
private assetRepository: Repository<AssetEntity>,
@ -102,7 +100,7 @@ export class MetadataExtractionProcessor {
configService.get('REVERSE_GEOCODING_DUMP_DIRECTORY') || process.cwd() + '/.reverse-geocoding-dump/',
}).then(() => {
this.isGeocodeInitialized = true;
Logger.log('Reverse Geocoding Initialised');
this.logger.log('Reverse Geocoding Initialised');
});
}
}
@ -142,84 +140,48 @@ export class MetadataExtractionProcessor {
async extractExifInfo(job: Job<IExifExtractionProcessor>) {
try {
const { asset, fileName }: { asset: AssetEntity; fileName: string } = job.data;
const exifData = await exifr.parse(asset.originalPath, {
tiff: true,
ifd0: true as any,
ifd1: true,
exif: true,
gps: true,
interop: true,
xmp: true,
icc: true,
iptc: true,
jfif: true,
ihdr: true,
const exiftool = new ExifTool();
const exifData = await exiftool.read(asset.originalPath).catch((e) => {
this.logger.warn(`The exifData parsing failed due to: ${e} on file ${asset.originalPath}`);
});
if (!exifData) {
throw new Error(`can not parse exif data from file ${asset.originalPath}`);
const exifToDate = (exifDate: string | ExifDateTime | undefined) =>
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
exifDate ? new Date(exifDate.toString()!) : null;
let createdAt = exifToDate(asset.createdAt);
const newExif = new ExifEntity();
if (exifData) {
createdAt = exifToDate(exifData.DateTimeOriginal ?? exifData.CreateDate ?? asset.createdAt);
const modifyDate = exifToDate(exifData.ModifyDate);
newExif.make = exifData['Make'] || null;
newExif.model = exifData['Model'] || null;
newExif.exifImageHeight = exifData['ExifImageHeight'] || exifData['ImageHeight'] || null;
newExif.exifImageWidth = exifData['ExifImageWidth'] || exifData['ImageWidth'] || null;
newExif.exposureTime = (await timeUtils.parseStringToNumber(exifData['ExposureTime'])) || null;
newExif.orientation = exifData['Orientation']?.toString() || null;
newExif.dateTimeOriginal = createdAt;
newExif.modifyDate = modifyDate || null;
newExif.lensModel = exifData['LensModel'] || null;
newExif.fNumber = exifData['FNumber'] || null;
newExif.focalLength = (await timeUtils.parseStringToNumber(exifData['FocalLength'])) || null;
newExif.iso = exifData['ISO'] || null;
newExif.latitude = exifData['GPSLatitude'] || null;
newExif.longitude = exifData['GPSLongitude'] || null;
} else {
newExif.dateTimeOriginal = createdAt;
newExif.modifyDate = exifToDate(asset.modifiedAt);
}
const createdAt = new Date(exifData.DateTimeOriginal || exifData.CreateDate || new Date(asset.createdAt));
const fileStats = fs.statSync(asset.originalPath);
const fileSizeInBytes = fileStats.size;
const newExif = new ExifEntity();
newExif.assetId = asset.id;
newExif.make = exifData['Make'] || null;
newExif.model = exifData['Model'] || null;
newExif.imageName = path.parse(fileName).name || null;
newExif.exifImageHeight = exifData['ExifImageHeight'] || exifData['ImageHeight'] || null;
newExif.exifImageWidth = exifData['ExifImageWidth'] || exifData['ImageWidth'] || null;
newExif.fileSizeInByte = fileSizeInBytes || null;
newExif.orientation = exifData['Orientation'] || null;
newExif.dateTimeOriginal = createdAt;
newExif.modifyDate = exifData['ModifyDate'] || null;
newExif.lensModel = exifData['LensModel'] || null;
newExif.fNumber = exifData['FNumber'] || null;
newExif.focalLength = exifData['FocalLength'] || null;
newExif.iso = exifData['ISO'] || null;
newExif.exposureTime = exifData['ExposureTime'] || null;
newExif.latitude = exifData['latitude'] || null;
newExif.longitude = exifData['longitude'] || null;
/**
* Correctly store UTC time based on timezone
* The timestamp being extracted from EXIF is based on the timezone
* of the container. We need to correct it to UTC time based on the
* timezone of the location.
*
* The timezone of the location can be exracted from the lat/lon
* GPS coordinates.
*
* Any assets that doesn't have this information will used the
* createdAt timestamp of the asset instead.
*
* The updated/corrected timestamp will be used to update the
* createdAt timestamp in the asset table. So that the information
* is consistent across the database.
* */
if (newExif.longitude && newExif.latitude) {
const tz = find(newExif.latitude, newExif.longitude)[0];
const localTimeWithTimezone = createdAt.toISOString();
if (localTimeWithTimezone.length == 24) {
// Remove the last character
const localTimeWithoutTimezone = localTimeWithTimezone.slice(0, -1);
const correctUTCTime = luxon.DateTime.fromISO(localTimeWithoutTimezone, { zone: tz }).toUTC().toISO();
newExif.dateTimeOriginal = new Date(correctUTCTime);
await this.assetRepository.save({
id: asset.id,
createdAt: correctUTCTime,
});
}
} else {
await this.assetRepository.save({
id: asset.id,
createdAt: createdAt.toISOString(),
});
}
await this.assetRepository.save({
id: asset.id,
createdAt: createdAt?.toISOString(),
});
/**
* Reverse Geocoding
@ -255,6 +217,7 @@ export class MetadataExtractionProcessor {
}
await this.exifRepository.save(newExif);
await exiftool.end();
} catch (error: any) {
this.logger.error(`Error extracting EXIF ${error}`, error?.stack);
}

View File

@ -1,6 +1,12 @@
import exifr from 'exifr';
// Importing from 'exiftool-vendored' fails to resolve in tests, so the
// import below targets 'exiftool-vendored.pl' instead. That package is not
// meant to be a direct dependency of a project, hence the @ts-ignore below.
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
import { exiftool } from 'exiftool-vendored.pl';
function createTimeUtils() {
const floatRegex = /[+-]?([0-9]*[.])?[0-9]+/;
const checkValidTimestamp = (timestamp: string): boolean => {
const parsedTimestamp = Date.parse(timestamp);
@ -19,22 +25,12 @@ function createTimeUtils() {
const getTimestampFromExif = async (originalPath: string): Promise<string> => {
try {
const exifData = await exifr.parse(originalPath, {
tiff: true,
ifd0: true as any,
ifd1: true,
exif: true,
gps: true,
interop: true,
xmp: true,
icc: true,
iptc: true,
jfif: true,
ihdr: true,
});
const exifData = await exiftool.read(originalPath);
if (exifData && exifData['DateTimeOriginal']) {
return exifData['DateTimeOriginal'];
await exiftool.end();
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return exifData['DateTimeOriginal'].toString()!;
} else {
return new Date().toISOString();
}
@ -42,7 +38,17 @@ function createTimeUtils() {
return new Date().toISOString();
}
};
return { checkValidTimestamp, getTimestampFromExif };
/**
 * Extracts the first numeric value from an EXIF-style string.
 *
 * Plain values such as `"4.2 mm"` yield the leading number (`4.2`). When the
 * first number is immediately followed by `/` and a second number (for
 * example an exposure time of `"1/100"`), the value is treated as a rational
 * and the quotient (`0.01`) is returned instead of just the numerator.
 *
 * @param original Raw tag value. A numeric value is passed through unchanged
 *   so that a tag delivered as a number at runtime does not throw.
 * @returns The parsed number, or `null` when the input is missing, contains
 *   no number, or evaluates to a non-finite value (e.g. `"1/0"`).
 */
const parseStringToNumber = async (original: string | number | undefined): Promise<number | null> => {
  if (original === undefined || original === null) {
    return null;
  }

  // Guard against tag values that arrive as numbers rather than strings;
  // calling `.match` on them would throw.
  if (typeof original === 'number') {
    return Number.isFinite(original) ? original : null;
  }

  const text = String(original);
  const numerator = floatRegex.exec(text);
  if (!numerator) {
    return null;
  }

  const value = parseFloat(numerator[0]);

  // Look for a "/<number>" directly after the first number to detect
  // rationals; anything else after the number (units, text) is ignored.
  const remainder = text.slice(numerator.index + numerator[0].length);
  const denominator = /^\s*\/\s*([+-]?(?:[0-9]*[.])?[0-9]+)/.exec(remainder);
  if (!denominator) {
    return Number.isFinite(value) ? value : null;
  }

  const quotient = value / parseFloat(denominator[1]);
  return Number.isFinite(quotient) ? quotient : null;
};
return { checkValidTimestamp, getTimestampFromExif, parseStringToNumber };
}
export const timeUtils = createTimeUtils();

118
server/package-lock.json generated
View File

@ -32,7 +32,7 @@
"cookie-parser": "^1.4.6",
"diskusage": "^1.1.3",
"dotenv": "^14.2.0",
"exifr": "^7.1.3",
"exiftool-vendored": "^19.0.0",
"fdir": "^5.3.0",
"fluent-ffmpeg": "^2.1.2",
"geo-tz": "^7.0.2",
@ -2237,6 +2237,11 @@
"integrity": "sha512-uZtkfKblCEQtZKBF6EBXVZeQNl82yqtDQdv+eck8u7tdPxjLu2/lp5/uPW+um2tpuxINHWy3GhiccY7QgEaVHQ==",
"dev": true
},
"node_modules/@photostructure/tz-lookup": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/@photostructure/tz-lookup/-/tz-lookup-7.0.0.tgz",
"integrity": "sha512-pTRsZz7Sn4yAtItC7I4+0segDHosMyOtJgAXg+xvDOolT0Xz4IFWqBV33OMCWoaNd3oQb60wbWhLeCQgJCyZAA=="
},
"node_modules/@redis/bloom": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@redis/bloom/-/bloom-1.1.0.tgz",
@ -2724,10 +2729,9 @@
"dev": true
},
"node_modules/@types/luxon": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-2.3.2.tgz",
"integrity": "sha512-WOehptuhKIXukSUUkRgGbj2c997Uv/iUgYgII8U7XLJqq9W2oF0kQ6frEznRQbdurioz+L/cdaIm4GutTQfgmA==",
"dev": true
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-3.2.0.tgz",
"integrity": "sha512-lGmaGFoaXHuOLXFvuju2bfvZRqxAqkHPx9Y9IQdQABrinJJshJwfNCKV+u7rR3kJbiqfTF/NhOkcxxAFrObyaA=="
},
"node_modules/@types/mime": {
"version": "1.3.2",
@ -3775,6 +3779,14 @@
"node": "^4.5.0 || >= 5.9"
}
},
"node_modules/batch-cluster": {
"version": "11.0.0",
"resolved": "https://registry.npmjs.org/batch-cluster/-/batch-cluster-11.0.0.tgz",
"integrity": "sha512-8iwqa+rKTaakOHkqdcXDT5L5117pa+FoP8/yAKpNdL44ZnC4V2NEA/sIg0ZO0O9NkpdjLk0A3efRFM5nVizqHw==",
"engines": {
"node": ">=14"
}
},
"node_modules/bcrypt": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/bcrypt/-/bcrypt-5.0.1.tgz",
@ -5629,10 +5641,39 @@
"url": "https://github.com/sindresorhus/execa?sponsor=1"
}
},
"node_modules/exifr": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/exifr/-/exifr-7.1.3.tgz",
"integrity": "sha512-g/aje2noHivrRSLbAUtBPWFbxKdKhgj/xr1vATDdUXPOFYJlQ62Ft0oy+72V6XLIpDJfHs6gXLbBLAolqOXYRw=="
"node_modules/exiftool-vendored": {
"version": "19.0.0",
"resolved": "https://registry.npmjs.org/exiftool-vendored/-/exiftool-vendored-19.0.0.tgz",
"integrity": "sha512-Zes7TZrYWxts92mbF2Gs3drtWZucm4qsaeYaE6A+OOqmeD9UGaGisqIbyh9MilJrLi+ZHzWEJZtDj37QFf6xsA==",
"dependencies": {
"@photostructure/tz-lookup": "^7.0.0",
"@types/luxon": "^3.2.0",
"batch-cluster": "^11.0.0",
"he": "^1.2.0",
"luxon": "^3.2.1"
},
"optionalDependencies": {
"exiftool-vendored.exe": "12.54.0",
"exiftool-vendored.pl": "12.54.0"
}
},
"node_modules/exiftool-vendored.exe": {
"version": "12.54.0",
"resolved": "https://registry.npmjs.org/exiftool-vendored.exe/-/exiftool-vendored.exe-12.54.0.tgz",
"integrity": "sha512-Dc4W6e0NtQfYuJIYK4piHfDJnd2jvA04e0aaq9R3Q1oO34KC5e+L1D2C7lFuZXqPQLYC1x3GYc/GVv5e+SkkrQ==",
"optional": true,
"os": [
"win32"
]
},
"node_modules/exiftool-vendored.pl": {
"version": "12.54.0",
"resolved": "https://registry.npmjs.org/exiftool-vendored.pl/-/exiftool-vendored.pl-12.54.0.tgz",
"integrity": "sha512-RBBowsYcM6EvbWoBkg2dOqHpH3WIzN7bIzHc+o+LquqCTo3doZwECClD/6PNHVSMQsl2Z0fEf75sNq2msooMSg==",
"optional": true,
"os": [
"!win32"
]
},
"node_modules/exit": {
"version": "0.1.2",
@ -6481,6 +6522,14 @@
"resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
"integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk="
},
"node_modules/he": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
"integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
"bin": {
"he": "bin/he"
}
},
"node_modules/hexoid": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/hexoid/-/hexoid-1.0.0.tgz",
@ -13218,6 +13267,11 @@
}
}
},
"@photostructure/tz-lookup": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/@photostructure/tz-lookup/-/tz-lookup-7.0.0.tgz",
"integrity": "sha512-pTRsZz7Sn4yAtItC7I4+0segDHosMyOtJgAXg+xvDOolT0Xz4IFWqBV33OMCWoaNd3oQb60wbWhLeCQgJCyZAA=="
},
"@redis/bloom": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@redis/bloom/-/bloom-1.1.0.tgz",
@ -13676,10 +13730,9 @@
"dev": true
},
"@types/luxon": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-2.3.2.tgz",
"integrity": "sha512-WOehptuhKIXukSUUkRgGbj2c997Uv/iUgYgII8U7XLJqq9W2oF0kQ6frEznRQbdurioz+L/cdaIm4GutTQfgmA==",
"dev": true
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/@types/luxon/-/luxon-3.2.0.tgz",
"integrity": "sha512-lGmaGFoaXHuOLXFvuju2bfvZRqxAqkHPx9Y9IQdQABrinJJshJwfNCKV+u7rR3kJbiqfTF/NhOkcxxAFrObyaA=="
},
"@types/mime": {
"version": "1.3.2",
@ -14528,6 +14581,11 @@
"resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz",
"integrity": "sha512-lGe34o6EHj9y3Kts9R4ZYs/Gr+6N7MCaMlIFA3F1R2O5/m7K06AxfSeO5530PEERE6/WyEg3lsuyw4GHlPZHog=="
},
"batch-cluster": {
"version": "11.0.0",
"resolved": "https://registry.npmjs.org/batch-cluster/-/batch-cluster-11.0.0.tgz",
"integrity": "sha512-8iwqa+rKTaakOHkqdcXDT5L5117pa+FoP8/yAKpNdL44ZnC4V2NEA/sIg0ZO0O9NkpdjLk0A3efRFM5nVizqHw=="
},
"bcrypt": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/bcrypt/-/bcrypt-5.0.1.tgz",
@ -15929,10 +15987,31 @@
"strip-final-newline": "^2.0.0"
}
},
"exifr": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/exifr/-/exifr-7.1.3.tgz",
"integrity": "sha512-g/aje2noHivrRSLbAUtBPWFbxKdKhgj/xr1vATDdUXPOFYJlQ62Ft0oy+72V6XLIpDJfHs6gXLbBLAolqOXYRw=="
"exiftool-vendored": {
"version": "19.0.0",
"resolved": "https://registry.npmjs.org/exiftool-vendored/-/exiftool-vendored-19.0.0.tgz",
"integrity": "sha512-Zes7TZrYWxts92mbF2Gs3drtWZucm4qsaeYaE6A+OOqmeD9UGaGisqIbyh9MilJrLi+ZHzWEJZtDj37QFf6xsA==",
"requires": {
"@photostructure/tz-lookup": "^7.0.0",
"@types/luxon": "^3.2.0",
"batch-cluster": "^11.0.0",
"exiftool-vendored.exe": "12.54.0",
"exiftool-vendored.pl": "12.54.0",
"he": "^1.2.0",
"luxon": "^3.2.1"
}
},
"exiftool-vendored.exe": {
"version": "12.54.0",
"resolved": "https://registry.npmjs.org/exiftool-vendored.exe/-/exiftool-vendored.exe-12.54.0.tgz",
"integrity": "sha512-Dc4W6e0NtQfYuJIYK4piHfDJnd2jvA04e0aaq9R3Q1oO34KC5e+L1D2C7lFuZXqPQLYC1x3GYc/GVv5e+SkkrQ==",
"optional": true
},
"exiftool-vendored.pl": {
"version": "12.54.0",
"resolved": "https://registry.npmjs.org/exiftool-vendored.pl/-/exiftool-vendored.pl-12.54.0.tgz",
"integrity": "sha512-RBBowsYcM6EvbWoBkg2dOqHpH3WIzN7bIzHc+o+LquqCTo3doZwECClD/6PNHVSMQsl2Z0fEf75sNq2msooMSg==",
"optional": true
},
"exit": {
"version": "0.1.2",
@ -16577,6 +16656,11 @@
"resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
"integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk="
},
"he": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
"integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw=="
},
"hexoid": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/hexoid/-/hexoid-1.0.0.tgz",

View File

@ -57,7 +57,7 @@
"cookie-parser": "^1.4.6",
"diskusage": "^1.1.3",
"dotenv": "^14.2.0",
"exifr": "^7.1.3",
"exiftool-vendored": "^19.0.0",
"fdir": "^5.3.0",
"fluent-ffmpeg": "^2.1.2",
"geo-tz": "^7.0.2",