diff --git a/package-lock.json b/package-lock.json index 06dc3a97..96be8c81 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,7 +26,6 @@ "nodemailer": "6.9.4", "reflect-metadata": "0.1.13", "sharp": "0.31.3", - "ts-node-iptc": "1.0.11", "typeconfig": "2.2.11", "typeorm": "0.3.12", "xml2js": "0.6.2" diff --git a/package.json b/package.json index 65e65868..70e71323 100644 --- a/package.json +++ b/package.json @@ -53,7 +53,6 @@ "nodemailer": "6.9.4", "reflect-metadata": "0.1.13", "sharp": "0.31.3", - "ts-node-iptc": "1.0.11", "typeconfig": "2.2.11", "typeorm": "0.3.12", "xml2js": "0.6.2" diff --git a/src/backend/model/fileaccess/MetadataLoader.ts b/src/backend/model/fileaccess/MetadataLoader.ts index a0aafe52..0e08ef5e 100644 --- a/src/backend/model/fileaccess/MetadataLoader.ts +++ b/src/backend/model/fileaccess/MetadataLoader.ts @@ -12,7 +12,6 @@ import { FfprobeData } from 'fluent-ffmpeg'; import { FileHandle } from 'fs/promises'; import * as util from 'node:util'; import * as path from 'path'; -import { IptcParser } from 'ts-node-iptc'; import { Utils } from '../../../common/Utils'; import { FFmpegFactory } from '../FFmpegFactory'; import { ExtensionDecorator } from '../extension/ExtensionDecorator'; @@ -181,7 +180,7 @@ export class MetadataLoader { icc: false, jfif: false, //not needed and not supported for png ihdr: true, - iptc: false, //exifr reads UTF8-encoded data wrongly, using IptcParser instead + iptc: true, exif: true, gps: true, reviveValues: false, //don't convert timestamps @@ -221,46 +220,6 @@ export class MetadataLoader { await fileHandle.close(); } try { - - - try { //Parse iptc data using the IptcParser, which works correctly for both UTF-8 and ASCII - const iptcData = IptcParser.parse(data); - if (iptcData.country_or_primary_location_name) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.country = - iptcData.country_or_primary_location_name - .replace(/\0/g, '') - .trim(); - } - if (iptcData.province_or_state) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.state = iptcData.province_or_state - .replace(/\0/g, '') - .trim(); - } - if (iptcData.city) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.city = iptcData.city - .replace(/\0/g, '') - .trim(); - } - if (iptcData.object_name) { - metadata.title = iptcData.object_name.replace(/\0/g, '').trim(); - } - if (iptcData.caption) { - metadata.caption = iptcData.caption.replace(/\0/g, '').trim(); - } - if (Array.isArray(iptcData.keywords)) { - metadata.keywords = iptcData.keywords; - } - - if (iptcData.date_time) { - metadata.creationDate = iptcData.date_time.getTime(); - } - } catch (err) { - // Logger.debug(LOG_TAG, 'Error parsing iptc data', fullPath, err); - } - try { const exif = await exifr.parse(data, exifrOptions); MetadataLoader.mapMetadata(metadata, exif); @@ -370,20 +329,35 @@ export class MetadataLoader { } } } + if (exif.iptc && + exif.iptc.Keywords && + exif.iptc.Keywords.length > 0) { + const subj = Array.isArray(exif.iptc.Keywords) ? exif.iptc.Keywords : [exif.iptc.Keywords]; + if (metadata.keywords === undefined) { + metadata.keywords = []; + } + for (let kw of subj) { + kw = Utils.asciiToUTF8(kw); + if (metadata.keywords.indexOf(kw) === -1) { + metadata.keywords.push(kw); + } + } + } } private static mapTitle(metadata: PhotoMetadata, exif: any) { - metadata.title = exif.dc?.title?.value || metadata.title || exif.photoshop?.Headline || exif.acdsee?.caption; //acdsee caption holds the title when data is saved by digikam. Used as last resort if iptc and dc do not contain the data + metadata.title = exif.dc?.title?.value || Utils.asciiToUTF8(exif.iptc?.ObjectName) || metadata.title || exif.photoshop?.Headline || exif.acdsee?.caption; //acdsee caption holds the title when data is saved by digikam. Used as last resort if iptc and dc do not contain the data } private static mapCaption(metadata: PhotoMetadata, exif: any) { - metadata.caption = exif.dc?.description?.value || metadata.caption || exif.ifd0?.ImageDescription || exif.exif?.UserComment?.value || exif.Iptc4xmpCore?.ExtDescrAccessibility?.value ||exif.acdsee?.notes; + metadata.caption = exif.dc?.description?.value || Utils.asciiToUTF8(exif.iptc?.Caption) || metadata.caption || exif.ifd0?.ImageDescription || exif.exif?.UserComment?.value || exif.Iptc4xmpCore?.ExtDescrAccessibility?.value ||exif.acdsee?.notes; } private static mapTimestampAndOffset(metadata: PhotoMetadata, exif: any) { metadata.creationDate = Utils.timestampToMS(exif?.photoshop?.DateCreated, null) || Utils.timestampToMS(exif?.xmp?.CreateDate, null) || Utils.timestampToMS(exif?.xmp?.ModifyDate, null) || + Utils.timestampToMS(Utils.toIsoTimestampString(exif?.iptc?.DateCreated, exif?.iptc?.TimeCreated), null) || metadata.creationDate; metadata.creationDateOffset = Utils.timestampToOffsetString(exif?.photoshop?.DateCreated) || @@ -490,24 +464,15 @@ export class MetadataLoader { private static mapToponyms(metadata: PhotoMetadata, exif: any) { //Function to convert html code for special characters into their corresponding character (used in exif.photoshop-section) - const unescape = (tag: string) => { - return tag.replace(/&#([0-9]{1,3});/gi, function (match, numStr) { - return String.fromCharCode(parseInt(numStr, 10)); - }); - } - //photoshop section sometimes has City, Country and State - if (exif.photoshop) { - if (!metadata.positionData?.country && exif.photoshop.Country) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.country = unescape(exif.photoshop.Country); - } - if (!metadata.positionData?.state && exif.photoshop.State) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.state = unescape(exif.photoshop.State); - } - if (!metadata.positionData?.city && exif.photoshop.City) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.city = unescape(exif.photoshop.City); + + metadata.positionData = metadata.positionData || {}; + metadata.positionData.country = Utils.asciiToUTF8(exif.iptc?.Country) || Utils.decodeHTMLChars(exif.photoshop?.Country); + metadata.positionData.state = Utils.asciiToUTF8(exif.iptc?.State) || Utils.decodeHTMLChars(exif.photoshop?.State); + metadata.positionData.city = Utils.asciiToUTF8(exif.iptc?.City) || Utils.decodeHTMLChars(exif.photoshop?.City); + if (metadata.positionData) { + Utils.removeNullOrEmptyObj(metadata.positionData); + if (Object.keys(metadata.positionData).length === 0) { + delete metadata.positionData; } } } diff --git a/src/common/HTMLCharCodes.ts b/src/common/HTMLCharCodes.ts new file mode 100644 index 00000000..c094c79e --- /dev/null +++ b/src/common/HTMLCharCodes.ts @@ -0,0 +1,118 @@ +interface HTMLCharDictionary { + [key: string]: string; +} + +export const HTMLChar: HTMLCharDictionary = { + """: "\"", + "&": "&", + "<": "<", + ">": ">", + " ": " ", + "¡": "¡", + "¢": "¢", + "£": "£", + "¤": "¤", + "¥": "¥", + "¦": "¦", + "§": "§", + "¨": "¨", + "©": "©", + "®": "®", + "™": "™", + "ª": "ª", + "«": "«", + "¬": "¬", + "­": "­", + "¯": "¯", + "°": "°", + "±": "±", + "²": "²", + "³": "³", + "´": "´", + "µ": "µ", + "¶": "¶", + "·": "·", + "¸": "¸", + "¹": "¹", + "º": "º", + "»": "»", + "¼": "¼", + "½": "½", + "¾": "¾", + "¿": "¿", + "×": "×", + "÷": "÷", + "Ð": "Ð", + "ð": "ð", + "Þ": "Þ", + "þ": "þ", + "Æ": "Æ", + "æ": "æ", + "Œ": "Œ", + "œ": "œ", + "Å": "Å", + "Ø": "Ø", + "Ç": "Ç", + "ç": "ç", + "ß": "ß", + "Ñ": "Ñ", + "ñ": "ñ", + "Á": "Á", + "À": "À", + "Â": "Â", + "Ä": "Ä", + "Ã": "Ã", + "á": "á", + "à": "à", + "â": "â", + "ä": "ä", + "ã": "ã", + "å": "å", + "É": "É", + "È": "È", + "Ê": "Ê", + "Ë": "Ë", + "&Etilde;": "Ẽ", + "é": "é", + "è": "è", + "ê": "ê", + "ë": "ë", + "Í": "Í", + "Ì": "Ì", + "Î": "Î", + "Ï": "Ï", + "Ĩ": "Ĩ", + "í": "í", + "ì": "ì", + "î": "î", + "ï": "ï", + "ĩ": "ĩ", + "Ó": "Ó", + "Ò": "Ò", + "Ô": "Ô", + "Ö": "Ö", + "Õ": "Õ", + "ó": "ó", + "ò": "ò", + "ô": "ô", + "ö": "ö", + "õ": "õ", + "Ú": "Ú", + "Ù": "Ù", + "Û": "Û", + "Ü": "Ü", + "Ũ": "Ũ", + "Ů": "Ů", + "ú": "ú", + "ù": "ù", + "û": "û", + "ü": "ü", + "ũ": "ũ", + "ů": "ů", + "Ý": "Ý", + "Ŷ": "Ŷ", + "Ÿ": "Ÿ", + "ý": "ý", + "ŷ": "ŷ", + "ÿ": "ÿ" +}; \ No newline at end of file diff --git a/src/common/Utils.ts b/src/common/Utils.ts index 2d69923f..2750cf29 100644 --- a/src/common/Utils.ts +++ b/src/common/Utils.ts @@ -1,3 +1,5 @@ +import { HTMLChar } from './HTMLCharCodes'; + export class Utils { static GUID(): string { const s4 = (): string => @@ -97,6 +99,25 @@ export class Utils { return d.getUTCFullYear() + '-' + d.getUTCMonth() + '-' + d.getUTCDate(); } + static toIsoTimestampString(YYYYMMDD: string, hhmmss: string): string { + if (YYYYMMDD && hhmmss) { + // Regular expression to match YYYYMMDD format + const dateRegex = /^(\d{4})(\d{2})(\d{2})$/; + // Regular expression to match hhmmss+/-ohom format + const timeRegex = /^(\d{2})(\d{2})(\d{2})([+-]\d{2})?(\d{2})?$/; + const [, year, month, day] = YYYYMMDD.match(dateRegex); + const [, hour, minute, second, offsetHour, offsetMinute] = hhmmss.match(timeRegex); + const isoTimestamp = `${year}-${month}-${day}T${hour}:${minute}:${second}`; + if (offsetHour && offsetMinute) { + return isoTimestamp + `${offsetHour}:${offsetMinute}`; + } else { + return isoTimestamp; + } + } else { + return undefined; + } + } + static makeUTCMidnight(d: number | Date) { if (!(d instanceof Date)) { @@ -125,7 +146,7 @@ export class Utils { } //function to convert timestamp into milliseconds taking offset into account - static timestampToMS(timestamp: string, offset: string) { + static timestampToMS(timestamp: string, offset: string): number { if (!timestamp) { return undefined; } @@ -371,6 +392,31 @@ export class Utils { return curr; } + public static asciiToUTF8(text: string): string { + if (text) { + return Buffer.from(text, 'ascii').toString('utf-8'); + } else { + return text; + } + } + + + + public static decodeHTMLChars(text: string): string { + if (text) { + const newtext = text.replace(/&#([0-9]{1,3});/gi, function (match, numStr) { + return String.fromCharCode(parseInt(numStr, 10)); + }); + return newtext.replace(/&[^;]+;/g, function (match) { + const char = HTMLChar[match]; + return char ? char : match; + }); + } else { + return text; + } + } + + public static isUInt32(value: number, max = 4294967295): boolean { value = parseInt('' + value, 10); return !isNaN(value) && value >= 0 && value <= max; diff --git a/test/backend/assets/sidecar/testimagedesc1.json b/test/backend/assets/sidecar/testimagedesc1.json index bc0aac3b..42f82919 100644 --- a/test/backend/assets/sidecar/testimagedesc1.json +++ b/test/backend/assets/sidecar/testimagedesc1.json @@ -41,9 +41,9 @@ "latitude": 37.871093, "longitude": -122.25678 }, - "city": "test city őúéáűóöí-.,)(=", + "city": "test city őúéáűóöí-.,)(=/%!+\"'", "country": "test country őúéáűóöí-.,)(=/%!+\"'", - "state": "test state őúéáűóöí-.,)(" + "state": "test state őúéáűóöí-.,)(=/%!+\"'" }, "rating": 3, "size": { diff --git a/test/tmp/sqlite.db-journal b/test/tmp/sqlite.db-journal deleted file mode 100644 index 41fef9ee..00000000 Binary files a/test/tmp/sqlite.db-journal and /dev/null differ