diff --git a/.eslintignore b/.eslintignore
index a257c7f6f..9c2c5d7cc 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -1209,6 +1209,7 @@ packages/lib/utils/ActionLogger.test.js
 packages/lib/utils/ActionLogger.js
 packages/lib/utils/credentialFiles.js
 packages/lib/utils/focusHandler.js
+packages/lib/utils/frontMatter.js
 packages/lib/utils/ipc/RemoteMessenger.test.js
 packages/lib/utils/ipc/RemoteMessenger.js
 packages/lib/utils/ipc/TestMessenger.js
diff --git a/.gitignore b/.gitignore
index 2c7ef2073..4a5f4ad49 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1188,6 +1188,7 @@ packages/lib/utils/ActionLogger.test.js
 packages/lib/utils/ActionLogger.js
 packages/lib/utils/credentialFiles.js
 packages/lib/utils/focusHandler.js
+packages/lib/utils/frontMatter.js
 packages/lib/utils/ipc/RemoteMessenger.test.js
 packages/lib/utils/ipc/RemoteMessenger.js
 packages/lib/utils/ipc/TestMessenger.js
diff --git a/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.test.ts b/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.test.ts
index 1bbc5c1a9..5408de784 100644
--- a/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.test.ts
+++ b/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.test.ts
@@ -4,9 +4,9 @@ import Folder from '../../models/Folder';
 import Note from '../../models/Note';
 import Tag from '../../models/Tag';
 import time from '../../time';
-import { fieldOrder } from './InteropService_Exporter_Md_frontmatter';
 import * as fs from 'fs-extra';
 import { ExportModuleOutputFormat } from './types';
+import { fieldOrder } from '../../utils/frontMatter';
 
 async function recreateExportDir() {
 	const dir = exportDir();
diff --git a/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.ts b/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.ts
index 631d618eb..f1fca230a 100644
--- a/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.ts
+++ b/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.ts
@@ -1,13 +1,9 @@
 import InteropService_Exporter_Md from './InteropService_Exporter_Md';
 import BaseModel from '../../BaseModel';
-import Note from '../../models/Note';
 import NoteTag from '../../models/NoteTag';
 import Tag from '../../models/Tag';
-import time from '../../time';
 import { NoteEntity } from '../database/types';
-import { MdFrontMatterExport } from './types';
-
-import * as yaml from 'js-yaml';
+import { serialize } from '../../utils/frontMatter';
 
 interface NoteTagContext {
 	noteTags: Record<string, string[]>;
@@ -19,33 +15,6 @@ interface TagContext {
 
 interface FrontMatterContext extends NoteTagContext, TagContext {}
 
-// There is a special case (negative numbers) where the yaml library will force quotations
-// These need to be stripped
-function trimQuotes(rawOutput: string): string {
-	return rawOutput.split('\n').map(line => {
-		const index = line.indexOf(': \'-');
-		const indexWithSpace = line.indexOf(': \'- ');
-
-		// We don't apply this processing if the string starts with a dash
-		// followed by a space. Those should actually be in quotes, otherwise
-		// they are detected as invalid list items when we later try to import
-		// the file.
-		if (index === indexWithSpace) return line;
-
-		if (index >= 0) {
-			// The plus 2 eats the : and space characters
-			const start = line.substring(0, index + 2);
-			// The plus 3 eats the quote character
-			const end = line.substring(index + 3, line.length - 1);
-			return start + end;
-		}
-
-		return line;
-	}).join('\n');
-}
-
-export const fieldOrder = ['title', 'updated', 'created', 'source', 'author', 'latitude', 'longitude', 'altitude', 'completed?', 'due', 'tags'];
-
 export default class InteropService_Exporter_Md_frontmatter extends InteropService_Exporter_Md {
 
 	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
@@ -94,78 +63,17 @@ export default class InteropService_Exporter_Md_frontmatter extends InteropServi
 		}
 	}
 
-	private convertDate(datetime: number): string {
-		return time.unixMsToRfc3339Sec(datetime);
-	}
-
-	private extractMetadata(note: NoteEntity) {
-		const md: MdFrontMatterExport = {};
-		// Every variable needs to be converted separately, so they will be handles in groups
-		//
-		// title
-		if (note.title) { md['title'] = note.title; }
-
-		// source, author
-		if (note.source_url) { md['source'] = note.source_url; }
-		if (note.author) { md['author'] = note.author; }
-
-		// locations
-		// non-strict inequality is used here to interpret the location strings
-		// as numbers i.e 0.000000 is the same as 0.
-		// This is necessary because these fields are officially numbers, but often
-		// contain strings.
-
-		// eslint-disable-next-line eqeqeq
-		if (note.latitude != 0 || note.longitude != 0 || note.altitude != 0) {
-			md['latitude'] = note.latitude;
-			md['longitude'] = note.longitude;
-			md['altitude'] = note.altitude;
-		}
-
-		// todo
-		if (note.is_todo) {
-			// boolean is not support by the yaml FAILSAFE_SCHEMA
-			md['completed?'] = note.todo_completed ? 'yes' : 'no';
-		}
-		if (note.todo_due) { md['due'] = this.convertDate(note.todo_due); }
-
-		// time
-		if (note.user_updated_time) { md['updated'] = this.convertDate(note.user_updated_time); }
-		if (note.user_created_time) { md['created'] = this.convertDate(note.user_created_time); }
-
-		// tags
+	protected async getNoteExportContent_(modNote: NoteEntity) {
+		let tagTitles: string[] = [];
 		const context: FrontMatterContext = this.context();
-		if (context.noteTags[note.id]) {
-			const tagIds = context.noteTags[note.id];
+		if (context.noteTags[modNote.id]) {
+			const tagIds = context.noteTags[modNote.id];
 			// In some cases a NoteTag can still exist, while the Tag does not. In this case, tagTitles
 			// for that tagId will return undefined, which can't be handled by the yaml library (issue #7782)
-			const tags = tagIds.map((id: string) => context.tagTitles[id]).filter(e => !!e).sort();
-			if (tags.length > 0) {
-				md['tags'] = tags;
-			}
+			tagTitles = tagIds.map((id: string) => context.tagTitles[id]).filter(e => !!e).sort();
 		}
 
-		// This guarentees that fields will always be ordered the same way
-		// which can be useful if users are using this for generating diffs
-		const sort = (a: string, b: string) => {
-			return fieldOrder.indexOf(a) - fieldOrder.indexOf(b);
-		};
-
-		// The FAILSAFE_SCHEMA along with noCompatMode allows this to export strings that look
-		// like numbers (or yes/no) without the added '' quotes around the text
-		const rawOutput = yaml.dump(md, { sortKeys: sort, noCompatMode: true, schema: yaml.FAILSAFE_SCHEMA });
-		// The additional trimming is the unfortunate result of the yaml library insisting on
-		// quoting negative numbers.
-		// For now the trimQuotes function only trims quotes associated with a negative number
-		// but it can be extended to support more special cases in the future if necessary.
-		return trimQuotes(rawOutput);
-	}
-
-
-	protected async getNoteExportContent_(modNote: NoteEntity) {
-		const noteContent = await Note.replaceResourceInternalToExternalLinks(await Note.serialize(modNote, ['body']));
-		const metadata = this.extractMetadata(modNote);
-		return `---\n${metadata}---\n\n${noteContent}`;
+		return serialize(modNote, tagTitles);
 	}
 
 }
diff --git a/packages/lib/services/interop/InteropService_Importer_Md_frontmatter.ts b/packages/lib/services/interop/InteropService_Importer_Md_frontmatter.ts
index 1570738e2..f71337940 100644
--- a/packages/lib/services/interop/InteropService_Importer_Md_frontmatter.ts
+++ b/packages/lib/services/interop/InteropService_Importer_Md_frontmatter.ts
@@ -1,173 +1,15 @@
 import InteropService_Importer_Md from './InteropService_Importer_Md';
 import Note from '../../models/Note';
 import Tag from '../../models/Tag';
-import time from '../../time';
-import { NoteEntity } from '../database/types';
-
-import * as yaml from 'js-yaml';
 import shim from '../../shim';
-
-interface ParsedMeta {
-	metadata: NoteEntity;
-	tags: string[];
-}
-
-function isTruthy(str: string): boolean {
-	return str.toLowerCase() in ['true', 'yes'];
-}
-
-// Enforces exactly 2 spaces in front of list items
-function normalizeYamlWhitespace(yaml: string[]): string[] {
-	return yaml.map(line => {
-		const l = line.trimStart();
-		if (l.startsWith('-')) {
-			return `  ${l}`;
-		}
-
-		return line;
-	});
-}
-
-// This is a helper function to convert an arbitrary author variable into a string
-// the use case is for loading from r-markdown/pandoc style notes
-// references:
-// https://pandoc.org/MANUAL.html#extension-yaml_metadata_block
-// https://github.com/hao203/rmarkdown-YAML
-// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
-function extractAuthor(author: any): string {
-	if (!author) return '';
-
-	if (typeof(author) === 'string') {
-		return author;
-	} else if (Array.isArray(author)) {
-		// Joplin only supports a single author, so we take the first one
-		return extractAuthor(author[0]);
-	} else if (typeof(author) === 'object') {
-		if ('name' in author) {
-			return author['name'];
-		}
-	}
-
-	return '';
-}
+import { parse } from '../../utils/frontMatter';
 
 export default class InteropService_Importer_Md_frontmatter extends InteropService_Importer_Md {
 
-	private getNoteHeader(note: string) {
-		// Ignore the leading `---`
-		const lines = note.split('\n').slice(1);
-		let inHeader = true;
-		const headerLines: string[] = [];
-		const bodyLines: string[] = [];
-		for (let i = 0; i < lines.length; i++) {
-			const line = lines[i];
-			const nextLine = i + 1 <= lines.length - 1 ? lines[i + 1] : '';
-
-			if (inHeader && line.startsWith('---')) {
-				inHeader = false;
-
-				// Need to eat the extra newline after the yaml block. Note that
-				// if the next line is not an empty line, we keep it. Fixes
-				// https://github.com/laurent22/joplin/issues/8802
-				if (nextLine.trim() === '') i++;
-
-				continue;
-			}
-
-			if (inHeader) { headerLines.push(line); } else { bodyLines.push(line); }
-		}
-
-		const normalizedHeaderLines = normalizeYamlWhitespace(headerLines);
-		const header = normalizedHeaderLines.join('\n');
-		const body = bodyLines.join('\n');
-
-		return { header, body };
-	}
-
-	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
-	private toLowerCase(obj: Record<string, any>): Record<string, any> {
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
-		const newObj: Record<string, any> = {};
-		for (const key of Object.keys(obj)) {
-			newObj[key.toLowerCase()] = obj[key];
-		}
-
-		return newObj;
-	}
-
-	private parseYamlNote(note: string): ParsedMeta {
-		if (!note.startsWith('---')) return { metadata: { body: note }, tags: [] };
-
-		const { header, body } = this.getNoteHeader(note);
-
-		const md = this.toLowerCase(yaml.load(header, { schema: yaml.FAILSAFE_SCHEMA }));
-		const metadata: NoteEntity = {
-			title: md['title'] || '',
-			source_url: md['source'] || '',
-			is_todo: ('completed?' in md) ? 1 : 0,
-		};
-
-		if ('author' in md) { metadata['author'] = extractAuthor(md['author']); }
-
-		// The date fallback gives support for MultiMarkdown format, r-markdown, and pandoc formats
-		if ('created' in md) {
-			metadata['user_created_time'] = time.anythingToMs(md['created'], Date.now());
-		} else if ('date' in md) {
-			metadata['user_created_time'] = time.anythingToMs(md['date'], Date.now());
-		} else if ('created_at' in md) {
-			// Add support for Notesnook
-			metadata['user_created_time'] = time.anythingToMs(md['created_at'], Date.now());
-		}
-
-		if ('updated' in md) {
-			metadata['user_updated_time'] = time.anythingToMs(md['updated'], Date.now());
-		} else if ('lastmod' in md) {
-			// Add support for hugo
-			metadata['user_updated_time'] = time.anythingToMs(md['lastmod'], Date.now());
-		} else if ('date' in md) {
-			metadata['user_updated_time'] = time.anythingToMs(md['date'], Date.now());
-		} else if ('updated_at' in md) {
-			// Notesnook
-			metadata['user_updated_time'] = time.anythingToMs(md['updated_at'], Date.now());
-		}
-
-		if ('latitude' in md) { metadata['latitude'] = md['latitude']; }
-		if ('longitude' in md) { metadata['longitude'] = md['longitude']; }
-		if ('altitude' in md) { metadata['altitude'] = md['altitude']; }
-
-		if (metadata.is_todo) {
-			if (isTruthy(md['completed?'])) {
-				// Completed time isn't preserved, so we use a sane choice here
-				metadata['todo_completed'] = metadata['user_updated_time'];
-			}
-			if ('due' in md) {
-				const due_date = time.anythingToMs(md['due'], null);
-				if (due_date) { metadata['todo_due'] = due_date; }
-			}
-		}
-
-		// Tags are handled separately from typical metadata
-		let tags: string[] = [];
-		if ('tags' in md) {
-			// Only create unique tags
-			tags = md['tags'];
-		} else if ('keywords' in md) {
-			// Adding support for r-markdown/pandoc
-			tags = tags.concat(md['keywords']);
-		}
-
-		// Only create unique tags
-		tags = [...new Set(tags)];
-
-		metadata['body'] = body;
-
-		return { metadata, tags };
-	}
-
 	public async importFile(filePath: string, parentFolderId: string) {
 		try {
 			const note = await super.importFile(filePath, parentFolderId);
-			const { metadata, tags } = this.parseYamlNote(note.body);
+			const { metadata, tags } = parse(note.body);
 
 			const updatedNote = { ...note, ...metadata };
 
diff --git a/packages/lib/utils/frontMatter.ts b/packages/lib/utils/frontMatter.ts
new file mode 100644
index 000000000..4bb8df8d2
--- /dev/null
+++ b/packages/lib/utils/frontMatter.ts
@@ -0,0 +1,276 @@
+import Note from '../models/Note';
+import { NoteEntity } from '../services/database/types';
+import { MdFrontMatterExport } from '../services/interop/types';
+import time from '../time';
+import * as yaml from 'js-yaml';
+
+// Result of parsing a Markdown document that may start with a YAML front
+// matter block: the note properties read from the block (plus the remaining
+// body), and the tag titles, which are kept apart from the note properties.
+export interface ParsedMeta {
+	metadata: NoteEntity;
+	tags: string[];
+}
+
+// Formats a timestamp in milliseconds for the `updated`, `created` and `due` fields
+const convertDate = (datetime: number): string => {
+	return time.unixMsToRfc3339Sec(datetime);
+};
+
+// Order in which the keys are written to the front matter block. Note that
+// `id` is only read back by parse(): noteToFrontMatter() never writes it.
+export const fieldOrder = ['title', 'id', 'updated', 'created', 'source', 'author', 'latitude', 'longitude', 'altitude', 'completed?', 'due', 'tags'];
+
+// There is a special case (negative numbers) where the yaml library will force quotations
+// These need to be stripped
+function trimQuotes(rawOutput: string): string {
+	return rawOutput.split('\n').map(line => {
+		const index = line.indexOf(': \'-');
+		const indexWithSpace = line.indexOf(': \'- ');
+
+		// We don't apply this processing if the string starts with a dash
+		// followed by a space. Those should actually be in quotes, otherwise
+		// they are detected as invalid list items when we later try to import
+		// the file.
+		if (index === indexWithSpace) return line;
+
+		if (index >= 0) {
+			// The plus 2 eats the : and space characters
+			const start = line.substring(0, index + 2);
+			// The plus 3 eats the quote character
+			const end = line.substring(index + 3, line.length - 1);
+			return start + end;
+		}
+
+		return line;
+	}).join('\n');
+}
+
+// Builds the YAML text of the front matter block for `note`, without the
+// surrounding `---` delimiters. Empty fields are left out, and `tagTitles`
+// is written under `tags` when it contains at least one title.
+export const noteToFrontMatter = (note: NoteEntity, tagTitles: string[]) => {
+	const md: MdFrontMatterExport = {};
+	// Every variable needs to be converted separately, so they will be handled in groups
+	//
+	// title
+	if (note.title) { md['title'] = note.title; }
+
+	// source, author
+	if (note.source_url) { md['source'] = note.source_url; }
+	if (note.author) { md['author'] = note.author; }
+
+	// locations
+	// non-strict inequality is used here to interpret the location strings
+	// as numbers i.e 0.000000 is the same as 0.
+	// This is necessary because these fields are officially numbers, but often
+	// contain strings.
+
+	// eslint-disable-next-line eqeqeq
+	if (note.latitude != 0 || note.longitude != 0 || note.altitude != 0) {
+		md['latitude'] = note.latitude;
+		md['longitude'] = note.longitude;
+		md['altitude'] = note.altitude;
+	}
+
+	// todo
+	if (note.is_todo) {
+		// boolean is not supported by the yaml FAILSAFE_SCHEMA
+		md['completed?'] = note.todo_completed ? 'yes' : 'no';
+	}
+	if (note.todo_due) { md['due'] = convertDate(note.todo_due); }
+
+	// time
+	if (note.user_updated_time) { md['updated'] = convertDate(note.user_updated_time); }
+	if (note.user_created_time) { md['created'] = convertDate(note.user_created_time); }
+
+	// tags
+	if (tagTitles.length) md['tags'] = tagTitles;
+
+	// This guarantees that fields will always be ordered the same way
+	// which can be useful if users are using this for generating diffs
+	const sort = (a: string, b: string) => {
+		return fieldOrder.indexOf(a) - fieldOrder.indexOf(b);
+	};
+
+	// The FAILSAFE_SCHEMA along with noCompatMode allows this to export strings that look
+	// like numbers (or yes/no) without the added '' quotes around the text
+	const rawOutput = yaml.dump(md, { sortKeys: sort, noCompatMode: true, schema: yaml.FAILSAFE_SCHEMA });
+	// The additional trimming is the unfortunate result of the yaml library insisting on
+	// quoting negative numbers.
+	// For now the trimQuotes function only trims quotes associated with a negative number
+	// but it can be extended to support more special cases in the future if necessary.
+	return trimQuotes(rawOutput);
+};
+
+// Serializes the note body (as returned by Note.serialize, with its links
+// converted by Note.replaceResourceInternalToExternalLinks) and prepends the
+// front matter built by noteToFrontMatter(), wrapped in `---` delimiters.
+export const serialize = async (modNote: NoteEntity, tagTitles: string[]) => {
+	const noteContent = await Note.replaceResourceInternalToExternalLinks(await Note.serialize(modNote, ['body']));
+	const metadata = noteToFrontMatter(modNote, tagTitles);
+	return `---\n${metadata}---\n\n${noteContent}`;
+};
+
+// Returns true when a front matter value spells "true" or "yes"
+// (case-insensitive). Anything that is not a string is never truthy.
+function isTruthy(value: unknown): boolean {
+	return typeof value === 'string' && ['true', 'yes'].includes(value.toLowerCase());
+}
+
+// Enforces exactly 2 spaces in front of list items
+function normalizeYamlWhitespace(yaml: string[]): string[] {
+	return yaml.map(line => {
+		const l = line.trimStart();
+		if (l.startsWith('-')) {
+			return `  ${l}`;
+		}
+
+		return line;
+	});
+}
+
+// This is a helper function to convert an arbitrary author variable into a string
+// the use case is for loading from r-markdown/pandoc style notes
+// references:
+// https://pandoc.org/MANUAL.html#extension-yaml_metadata_block
+// https://github.com/hao203/rmarkdown-YAML
+function extractAuthor(author: unknown): string {
+	if (!author) return '';
+
+	if (typeof(author) === 'string') {
+		return author;
+	} else if (Array.isArray(author)) {
+		// Joplin only supports a single author, so we take the first one
+		return extractAuthor(author[0]);
+	} else if (typeof(author) === 'object') {
+		if ('name' in author) {
+			return (author as { name: string }).name;
+		}
+	}
+
+	return '';
+}
+
+// Splits a document that starts with `---` into the YAML header (the lines
+// up to the next line starting with `---`) and the body that follows it.
+const getNoteHeader = (note: string) => {
+	// Ignore the leading `---`
+	const lines = note.split('\n').slice(1);
+	let inHeader = true;
+	const headerLines: string[] = [];
+	const bodyLines: string[] = [];
+	for (let i = 0; i < lines.length; i++) {
+		const line = lines[i];
+		const nextLine = i + 1 <= lines.length - 1 ? lines[i + 1] : '';
+
+		if (inHeader && line.startsWith('---')) {
+			inHeader = false;
+
+			// Need to eat the extra newline after the yaml block. Note that
+			// if the next line is not an empty line, we keep it. Fixes
+			// https://github.com/laurent22/joplin/issues/8802
+			if (nextLine.trim() === '') i++;
+
+			continue;
+		}
+
+		if (inHeader) { headerLines.push(line); } else { bodyLines.push(line); }
+	}
+
+	const normalizedHeaderLines = normalizeYamlWhitespace(headerLines);
+	const header = normalizedHeaderLines.join('\n');
+	const body = bodyLines.join('\n');
+
+	return { header, body };
+};
+
+// Returns a copy of `obj` in which every key is lower-cased
+// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
+const toLowerCase = (obj: Record<string, any>): Record<string, any> => {
+	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
+	const newObj: Record<string, any> = {};
+	for (const key of Object.keys(obj)) {
+		newObj[key.toLowerCase()] = obj[key];
+	}
+
+	return newObj;
+};
+
+// Parses a Markdown document. When it does not start with `---` the whole
+// text is returned as the body. Otherwise the front matter is loaded with the
+// FAILSAFE_SCHEMA and mapped to note properties (keys are case-insensitive).
+export const parse = (note: string): ParsedMeta => {
+	if (!note.startsWith('---')) return { metadata: { body: note }, tags: [] };
+
+	const { header, body } = getNoteHeader(note);
+
+	const md = toLowerCase(yaml.load(header, { schema: yaml.FAILSAFE_SCHEMA }) ?? {});
+	const metadata: NoteEntity = {
+		title: md['title'] || '',
+		source_url: md['source'] || '',
+		is_todo: ('completed?' in md) ? 1 : 0,
+	};
+
+	if ('id' in md && typeof md['id'] === 'string' && md.id.match(/^[0-9a-zA-Z]{32}$/)) {
+		metadata['id'] = md.id;
+	}
+
+	if ('author' in md) { metadata['author'] = extractAuthor(md['author']); }
+
+	// The date fallback gives support for MultiMarkdown format, r-markdown, and pandoc formats
+	if ('created' in md) {
+		metadata['user_created_time'] = time.anythingToMs(md['created'], Date.now());
+	} else if ('date' in md) {
+		metadata['user_created_time'] = time.anythingToMs(md['date'], Date.now());
+	} else if ('created_at' in md) {
+		// Add support for Notesnook
+		metadata['user_created_time'] = time.anythingToMs(md['created_at'], Date.now());
+	}
+
+	if ('updated' in md) {
+		metadata['user_updated_time'] = time.anythingToMs(md['updated'], Date.now());
+	} else if ('lastmod' in md) {
+		// Add support for hugo
+		metadata['user_updated_time'] = time.anythingToMs(md['lastmod'], Date.now());
+	} else if ('date' in md) {
+		metadata['user_updated_time'] = time.anythingToMs(md['date'], Date.now());
+	} else if ('updated_at' in md) {
+		// Notesnook
+		metadata['user_updated_time'] = time.anythingToMs(md['updated_at'], Date.now());
+	}
+
+	if ('latitude' in md) { metadata['latitude'] = md['latitude']; }
+	if ('longitude' in md) { metadata['longitude'] = md['longitude']; }
+	if ('altitude' in md) { metadata['altitude'] = md['altitude']; }
+
+	if (metadata.is_todo) {
+		if (isTruthy(md['completed?'])) {
+			// Completed time isn't preserved, so we use a sane choice here:
+			// the update time, or the current time when there is none
+			metadata['todo_completed'] = metadata['user_updated_time'] ?? Date.now();
+		}
+		if ('due' in md) {
+			const due_date = time.anythingToMs(md['due'], null);
+			if (due_date) { metadata['todo_due'] = due_date; }
+		}
+	}
+
+	// Tags are handled separately from typical metadata
+	let tags: string[] = [];
+	if ('tags' in md) {
+		// concat() accepts a list as well as a single scalar (`tags: foo`),
+		// which the FAILSAFE_SCHEMA loads as a plain string
+		tags = tags.concat(md['tags']);
+	} else if ('keywords' in md) {
+		// Adding support for r-markdown/pandoc
+		tags = tags.concat(md['keywords']);
+	}
+
+	// Only create unique tags, and drop anything that is not a non-empty string
+	tags = [...new Set(tags)].filter(tag => typeof tag === 'string' && tag !== '');
+
+	metadata['body'] = body;
+
+	return { metadata, tags };
+};