Chore: Markdown + frontmatter import and export: Extract frontMatter logic into a separate file (#10508)

Co-authored-by: Laurent Cozic <laurent22@users.noreply.github.com>
2024-12-21 09:38:01 +02:00 · 2024-05-30 00:40:52 -07:00 · 2024-05-30 00:40:52 -07:00 · f938d5f489
commit f938d5f489
parent 99b840da34
6 changed files with 266 additions and 260 deletions
--- a/.eslintignore
+++ b/.eslintignore
@ -1209,6 +1209,7 @@ packages/lib/utils/ActionLogger.test.js
 packages/lib/utils/ActionLogger.js
 packages/lib/utils/credentialFiles.js
 packages/lib/utils/focusHandler.js
 packages/lib/utils/frontMatter.js
 packages/lib/utils/ipc/RemoteMessenger.test.js
 packages/lib/utils/ipc/RemoteMessenger.js
 packages/lib/utils/ipc/TestMessenger.js
--- a/.gitignore
+++ b/.gitignore
@ -1188,6 +1188,7 @@ packages/lib/utils/ActionLogger.test.js
 packages/lib/utils/ActionLogger.js
 packages/lib/utils/credentialFiles.js
 packages/lib/utils/focusHandler.js
 packages/lib/utils/frontMatter.js
 packages/lib/utils/ipc/RemoteMessenger.test.js
 packages/lib/utils/ipc/RemoteMessenger.js
 packages/lib/utils/ipc/TestMessenger.js
--- a/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.test.ts
+++ b/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.test.ts
@ -4,9 +4,9 @@ import Folder from '../../models/Folder';
 import Note from '../../models/Note';
 import Tag from '../../models/Tag';
 import time from '../../time';
 import { fieldOrder } from './InteropService_Exporter_Md_frontmatter';
 import * as fs from 'fs-extra';
 import { ExportModuleOutputFormat } from './types';
 import { fieldOrder } from '../../utils/frontMatter';
 async function recreateExportDir() {
 	const dir = exportDir();
--- a/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.ts
+++ b/packages/lib/services/interop/InteropService_Exporter_Md_frontmatter.ts
@ -1,13 +1,9 @@
 import InteropService_Exporter_Md from './InteropService_Exporter_Md';
 import BaseModel from '../../BaseModel';
 import Note from '../../models/Note';
 import NoteTag from '../../models/NoteTag';
 import Tag from '../../models/Tag';
 import time from '../../time';
 import { NoteEntity } from '../database/types';
-import { MdFrontMatterExport } from './types';
+import { serialize } from '../../utils/frontMatter';
 import * as yaml from 'js-yaml';
 interface NoteTagContext {
 	noteTags: Record<string, string[]>;
@ -19,33 +15,6 @@ interface TagContext {
 interface FrontMatterContext extends NoteTagContext, TagContext {}
 // The yaml library force-quotes values that look like negative numbers
 // (e.g. `latitude: '-12.5'`). This strips those quotes, one line at a time.
 function trimQuotes(rawOutput: string): string {
 	const unquote = (text: string): string => {
 		const dashAt = text.indexOf(': \'-');
 		const dashSpaceAt = text.indexOf(': \'- ');
 		// A value that starts with a dash followed by a space must stay
 		// quoted, otherwise it is detected as an invalid list item when the
 		// file is imported again. When the pattern is absent both searches
 		// return -1, so the line is also returned untouched.
 		if (dashAt === dashSpaceAt || dashAt < 0) return text;
 		// Keep everything up to and including the ": " separator
 		const head = text.slice(0, dashAt + 2);
 		// Skip the opening quote and drop the last character of the line
 		const tail = text.slice(dashAt + 3, text.length - 1);
 		return head + tail;
 	};
 	return rawOutput.split('\n').map(line => unquote(line)).join('\n');
 }
 // Order in which front matter keys are sorted when the metadata is dumped to YAML
 export const fieldOrder = ['title', 'updated', 'created', 'source', 'author', 'latitude', 'longitude', 'altitude', 'completed?', 'due', 'tags'];
 export default class InteropService_Exporter_Md_frontmatter extends InteropService_Exporter_Md {
 	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
@ -94,78 +63,17 @@ export default class InteropService_Exporter_Md_frontmatter extends InteropServi
 		}
 	}
-	private convertDate(datetime: number): string {
+	protected async getNoteExportContent_(modNote: NoteEntity) {
-		return time.unixMsToRfc3339Sec(datetime);
+		let tagTitles: string[] = [];
 	}
 	private extractMetadata(note: NoteEntity) {
 		const md: MdFrontMatterExport = {};
 		// Every variable needs to be converted separately, so they will be handles in groups
 		//
 		// title
 		if (note.title) { md['title'] = note.title; }
 		// source, author
 		if (note.source_url) { md['source'] = note.source_url; }
 		if (note.author) { md['author'] = note.author; }
 		// locations
 		// non-strict inequality is used here to interpret the location strings
 		// as numbers i.e 0.000000 is the same as 0.
 		// This is necessary because these fields are officially numbers, but often
 		// contain strings.
 		// eslint-disable-next-line eqeqeq
 		if (note.latitude != 0 || note.longitude != 0 || note.altitude != 0) {
 			md['latitude'] = note.latitude;
 			md['longitude'] = note.longitude;
 			md['altitude'] = note.altitude;
 		}
 		// todo
 		if (note.is_todo) {
 			// boolean is not support by the yaml FAILSAFE_SCHEMA
 			md['completed?'] = note.todo_completed ? 'yes' : 'no';
 		}
 		if (note.todo_due) { md['due'] = this.convertDate(note.todo_due); }
 		// time
 		if (note.user_updated_time) { md['updated'] = this.convertDate(note.user_updated_time); }
 		if (note.user_created_time) { md['created'] = this.convertDate(note.user_created_time); }
 		// tags
 		const context: FrontMatterContext = this.context();
-		if (context.noteTags[note.id]) {
+		if (context.noteTags[modNote.id]) {
-			const tagIds = context.noteTags[note.id];
+			const tagIds = context.noteTags[modNote.id];
 			// In some cases a NoteTag can still exist, while the Tag does not. In this case, tagTitles
 			// for that tagId will return undefined, which can't be handled by the yaml library (issue #7782)
-			const tags = tagIds.map((id: string) => context.tagTitles[id]).filter(e => !!e).sort();
+			tagTitles = tagIds.map((id: string) => context.tagTitles[id]).filter(e => !!e).sort();
 			if (tags.length > 0) {
 				md['tags'] = tags;
 			}
 		}
-		// This guarentees that fields will always be ordered the same way
+		return serialize(modNote, tagTitles);
 		// which can be useful if users are using this for generating diffs
 		const sort = (a: string, b: string) => {
 			return fieldOrder.indexOf(a) - fieldOrder.indexOf(b);
 		};
 		// The FAILSAFE_SCHEMA along with noCompatMode allows this to export strings that look
 		// like numbers (or yes/no) without the added '' quotes around the text
 		const rawOutput = yaml.dump(md, { sortKeys: sort, noCompatMode: true, schema: yaml.FAILSAFE_SCHEMA });
 		// The additional trimming is the unfortunate result of the yaml library insisting on
 		// quoting negative numbers.
 		// For now the trimQuotes function only trims quotes associated with a negative number
 		// but it can be extended to support more special cases in the future if necessary.
 		return trimQuotes(rawOutput);
 	}
 	protected async getNoteExportContent_(modNote: NoteEntity) {
 		const noteContent = await Note.replaceResourceInternalToExternalLinks(await Note.serialize(modNote, ['body']));
 		const metadata = this.extractMetadata(modNote);
 		return `---\n${metadata}---\n\n${noteContent}`;
 	}
 }
--- a/packages/lib/services/interop/InteropService_Importer_Md_frontmatter.ts
+++ b/packages/lib/services/interop/InteropService_Importer_Md_frontmatter.ts
@ -1,173 +1,15 @@
 import InteropService_Importer_Md from './InteropService_Importer_Md';
 import Note from '../../models/Note';
 import Tag from '../../models/Tag';
 import time from '../../time';
 import { NoteEntity } from '../database/types';
 import * as yaml from 'js-yaml';
 import shim from '../../shim';
-
+import { parse } from '../../utils/frontMatter';
 // Result of parsing a Markdown note that may contain a front matter block
 interface ParsedMeta {
 	// Note properties read from the front matter, plus the note body
 	metadata: NoteEntity;
 	// Tag titles found in the front matter
 	tags: string[];
 }
 // Returns true when a front matter value is "true" or "yes" (case-insensitive).
 // Array.includes() is required here: the `in` operator tests array *indices*,
 // so `'yes' in ['true', 'yes']` is false while `'0' in ['true', 'yes']` is true.
 // Non-string values are treated as false instead of throwing.
 function isTruthy(str: string): boolean {
 	if (typeof str !== 'string') return false;
 	return ['true', 'yes'].includes(str.toLowerCase());
 }
 // Re-indents every list item line (a line whose first non-blank character is
 // a dash) so that it starts with exactly 2 spaces. Other lines are unchanged.
 function normalizeYamlWhitespace(yaml: string[]): string[] {
 	const normalized: string[] = [];
 	for (const rawLine of yaml) {
 		const stripped = rawLine.trimStart();
 		normalized.push(stripped.startsWith('-') ? `  ${stripped}` : rawLine);
 	}
 	return normalized;
 }
 // Converts an arbitrary `author` value into a single string. The use case is
 // loading r-markdown/pandoc style notes, where the author can be a string, a
 // list, or an object with a `name` property.
 // references:
 // https://pandoc.org/MANUAL.html#extension-yaml_metadata_block
 // https://github.com/hao203/rmarkdown-YAML
 // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
 function extractAuthor(author: any): string {
 	// Joplin only supports a single author, so for a list we take the first one
 	if (Array.isArray(author)) return extractAuthor(author[0]);
 	if (typeof author === 'string') return author;
 	const hasName = !!author && typeof author === 'object' && 'name' in author;
 	return hasName ? author['name'] : '';
 }
 export default class InteropService_Importer_Md_frontmatter extends InteropService_Importer_Md {
 	private getNoteHeader(note: string) {
 		// Ignore the leading `---`
 		const lines = note.split('\n').slice(1);
 		let inHeader = true;
 		const headerLines: string[] = [];
 		const bodyLines: string[] = [];
 		for (let i = 0; i < lines.length; i++) {
 			const line = lines[i];
 			const nextLine = i + 1 <= lines.length - 1 ? lines[i + 1] : '';
 			if (inHeader && line.startsWith('---')) {
 				inHeader = false;
 				// Need to eat the extra newline after the yaml block. Note that
 				// if the next line is not an empty line, we keep it. Fixes
 				// https://github.com/laurent22/joplin/issues/8802
 				if (nextLine.trim() === '') i++;
 				continue;
 			}
 			if (inHeader) { headerLines.push(line); } else { bodyLines.push(line); }
 		}
 		const normalizedHeaderLines = normalizeYamlWhitespace(headerLines);
 		const header = normalizedHeaderLines.join('\n');
 		const body = bodyLines.join('\n');
 		return { header, body };
 	}
 	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
 	private toLowerCase(obj: Record<string, any>): Record<string, any> {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
 		const newObj: Record<string, any> = {};
 		for (const key of Object.keys(obj)) {
 			newObj[key.toLowerCase()] = obj[key];
 		}
 		return newObj;
 	}
 	private parseYamlNote(note: string): ParsedMeta {
 		if (!note.startsWith('---')) return { metadata: { body: note }, tags: [] };
 		const { header, body } = this.getNoteHeader(note);
 		const md = this.toLowerCase(yaml.load(header, { schema: yaml.FAILSAFE_SCHEMA }));
 		const metadata: NoteEntity = {
 			title: md['title'] || '',
 			source_url: md['source'] || '',
 			is_todo: ('completed?' in md) ? 1 : 0,
 		};
 		if ('author' in md) { metadata['author'] = extractAuthor(md['author']); }
 		// The date fallback gives support for MultiMarkdown format, r-markdown, and pandoc formats
 		if ('created' in md) {
 			metadata['user_created_time'] = time.anythingToMs(md['created'], Date.now());
 		} else if ('date' in md) {
 			metadata['user_created_time'] = time.anythingToMs(md['date'], Date.now());
 		} else if ('created_at' in md) {
 			// Add support for Notesnook
 			metadata['user_created_time'] = time.anythingToMs(md['created_at'], Date.now());
 		}
 		if ('updated' in md) {
 			metadata['user_updated_time'] = time.anythingToMs(md['updated'], Date.now());
 		} else if ('lastmod' in md) {
 			// Add support for hugo
 			metadata['user_updated_time'] = time.anythingToMs(md['lastmod'], Date.now());
 		} else if ('date' in md) {
 			metadata['user_updated_time'] = time.anythingToMs(md['date'], Date.now());
 		} else if ('updated_at' in md) {
 			// Notesnook
 			metadata['user_updated_time'] = time.anythingToMs(md['updated_at'], Date.now());
 		}
 		if ('latitude' in md) { metadata['latitude'] = md['latitude']; }
 		if ('longitude' in md) { metadata['longitude'] = md['longitude']; }
 		if ('altitude' in md) { metadata['altitude'] = md['altitude']; }
 		if (metadata.is_todo) {
 			if (isTruthy(md['completed?'])) {
 				// Completed time isn't preserved, so we use a sane choice here
 				metadata['todo_completed'] = metadata['user_updated_time'];
 			}
 			if ('due' in md) {
 				const due_date = time.anythingToMs(md['due'], null);
 				if (due_date) { metadata['todo_due'] = due_date; }
 			}
 		}
 		// Tags are handled separately from typical metadata
 		let tags: string[] = [];
 		if ('tags' in md) {
 			// Only create unique tags
 			tags = md['tags'];
 		} else if ('keywords' in md) {
 			// Adding support for r-markdown/pandoc
 			tags = tags.concat(md['keywords']);
 		}
 		// Only create unique tags
 		tags = [...new Set(tags)];
 		metadata['body'] = body;
 		return { metadata, tags };
 	}
 	public async importFile(filePath: string, parentFolderId: string) {
 		try {
 			const note = await super.importFile(filePath, parentFolderId);
-			const { metadata, tags } = this.parseYamlNote(note.body);
+			const { metadata, tags } = parse(note.body);
 			const updatedNote = { ...note, ...metadata };
--- a/packages/lib/utils/frontMatter.ts
+++ b/packages/lib/utils/frontMatter.ts
@ -0,0 +1,254 @@
 import Note from '../models/Note';
 import { NoteEntity } from '../services/database/types';
 import { MdFrontMatterExport } from '../services/interop/types';
 import time from '../time';
 import * as yaml from 'js-yaml';
 // Result of parsing a Markdown note that may contain a front matter block
 export interface ParsedMeta {
 	// Note properties read from the front matter, plus the note body
 	metadata: NoteEntity;
 	// Tag titles found in the front matter
 	tags: string[];
 }
 // Formats a Unix timestamp in milliseconds using time.unixMsToRfc3339Sec()
 const convertDate = (datetime: number): string => time.unixMsToRfc3339Sec(datetime);
 // Order in which front matter keys are written: noteToFrontMatter() sorts the
 // YAML keys by their position in this list.
 export const fieldOrder = ['title', 'id', 'updated', 'created', 'source', 'author', 'latitude', 'longitude', 'altitude', 'completed?', 'due', 'tags'];
 // The yaml library force-quotes values that look like negative numbers
 // (e.g. `latitude: '-12.5'`). This strips those quotes, one line at a time.
 function trimQuotes(rawOutput: string): string {
 	const processed: string[] = [];
 	for (const text of rawOutput.split('\n')) {
 		const dashAt = text.indexOf(': \'-');
 		const dashSpaceAt = text.indexOf(': \'- ');
 		// A value that starts with a dash followed by a space must stay
 		// quoted, otherwise it is detected as an invalid list item when the
 		// file is imported again. When the pattern is absent both searches
 		// return -1, so the line is also kept untouched.
 		const keepAsIs = dashAt === dashSpaceAt || dashAt < 0;
 		if (keepAsIs) {
 			processed.push(text);
 			continue;
 		}
 		// Keep everything up to and including the ": " separator
 		const head = text.slice(0, dashAt + 2);
 		// Skip the opening quote and drop the last character of the line
 		const tail = text.slice(dashAt + 3, text.length - 1);
 		processed.push(head + tail);
 	}
 	return processed.join('\n');
 }
 // Builds the YAML front matter text for `note`, using `tagTitles` as the
 // note's tags. Only fields that have a value are written. The result is the
 // YAML text only; it does not include any `---` delimiter lines.
 export const noteToFrontMatter = (note: NoteEntity, tagTitles: string[]) => {
 	const frontMatter: MdFrontMatterExport = {};
 	// Every field needs to be converted separately, so they are handled in groups.
 	//
 	// Title, source and author are copied as-is when present
 	if (note.title) frontMatter['title'] = note.title;
 	if (note.source_url) frontMatter['source'] = note.source_url;
 	if (note.author) frontMatter['author'] = note.author;
 	// Location: loose equality is used on purpose so that location strings are
 	// interpreted as numbers, i.e. 0.000000 is the same as 0. This is necessary
 	// because these fields are officially numbers, but often contain strings.
 	// eslint-disable-next-line eqeqeq
 	const hasNoLocation = note.latitude == 0 && note.longitude == 0 && note.altitude == 0;
 	if (!hasNoLocation) {
 		frontMatter['latitude'] = note.latitude;
 		frontMatter['longitude'] = note.longitude;
 		frontMatter['altitude'] = note.altitude;
 	}
 	// To-do state: booleans are not supported by the yaml FAILSAFE_SCHEMA, so
 	// the completion flag is written as yes/no
 	if (note.is_todo) {
 		const completedLabel = note.todo_completed ? 'yes' : 'no';
 		frontMatter['completed?'] = completedLabel;
 	}
 	if (note.todo_due) frontMatter['due'] = convertDate(note.todo_due);
 	// Timestamps
 	if (note.user_updated_time) frontMatter['updated'] = convertDate(note.user_updated_time);
 	if (note.user_created_time) frontMatter['created'] = convertDate(note.user_created_time);
 	// Tags are only written when there is at least one
 	if (tagTitles.length > 0) frontMatter['tags'] = tagTitles;
 	// Sorting by position in fieldOrder guarantees that fields are always
 	// written in the same order, which is useful when generating diffs
 	const byFieldOrder = (left: string, right: string) => fieldOrder.indexOf(left) - fieldOrder.indexOf(right);
 	// The FAILSAFE_SCHEMA along with noCompatMode allows this to export strings
 	// that look like numbers (or yes/no) without added '' quotes around the text
 	const dumped = yaml.dump(frontMatter, {
 		sortKeys: byFieldOrder,
 		noCompatMode: true,
 		schema: yaml.FAILSAFE_SCHEMA,
 	});
 	// The yaml library still insists on quoting negative numbers. trimQuotes
 	// currently only removes those quotes, but it can be extended to support
 	// more special cases in the future if necessary.
 	return trimQuotes(dumped);
 };
 // Serializes a note to Markdown with a front matter block: the body is
 // produced by Note.serialize() and passed through
 // Note.replaceResourceInternalToExternalLinks(), then prefixed with the YAML
 // metadata wrapped between `---` lines.
 export const serialize = async (modNote: NoteEntity, tagTitles: string[]) => {
 	const serializedBody = await Note.serialize(modNote, ['body']);
 	const noteContent = await Note.replaceResourceInternalToExternalLinks(serializedBody);
 	const frontMatter = noteToFrontMatter(modNote, tagTitles);
 	return `---\n${frontMatter}---\n\n${noteContent}`;
 };
 // Returns true when a front matter value is "true" or "yes" (case-insensitive).
 // Array.includes() is required here: the `in` operator tests array *indices*,
 // so `'yes' in ['true', 'yes']` is false while `'0' in ['true', 'yes']` is true.
 // Non-string values are treated as false instead of throwing.
 function isTruthy(str: string): boolean {
 	if (typeof str !== 'string') return false;
 	return ['true', 'yes'].includes(str.toLowerCase());
 }
 // Re-indents every list item line (a line whose first non-blank character is
 // a dash) so that it starts with exactly 2 spaces. Other lines are unchanged.
 function normalizeYamlWhitespace(yaml: string[]): string[] {
 	const reindent = (rawLine: string): string => {
 		const stripped = rawLine.trimStart();
 		const isListItem = stripped.startsWith('-');
 		return isListItem ? `  ${stripped}` : rawLine;
 	};
 	return yaml.map(rawLine => reindent(rawLine));
 }
 // Converts an arbitrary `author` value into a single string. The use case is
 // loading r-markdown/pandoc style notes, where the author can be a string, a
 // list, or an object with a `name` property.
 // references:
 // https://pandoc.org/MANUAL.html#extension-yaml_metadata_block
 // https://github.com/hao203/rmarkdown-YAML
 function extractAuthor(author: unknown): string {
 	// Joplin only supports a single author, so for a list we take the first one
 	if (Array.isArray(author)) return extractAuthor(author[0]);
 	if (typeof author === 'string') return author;
 	const hasName = !!author && typeof author === 'object' && 'name' in author;
 	return hasName ? (author as { name: string }).name : '';
 }
 // Splits the text of a note into its front matter header and its body. The
 // first line (the opening `---`) is discarded, and the header runs up to the
 // first following line that starts with `---`. If there is no such line, the
 // whole remaining text is treated as the header and the body is empty.
 const getNoteHeader = (note: string) => {
 	// Ignore the leading `---`
 	const lines = note.split('\n').slice(1);
 	const closingIndex = lines.findIndex(line => line.startsWith('---'));
 	const hasClosing = closingIndex >= 0;
 	const headerLines = hasClosing ? lines.slice(0, closingIndex) : lines;
 	let bodyLines: string[] = [];
 	if (hasClosing) {
 		let bodyStart = closingIndex + 1;
 		// Need to eat the extra newline after the yaml block. Note that if the
 		// next line is not an empty line, we keep it. Fixes
 		// https://github.com/laurent22/joplin/issues/8802
 		const lineAfterClosing = bodyStart < lines.length ? lines[bodyStart] : '';
 		if (lineAfterClosing.trim() === '') bodyStart++;
 		bodyLines = lines.slice(bodyStart);
 	}
 	return {
 		header: normalizeYamlWhitespace(headerLines).join('\n'),
 		body: bodyLines.join('\n'),
 	};
 };
 // Returns a copy of `obj` in which every key is lower-cased. When two keys
 // only differ by case, the one enumerated last wins.
 // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
 const toLowerCase = (obj: Record<string, any>): Record<string, any> => {
 	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
 	const lowered: Record<string, any> = {};
 	Object.keys(obj).forEach(originalKey => {
 		lowered[originalKey.toLowerCase()] = obj[originalKey];
 	});
 	return lowered;
 };
 // Parses the text of a Markdown note that may start with a YAML front matter
 // block. Returns the note properties found in the front matter (with the
 // remaining text as `body`) and the list of unique tag titles. When the text
 // does not start with `---`, it is returned unchanged as the body, with no
 // tags.
 export const parse = (note: string): ParsedMeta => {
 	if (!note.startsWith('---')) return { metadata: { body: note }, tags: [] };
 	const { header, body } = getNoteHeader(note);
 	// Keys are lower-cased so that the lookups below are case-insensitive. An
 	// empty header makes yaml.load() return nothing, hence the `?? {}`.
 	const md = toLowerCase(yaml.load(header, { schema: yaml.FAILSAFE_SCHEMA }) ?? {});
 	const metadata: NoteEntity = {
 		title: md['title'] || '',
 		source_url: md['source'] || '',
 		is_todo: ('completed?' in md) ? 1 : 0,
 	};
 	// The ID is only kept when it is a 32-character alphanumeric string
 	if ('id' in md && typeof md['id'] === 'string' && md.id.match(/^[0-9a-zA-Z]{32}$/)) {
 		metadata['id'] = md.id;
 	}
 	if ('author' in md) { metadata['author'] = extractAuthor(md['author']); }
 	// The date fallback gives support for MultiMarkdown format, r-markdown, and pandoc formats
 	if ('created' in md) {
 		metadata['user_created_time'] = time.anythingToMs(md['created'], Date.now());
 	} else if ('date' in md) {
 		metadata['user_created_time'] = time.anythingToMs(md['date'], Date.now());
 	} else if ('created_at' in md) {
 		// Add support for Notesnook
 		metadata['user_created_time'] = time.anythingToMs(md['created_at'], Date.now());
 	}
 	if ('updated' in md) {
 		metadata['user_updated_time'] = time.anythingToMs(md['updated'], Date.now());
 	} else if ('lastmod' in md) {
 		// Add support for hugo
 		metadata['user_updated_time'] = time.anythingToMs(md['lastmod'], Date.now());
 	} else if ('date' in md) {
 		metadata['user_updated_time'] = time.anythingToMs(md['date'], Date.now());
 	} else if ('updated_at' in md) {
 		// Notesnook
 		metadata['user_updated_time'] = time.anythingToMs(md['updated_at'], Date.now());
 	}
 	if ('latitude' in md) { metadata['latitude'] = md['latitude']; }
 	if ('longitude' in md) { metadata['longitude'] = md['longitude']; }
 	if ('altitude' in md) { metadata['altitude'] = md['altitude']; }
 	if (metadata.is_todo) {
 		const completed = md['completed?'];
 		// The value is only checked when it is a string: a list, a map or an
 		// empty value must not crash the import.
 		if (typeof completed === 'string' && isTruthy(completed)) {
 			// Completed time isn't preserved, so we use a sane choice here
 			metadata['todo_completed'] = metadata['user_updated_time'];
 		}
 		if ('due' in md) {
 			const due_date = time.anythingToMs(md['due'], null);
 			if (due_date) { metadata['todo_due'] = due_date; }
 		}
 	}
 	// Tags are handled separately from typical metadata. `keywords` adds
 	// support for r-markdown/pandoc and is only used when `tags` is absent.
 	let rawTags: unknown = [];
 	if ('tags' in md) {
 		rawTags = md['tags'];
 	} else if ('keywords' in md) {
 		rawTags = md['keywords'];
 	}
 	// A single scalar (`tags: foo`) is wrapped in a list: spreading a bare
 	// string into a Set would otherwise create one tag per character. Entries
 	// that are not non-empty strings (empty values, nested lists or maps)
 	// cannot be used as tag titles and are dropped.
 	const tagCandidates: unknown[] = Array.isArray(rawTags) ? rawTags : [rawTags];
 	const tagTitles = tagCandidates.filter((t): t is string => typeof t === 'string' && t !== '');
 	// Only create unique tags
 	const tags = [...new Set(tagTitles)];
 	metadata['body'] = body;
 	return { metadata, tags };
 };