1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-21 09:38:01 +02:00

Chore: Markdown + frontmatter import and export: Extract frontMatter logic into a separate file (#10508)

Co-authored-by: Laurent Cozic <laurent22@users.noreply.github.com>
This commit is contained in:
Henry Heino 2024-05-30 00:40:52 -07:00 committed by GitHub
parent 99b840da34
commit f938d5f489
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 266 additions and 260 deletions

View File

@ -1209,6 +1209,7 @@ packages/lib/utils/ActionLogger.test.js
packages/lib/utils/ActionLogger.js packages/lib/utils/ActionLogger.js
packages/lib/utils/credentialFiles.js packages/lib/utils/credentialFiles.js
packages/lib/utils/focusHandler.js packages/lib/utils/focusHandler.js
packages/lib/utils/frontMatter.js
packages/lib/utils/ipc/RemoteMessenger.test.js packages/lib/utils/ipc/RemoteMessenger.test.js
packages/lib/utils/ipc/RemoteMessenger.js packages/lib/utils/ipc/RemoteMessenger.js
packages/lib/utils/ipc/TestMessenger.js packages/lib/utils/ipc/TestMessenger.js

1
.gitignore vendored
View File

@ -1188,6 +1188,7 @@ packages/lib/utils/ActionLogger.test.js
packages/lib/utils/ActionLogger.js packages/lib/utils/ActionLogger.js
packages/lib/utils/credentialFiles.js packages/lib/utils/credentialFiles.js
packages/lib/utils/focusHandler.js packages/lib/utils/focusHandler.js
packages/lib/utils/frontMatter.js
packages/lib/utils/ipc/RemoteMessenger.test.js packages/lib/utils/ipc/RemoteMessenger.test.js
packages/lib/utils/ipc/RemoteMessenger.js packages/lib/utils/ipc/RemoteMessenger.js
packages/lib/utils/ipc/TestMessenger.js packages/lib/utils/ipc/TestMessenger.js

View File

@ -4,9 +4,9 @@ import Folder from '../../models/Folder';
import Note from '../../models/Note'; import Note from '../../models/Note';
import Tag from '../../models/Tag'; import Tag from '../../models/Tag';
import time from '../../time'; import time from '../../time';
import { fieldOrder } from './InteropService_Exporter_Md_frontmatter';
import * as fs from 'fs-extra'; import * as fs from 'fs-extra';
import { ExportModuleOutputFormat } from './types'; import { ExportModuleOutputFormat } from './types';
import { fieldOrder } from '../../utils/frontMatter';
async function recreateExportDir() { async function recreateExportDir() {
const dir = exportDir(); const dir = exportDir();

View File

@ -1,13 +1,9 @@
import InteropService_Exporter_Md from './InteropService_Exporter_Md'; import InteropService_Exporter_Md from './InteropService_Exporter_Md';
import BaseModel from '../../BaseModel'; import BaseModel from '../../BaseModel';
import Note from '../../models/Note';
import NoteTag from '../../models/NoteTag'; import NoteTag from '../../models/NoteTag';
import Tag from '../../models/Tag'; import Tag from '../../models/Tag';
import time from '../../time';
import { NoteEntity } from '../database/types'; import { NoteEntity } from '../database/types';
import { MdFrontMatterExport } from './types'; import { serialize } from '../../utils/frontMatter';
import * as yaml from 'js-yaml';
interface NoteTagContext { interface NoteTagContext {
noteTags: Record<string, string[]>; noteTags: Record<string, string[]>;
@ -19,33 +15,6 @@ interface TagContext {
interface FrontMatterContext extends NoteTagContext, TagContext {} interface FrontMatterContext extends NoteTagContext, TagContext {}
// There is a special case (negative numbers) where the yaml library will force quotations
// These need to be stripped
function trimQuotes(rawOutput: string): string {
return rawOutput.split('\n').map(line => {
const index = line.indexOf(': \'-');
const indexWithSpace = line.indexOf(': \'- ');
// We don't apply this processing if the string starts with a dash
// followed by a space. Those should actually be in quotes, otherwise
// they are detected as invalid list items when we later try to import
// the file.
if (index === indexWithSpace) return line;
if (index >= 0) {
// The plus 2 eats the : and space characters
const start = line.substring(0, index + 2);
// The plus 3 eats the quote character
const end = line.substring(index + 3, line.length - 1);
return start + end;
}
return line;
}).join('\n');
}
export const fieldOrder = ['title', 'updated', 'created', 'source', 'author', 'latitude', 'longitude', 'altitude', 'completed?', 'due', 'tags'];
export default class InteropService_Exporter_Md_frontmatter extends InteropService_Exporter_Md { export default class InteropService_Exporter_Md_frontmatter extends InteropService_Exporter_Md {
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
@ -94,78 +63,17 @@ export default class InteropService_Exporter_Md_frontmatter extends InteropServi
} }
} }
private convertDate(datetime: number): string { protected async getNoteExportContent_(modNote: NoteEntity) {
return time.unixMsToRfc3339Sec(datetime); let tagTitles: string[] = [];
}
private extractMetadata(note: NoteEntity) {
const md: MdFrontMatterExport = {};
// Every variable needs to be converted separately, so they will be handles in groups
//
// title
if (note.title) { md['title'] = note.title; }
// source, author
if (note.source_url) { md['source'] = note.source_url; }
if (note.author) { md['author'] = note.author; }
// locations
// non-strict inequality is used here to interpret the location strings
// as numbers i.e 0.000000 is the same as 0.
// This is necessary because these fields are officially numbers, but often
// contain strings.
// eslint-disable-next-line eqeqeq
if (note.latitude != 0 || note.longitude != 0 || note.altitude != 0) {
md['latitude'] = note.latitude;
md['longitude'] = note.longitude;
md['altitude'] = note.altitude;
}
// todo
if (note.is_todo) {
// boolean is not support by the yaml FAILSAFE_SCHEMA
md['completed?'] = note.todo_completed ? 'yes' : 'no';
}
if (note.todo_due) { md['due'] = this.convertDate(note.todo_due); }
// time
if (note.user_updated_time) { md['updated'] = this.convertDate(note.user_updated_time); }
if (note.user_created_time) { md['created'] = this.convertDate(note.user_created_time); }
// tags
const context: FrontMatterContext = this.context(); const context: FrontMatterContext = this.context();
if (context.noteTags[note.id]) { if (context.noteTags[modNote.id]) {
const tagIds = context.noteTags[note.id]; const tagIds = context.noteTags[modNote.id];
// In some cases a NoteTag can still exist, while the Tag does not. In this case, tagTitles // In some cases a NoteTag can still exist, while the Tag does not. In this case, tagTitles
// for that tagId will return undefined, which can't be handled by the yaml library (issue #7782) // for that tagId will return undefined, which can't be handled by the yaml library (issue #7782)
const tags = tagIds.map((id: string) => context.tagTitles[id]).filter(e => !!e).sort(); tagTitles = tagIds.map((id: string) => context.tagTitles[id]).filter(e => !!e).sort();
if (tags.length > 0) {
md['tags'] = tags;
}
} }
// This guarentees that fields will always be ordered the same way return serialize(modNote, tagTitles);
// which can be useful if users are using this for generating diffs
const sort = (a: string, b: string) => {
return fieldOrder.indexOf(a) - fieldOrder.indexOf(b);
};
// The FAILSAFE_SCHEMA along with noCompatMode allows this to export strings that look
// like numbers (or yes/no) without the added '' quotes around the text
const rawOutput = yaml.dump(md, { sortKeys: sort, noCompatMode: true, schema: yaml.FAILSAFE_SCHEMA });
// The additional trimming is the unfortunate result of the yaml library insisting on
// quoting negative numbers.
// For now the trimQuotes function only trims quotes associated with a negative number
// but it can be extended to support more special cases in the future if necessary.
return trimQuotes(rawOutput);
}
protected async getNoteExportContent_(modNote: NoteEntity) {
const noteContent = await Note.replaceResourceInternalToExternalLinks(await Note.serialize(modNote, ['body']));
const metadata = this.extractMetadata(modNote);
return `---\n${metadata}---\n\n${noteContent}`;
} }
} }

View File

@ -1,173 +1,15 @@
import InteropService_Importer_Md from './InteropService_Importer_Md'; import InteropService_Importer_Md from './InteropService_Importer_Md';
import Note from '../../models/Note'; import Note from '../../models/Note';
import Tag from '../../models/Tag'; import Tag from '../../models/Tag';
import time from '../../time';
import { NoteEntity } from '../database/types';
import * as yaml from 'js-yaml';
import shim from '../../shim'; import shim from '../../shim';
import { parse } from '../../utils/frontMatter';
interface ParsedMeta {
metadata: NoteEntity;
tags: string[];
}
function isTruthy(str: string): boolean {
return str.toLowerCase() in ['true', 'yes'];
}
// Enforces exactly 2 spaces in front of list items
function normalizeYamlWhitespace(yaml: string[]): string[] {
return yaml.map(line => {
const l = line.trimStart();
if (l.startsWith('-')) {
return ` ${l}`;
}
return line;
});
}
// This is a helper function to convert an arbitrary author variable into a string
// the use case is for loading from r-markdown/pandoc style notes
// references:
// https://pandoc.org/MANUAL.html#extension-yaml_metadata_block
// https://github.com/hao203/rmarkdown-YAML
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
function extractAuthor(author: any): string {
if (!author) return '';
if (typeof(author) === 'string') {
return author;
} else if (Array.isArray(author)) {
// Joplin only supports a single author, so we take the first one
return extractAuthor(author[0]);
} else if (typeof(author) === 'object') {
if ('name' in author) {
return author['name'];
}
}
return '';
}
export default class InteropService_Importer_Md_frontmatter extends InteropService_Importer_Md { export default class InteropService_Importer_Md_frontmatter extends InteropService_Importer_Md {
private getNoteHeader(note: string) {
// Ignore the leading `---`
const lines = note.split('\n').slice(1);
let inHeader = true;
const headerLines: string[] = [];
const bodyLines: string[] = [];
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const nextLine = i + 1 <= lines.length - 1 ? lines[i + 1] : '';
if (inHeader && line.startsWith('---')) {
inHeader = false;
// Need to eat the extra newline after the yaml block. Note that
// if the next line is not an empty line, we keep it. Fixes
// https://github.com/laurent22/joplin/issues/8802
if (nextLine.trim() === '') i++;
continue;
}
if (inHeader) { headerLines.push(line); } else { bodyLines.push(line); }
}
const normalizedHeaderLines = normalizeYamlWhitespace(headerLines);
const header = normalizedHeaderLines.join('\n');
const body = bodyLines.join('\n');
return { header, body };
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
private toLowerCase(obj: Record<string, any>): Record<string, any> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const newObj: Record<string, any> = {};
for (const key of Object.keys(obj)) {
newObj[key.toLowerCase()] = obj[key];
}
return newObj;
}
private parseYamlNote(note: string): ParsedMeta {
if (!note.startsWith('---')) return { metadata: { body: note }, tags: [] };
const { header, body } = this.getNoteHeader(note);
const md = this.toLowerCase(yaml.load(header, { schema: yaml.FAILSAFE_SCHEMA }));
const metadata: NoteEntity = {
title: md['title'] || '',
source_url: md['source'] || '',
is_todo: ('completed?' in md) ? 1 : 0,
};
if ('author' in md) { metadata['author'] = extractAuthor(md['author']); }
// The date fallback gives support for MultiMarkdown format, r-markdown, and pandoc formats
if ('created' in md) {
metadata['user_created_time'] = time.anythingToMs(md['created'], Date.now());
} else if ('date' in md) {
metadata['user_created_time'] = time.anythingToMs(md['date'], Date.now());
} else if ('created_at' in md) {
// Add support for Notesnook
metadata['user_created_time'] = time.anythingToMs(md['created_at'], Date.now());
}
if ('updated' in md) {
metadata['user_updated_time'] = time.anythingToMs(md['updated'], Date.now());
} else if ('lastmod' in md) {
// Add support for hugo
metadata['user_updated_time'] = time.anythingToMs(md['lastmod'], Date.now());
} else if ('date' in md) {
metadata['user_updated_time'] = time.anythingToMs(md['date'], Date.now());
} else if ('updated_at' in md) {
// Notesnook
metadata['user_updated_time'] = time.anythingToMs(md['updated_at'], Date.now());
}
if ('latitude' in md) { metadata['latitude'] = md['latitude']; }
if ('longitude' in md) { metadata['longitude'] = md['longitude']; }
if ('altitude' in md) { metadata['altitude'] = md['altitude']; }
if (metadata.is_todo) {
if (isTruthy(md['completed?'])) {
// Completed time isn't preserved, so we use a sane choice here
metadata['todo_completed'] = metadata['user_updated_time'];
}
if ('due' in md) {
const due_date = time.anythingToMs(md['due'], null);
if (due_date) { metadata['todo_due'] = due_date; }
}
}
// Tags are handled separately from typical metadata
let tags: string[] = [];
if ('tags' in md) {
// Only create unique tags
tags = md['tags'];
} else if ('keywords' in md) {
// Adding support for r-markdown/pandoc
tags = tags.concat(md['keywords']);
}
// Only create unique tags
tags = [...new Set(tags)];
metadata['body'] = body;
return { metadata, tags };
}
public async importFile(filePath: string, parentFolderId: string) { public async importFile(filePath: string, parentFolderId: string) {
try { try {
const note = await super.importFile(filePath, parentFolderId); const note = await super.importFile(filePath, parentFolderId);
const { metadata, tags } = this.parseYamlNote(note.body); const { metadata, tags } = parse(note.body);
const updatedNote = { ...note, ...metadata }; const updatedNote = { ...note, ...metadata };

View File

@ -0,0 +1,254 @@
import Note from '../models/Note';
import { NoteEntity } from '../services/database/types';
import { MdFrontMatterExport } from '../services/interop/types';
import time from '../time';
import * as yaml from 'js-yaml';
// Result of `parse`: the note fields recovered from a Markdown document and
// the tag titles listed in its front matter.
export interface ParsedMeta {
// Note fields read from the front matter, together with the note body.
metadata: NoteEntity;
// Tag titles taken from the `tags` (or `keywords`) front matter field.
tags: string[];
}
// Formats a timestamp for the front matter by delegating to
// `time.unixMsToRfc3339Sec`.
const convertDate = (datetime: number): string => time.unixMsToRfc3339Sec(datetime);
// Order in which front matter keys are written. `noteToFrontMatter` sorts keys
// by their index in this list, so a key that is not listed (index -1) sorts
// before all listed keys.
export const fieldOrder = ['title', 'id', 'updated', 'created', 'source', 'author', 'latitude', 'longitude', 'altitude', 'completed?', 'due', 'tags'];
// The yaml library forces single quotes around values that start with a dash
// (typically negative numbers). This strips those quotes again, line by line.
function trimQuotes(rawOutput: string): string {
	const unquoteLine = (line: string): string => {
		const quotedDashAt = line.indexOf(': \'-');

		// A value that starts with a dash followed by a space must stay quoted:
		// unquoted, it would be read as an invalid list item when the file is
		// imported again. Both searches return -1 when there is no quoted dash
		// at all, so this comparison also covers the "nothing to do" case.
		if (quotedDashAt === line.indexOf(': \'- ')) return line;
		if (quotedDashAt < 0) return line;

		// Keep everything up to and including the ": " separator...
		const prefix = line.slice(0, quotedDashAt + 2);
		// ...then skip the opening quote and drop the last character of the
		// line, which is taken to be the closing quote.
		const value = line.slice(quotedDashAt + 3, line.length - 1);
		return prefix + value;
	};

	return rawOutput.split('\n').map(line => unquoteLine(line)).join('\n');
}
// Builds the YAML front matter text (without the surrounding `---` lines) for
// a note. `tagTitles` are the titles of the note's tags; the `tags` key is
// only written when the list is not empty.
export const noteToFrontMatter = (note: NoteEntity, tagTitles: string[]) => {
	const frontMatter: MdFrontMatterExport = {};

	// Each field has its own conversion rule, so the fields are handled in
	// small groups.

	// Title, source and author are written only when they are set.
	if (note.title) frontMatter['title'] = note.title;
	if (note.source_url) frontMatter['source'] = note.source_url;
	if (note.author) frontMatter['author'] = note.author;

	// Location. The loose inequality is deliberate: these fields are
	// officially numbers but often contain strings, and a string such as
	// "0.000000" must be treated the same as 0.
	// eslint-disable-next-line eqeqeq
	const hasLocation = note.latitude != 0 || note.longitude != 0 || note.altitude != 0;
	if (hasLocation) {
		frontMatter['latitude'] = note.latitude;
		frontMatter['longitude'] = note.longitude;
		frontMatter['altitude'] = note.altitude;
	}

	// To-do state. Booleans are not supported by the yaml FAILSAFE_SCHEMA, so
	// the completion state is written as the text "yes" or "no".
	if (note.is_todo) {
		frontMatter['completed?'] = note.todo_completed ? 'yes' : 'no';
	}
	if (note.todo_due) frontMatter['due'] = convertDate(note.todo_due);

	// Timestamps.
	if (note.user_updated_time) frontMatter['updated'] = convertDate(note.user_updated_time);
	if (note.user_created_time) frontMatter['created'] = convertDate(note.user_created_time);

	// Tags.
	if (tagTitles.length) frontMatter['tags'] = tagTitles;

	// Sorting by position in `fieldOrder` guarantees that the fields always
	// come out in the same order, which helps users who diff exported files.
	const byFieldOrder = (a: string, b: string) => fieldOrder.indexOf(a) - fieldOrder.indexOf(b);

	// FAILSAFE_SCHEMA together with noCompatMode lets strings that look like
	// numbers (or yes/no) be written without added '' quotes.
	const dumped = yaml.dump(frontMatter, {
		sortKeys: byFieldOrder,
		noCompatMode: true,
		schema: yaml.FAILSAFE_SCHEMA,
	});

	// The yaml library still insists on quoting negative numbers, so those
	// quotes are removed afterwards. `trimQuotes` only handles that case for
	// now but can be extended if more special cases turn up.
	return trimQuotes(dumped);
};
// Produces the full exported Markdown document for a note: a front matter
// block delimited by `---` lines, a blank line, then the note body.
export const serialize = async (modNote: NoteEntity, tagTitles: string[]) => {
	// Serialise only the body, then pass it through
	// `Note.replaceResourceInternalToExternalLinks`.
	const serializedBody = await Note.serialize(modNote, ['body']);
	const noteContent = await Note.replaceResourceInternalToExternalLinks(serializedBody);

	const metadata = noteToFrontMatter(modNote, tagTitles);
	return `---\n${metadata}---\n\n${noteContent}`;
};
// Returns true when a front matter value spells "true" or "yes"
// (case-insensitive), and false for anything else.
function isTruthy(str: string): boolean {
	// The value comes from parsed YAML, so at runtime it may not be a string
	// (for example an empty value or a list). Treat those as "not truthy"
	// instead of throwing on `toLowerCase`.
	if (typeof str !== 'string') return false;

	// `includes` compares against the array's values. The `in` operator would
	// test the array's keys ("0", "1", "length") and never match "yes"/"true".
	return ['true', 'yes'].includes(str.toLowerCase());
}
// Enforces exactly 2 spaces in front of list items.
//
// Takes the lines of a YAML header and returns a new array in which every line
// whose first non-whitespace character is a dash is re-indented to exactly two
// spaces. All other lines are returned unchanged.
function normalizeYamlWhitespace(yaml: string[]): string[] {
	// Built with `repeat` so the indent width is explicit and cannot be lost
	// to whitespace collapsing.
	const listItemIndent = ' '.repeat(2);

	return yaml.map(line => {
		const trimmed = line.trimStart();
		return trimmed.startsWith('-') ? `${listItemIndent}${trimmed}` : line;
	});
}
// Converts an arbitrary `author` front matter value into a single string.
// This exists for notes in r-markdown/pandoc style, where the author may be a
// string, a list, or an object with a `name` field. References:
// https://pandoc.org/MANUAL.html#extension-yaml_metadata_block
// https://github.com/hao203/rmarkdown-YAML
function extractAuthor(author: unknown): string {
	if (!author) return '';
	if (typeof author === 'string') return author;

	// Joplin only supports a single author, so we take the first one
	if (Array.isArray(author)) return extractAuthor(author[0]);

	if (typeof author === 'object' && 'name' in author) {
		return (author as { name: string }).name;
	}

	// Any other shape carries no usable author.
	return '';
}
// Splits a note that starts with a `---` line into its YAML header text and
// its Markdown body. The header is passed through `normalizeYamlWhitespace`.
// If no closing `---` line is found, every line is treated as header and the
// body is empty.
const getNoteHeader = (note: string) => {
	// Ignore the leading `---`
	const lines = note.split('\n').slice(1);

	// The header ends at the first line that starts with `---`. Later lines
	// that start with `---` belong to the body.
	const closingIndex = lines.findIndex(line => line.startsWith('---'));

	let headerLines: string[];
	let bodyLines: string[];

	if (closingIndex < 0) {
		headerLines = lines;
		bodyLines = [];
	} else {
		headerLines = lines.slice(0, closingIndex);

		// Need to eat the extra newline after the yaml block. Note that if the
		// next line is not an empty line, we keep it. Fixes
		// https://github.com/laurent22/joplin/issues/8802
		let bodyStart = closingIndex + 1;
		const lineAfterHeader = bodyStart < lines.length ? lines[bodyStart] : '';
		if (lineAfterHeader.trim() === '') bodyStart++;

		bodyLines = lines.slice(bodyStart);
	}

	const header = normalizeYamlWhitespace(headerLines).join('\n');
	const body = bodyLines.join('\n');

	return { header, body };
};
// Returns a new object holding the same values as `obj` under lower-cased
// keys. The input is not modified. When two keys differ only by case, the one
// that comes later in the object's key order wins.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
const toLowerCase = (obj: Record<string, any>): Record<string, any> => {
	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
	const lowered: Record<string, any> = {};

	Object.entries(obj).forEach(([key, value]) => {
		lowered[key.toLowerCase()] = value;
	});

	return lowered;
};
// Normalises a `tags`/`keywords` front matter value into a list of tag titles.
// A list keeps its string entries, a single non-empty string becomes a
// one-element list, and anything else yields no tags.
const toTagTitles = (value: unknown): string[] => {
	if (Array.isArray(value)) {
		return value.filter((item): item is string => typeof item === 'string');
	}
	if (typeof value === 'string' && value) return [value];
	return [];
};

// Parses a Markdown note that may start with a YAML front matter block.
//
// Returns the note fields found in the front matter (plus the body that
// follows it) and the list of unique tag titles. A note that does not start
// with `---` is returned unchanged as the body, with no tags.
export const parse = (note: string): ParsedMeta => {
	if (!note.startsWith('---')) return { metadata: { body: note }, tags: [] };

	const { header, body } = getNoteHeader(note);

	// Keys are lower-cased so that lookups below are case-insensitive. The
	// `?? {}` covers a header for which `yaml.load` returns nothing.
	const md = toLowerCase(yaml.load(header, { schema: yaml.FAILSAFE_SCHEMA }) ?? {});

	const metadata: NoteEntity = {
		title: md['title'] || '',
		source_url: md['source'] || '',
		// The presence of the `completed?` key alone marks the note as a to-do.
		is_todo: ('completed?' in md) ? 1 : 0,
	};

	// Only accept an id that is exactly 32 alphanumeric characters.
	if ('id' in md && typeof md['id'] === 'string' && md.id.match(/^[0-9a-zA-Z]{32}$/)) {
		metadata['id'] = md.id;
	}

	if ('author' in md) { metadata['author'] = extractAuthor(md['author']); }

	// The date fallback gives support for MultiMarkdown format, r-markdown, and pandoc formats.
	// NOTE(review): the second argument of `time.anythingToMs` looks like the
	// fallback used when the value cannot be converted — confirm.
	if ('created' in md) {
		metadata['user_created_time'] = time.anythingToMs(md['created'], Date.now());
	} else if ('date' in md) {
		metadata['user_created_time'] = time.anythingToMs(md['date'], Date.now());
	} else if ('created_at' in md) {
		// Add support for Notesnook
		metadata['user_created_time'] = time.anythingToMs(md['created_at'], Date.now());
	}

	if ('updated' in md) {
		metadata['user_updated_time'] = time.anythingToMs(md['updated'], Date.now());
	} else if ('lastmod' in md) {
		// Add support for hugo
		metadata['user_updated_time'] = time.anythingToMs(md['lastmod'], Date.now());
	} else if ('date' in md) {
		metadata['user_updated_time'] = time.anythingToMs(md['date'], Date.now());
	} else if ('updated_at' in md) {
		// Notesnook
		metadata['user_updated_time'] = time.anythingToMs(md['updated_at'], Date.now());
	}

	if ('latitude' in md) { metadata['latitude'] = md['latitude']; }
	if ('longitude' in md) { metadata['longitude'] = md['longitude']; }
	if ('altitude' in md) { metadata['altitude'] = md['altitude']; }

	if (metadata.is_todo) {
		if (isTruthy(md['completed?'])) {
			// Completed time isn't preserved, so we use a sane choice here
			metadata['todo_completed'] = metadata['user_updated_time'];
		}
		if ('due' in md) {
			const due_date = time.anythingToMs(md['due'], null);
			if (due_date) { metadata['todo_due'] = due_date; }
		}
	}

	// Tags are handled separately from typical metadata. `keywords` is only
	// consulted when there is no `tags` key (r-markdown/pandoc support).
	//
	// The raw value is normalised first: a scalar such as `tags: work` is a
	// plain string, and spreading a string into a Set would otherwise split it
	// into single characters.
	let tags: string[] = [];
	if ('tags' in md) {
		tags = toTagTitles(md['tags']);
	} else if ('keywords' in md) {
		// Adding support for r-markdown/pandoc
		tags = toTagTitles(md['keywords']);
	}

	// Only create unique tags
	tags = [...new Set(tags)];

	metadata['body'] = body;

	return { metadata, tags };
};