You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-11-23 22:36:32 +02:00
211 lines
7.4 KiB
TypeScript
211 lines
7.4 KiB
TypeScript
import { ImportExportResult, ImportModuleOutputFormat, ImportOptions } from './types';
|
|
|
|
import InteropService_Importer_Base from './InteropService_Importer_Base';
|
|
import { NoteEntity } from '../database/types';
|
|
import { rtrimSlashes } from '../../path-utils';
|
|
import InteropService_Importer_Md from './InteropService_Importer_Md';
|
|
import { join, resolve, normalize, sep, dirname, extname, basename } from 'path';
|
|
import Logger from '@joplin/utils/Logger';
|
|
import { uuidgen } from '../../uuid';
|
|
import shim from '../../shim';
|
|
|
|
// Logger used by the OneNote importer in this module; it is created with the
// importer's class name as its argument.
const logger = Logger.create('InteropService_Importer_OneNote');
|
|
|
|
/**
 * A single SVG extracted from an HTML page.
 */
export type SvgXml = {
	/** File name under which the SVG is written (for example `<id>.svg`). */
	title: string;
	/** Serialized markup of the SVG element. */
	content: string;
};
|
|
|
|
/**
 * Result of extracting the SVGs out of an HTML string.
 */
type ExtractSvgsReturn = {
	/** The SVGs that were found, one entry per extracted element. */
	svgs: SvgXml[];
	/** The HTML to use in place of the input. */
	html: string;
};
|
|
|
|
// See onenote-converter README.md for more information
|
|
/**
 * Importer for OneNote exports (`.zip`, `.one` and `.onepkg` sources).
 *
 * The source is first extracted/copied into a temporary directory, each
 * OneNote file is converted to HTML with `@joplin/onenote-converter`, inline
 * `<svg>` elements are moved to standalone `.svg` files, and the resulting
 * HTML tree is finally imported through `InteropService_Importer_Md`.
 */
export default class InteropService_Importer_OneNote extends InteropService_Importer_Base {
	protected importedNotes: Record<string, NoteEntity> = {};
	// DOM entry points supplied through ImportOptions in init(); they are
	// needed by extractSvgs() to parse and re-serialize HTML.
	private document: Document = null;
	private xmlSerializer: XMLSerializer = null;

	/**
	 * Initializes the importer.
	 *
	 * @param sourcePath Path of the file to import.
	 * @param options Import options. Must provide `document` and `xmlSerializer`.
	 * @throws Error when `options.document` or `options.xmlSerializer` is missing.
	 */
	public async init(sourcePath: string, options: ImportOptions) {
		await super.init(sourcePath, options);
		if (!options.document || !options.xmlSerializer) {
			throw new Error('OneNote importer requires document and XMLSerializer to be able to extract SVG from HTML.');
		}
		this.document = options.document;
		this.xmlSerializer = options.xmlSerializer;
	}

	/**
	 * Returns the first path segment of `entryName`, after removing the first
	 * occurrence of `unzippedPath` from it and normalizing the remainder.
	 */
	private getEntryDirectory(unzippedPath: string, entryName: string) {
		const withoutBasePath = entryName.replace(unzippedPath, '');
		return normalize(withoutBasePath).split(sep)[0];
	}

	/**
	 * Makes the content of `sourcePath` available under `targetPath`,
	 * depending on the (case-insensitive) file extension:
	 *
	 * - `.zip`: the archive is extracted into `targetPath`.
	 * - `.one`: the file is copied into `targetPath/<file name without extension>/`.
	 * - `.onepkg`: the file is copied with a `.cab` extension and its `*.one`
	 *   entries are extracted into `targetPath/<file name without extension>/`.
	 *
	 * @returns The recursive directory listing of `targetPath`.
	 * @throws Error when the extension is none of the above.
	 */
	private async extractFiles_(sourcePath: string, targetPath: string) {
		const originalExtension = extname(sourcePath);
		const fileExtension = originalExtension.toLowerCase();
		const fileNameNoExtension = basename(sourcePath, originalExtension);

		if (fileExtension === '.zip') {
			logger.info('Unzipping files...');
			await shim.fsDriver().zipExtract({ source: sourcePath, extractTo: targetPath });
		} else if (fileExtension === '.one') {
			logger.info('Copying file...');

			const outputDirectory = join(targetPath, fileNameNoExtension);
			await shim.fsDriver().mkdir(outputDirectory);

			await shim.fsDriver().copy(sourcePath, join(outputDirectory, basename(sourcePath)));
		} else if (fileExtension === '.onepkg') {
			// Change the file extension so that the archive can be extracted
			const archivePath = join(targetPath, `${fileNameNoExtension}.cab`);
			await shim.fsDriver().copy(sourcePath, archivePath);

			const extractPath = join(targetPath, fileNameNoExtension);
			await shim.fsDriver().mkdir(extractPath);

			await shim.fsDriver().cabExtract({
				source: archivePath,
				extractTo: extractPath,
				// Only the .one files are used--there's no need to extract
				// other files.
				fileNamePattern: '*.one',
			});
		} else {
			throw new Error(`Unknown file extension: ${fileExtension}`);
		}

		return await shim.fsDriver().readDirStats(targetPath, { recursive: true });
	}

	/**
	 * Runs the import using the two provided working directories.
	 *
	 * @param result Result object that warnings are appended to and that is
	 * passed on to the HTML importer.
	 * @param unzipTempDirectory Directory that receives the extracted source files.
	 * @param tempOutputDirectory Directory that receives the generated HTML.
	 * @returns `result` unchanged (apart from a warning) when nothing was
	 * extracted, otherwise the result returned by the HTML importer.
	 */
	private async execImpl_(result: ImportExportResult, unzipTempDirectory: string, tempOutputDirectory: string) {
		const sourcePath = rtrimSlashes(this.sourcePath_);
		const files = await this.extractFiles_(sourcePath, unzipTempDirectory);

		if (files.length === 0) {
			result.warnings.push('Zip file has no files.');
			return result;
		}

		// The first path segment of the first listed entry is used as the
		// notebook's base folder, both for the input and for the output.
		const baseFolder = this.getEntryDirectory(unzipTempDirectory, files[0].path);
		const notebookBaseDir = join(unzipTempDirectory, baseFolder, sep);
		const htmlOutputDirectory = join(tempOutputDirectory, baseFolder);

		const notebookFiles = files.filter(e => {
			return extname(e.path) !== '.onetoc2' && basename(e.path) !== 'OneNote_RecycleBin.onetoc2';
		});
		const { oneNoteConverter } = shim.requireDynamic('@joplin/onenote-converter');

		logger.info('Extracting OneNote to HTML');
		const skippedFiles: string[] = [];
		for (const notebookFile of notebookFiles) {
			const notebookFilePath = join(unzipTempDirectory, notebookFile.path);
			// In some cases, the OneNote zip file can include folders and other files
			// that shouldn't be imported directly. Skip these:
			if (!['.one', '.onetoc2'].includes(extname(notebookFilePath).toLowerCase())) {
				logger.info('Skipping non-OneNote file:', notebookFile.path);
				skippedFiles.push(notebookFile.path);
				continue;
			}

			try {
				await oneNoteConverter(notebookFilePath, resolve(htmlOutputDirectory), notebookBaseDir);
			} catch (error) {
				// A file that fails to convert is reported but does not
				// abort the import of the remaining files.
				this.options_.onError?.(error);
				console.error(error);
			}
		}

		if (skippedFiles.length === notebookFiles.length) {
			this.options_.onError?.(new Error(`None of the files appear to be from OneNote. Skipped files include: ${JSON.stringify(skippedFiles)}`));
		}

		logger.info('Extracting SVGs into files');
		await this.moveSvgToLocalFile(tempOutputDirectory);

		logger.info('Importing HTML into Joplin');
		const importer = new InteropService_Importer_Md();
		importer.setMetadata({ fileExtensions: ['html'] });
		await importer.init(tempOutputDirectory, {
			...this.options_,
			format: 'html',
			outputFormat: ImportModuleOutputFormat.Html,
		});
		const importResult = await importer.exec(result);
		// Only report completion once the HTML import has actually run.
		logger.info('Finished');
		return importResult;
	}

	/**
	 * Imports the source file given to `init()`.
	 *
	 * Two temporary directories are created for the duration of the import.
	 * Each one is removed in its own `finally` block, so that a failure while
	 * creating or removing one of them does not leave the other one behind.
	 */
	public async exec(result: ImportExportResult) {
		const unzipTempDirectory = await this.temporaryDirectory_(true);
		try {
			const tempOutputDirectory = await this.temporaryDirectory_(true);
			try {
				return await this.execImpl_(result, unzipTempDirectory, tempOutputDirectory);
			} finally {
				await shim.fsDriver().remove(tempOutputDirectory);
			}
		} finally {
			await shim.fsDriver().remove(unzipTempDirectory);
		}
	}

	/**
	 * For every `.html` file below `baseFolder`, replaces inline SVGs with
	 * `<img>` elements and writes the extracted SVGs next to the HTML file.
	 * Files without any SVG are left untouched.
	 */
	private async moveSvgToLocalFile(baseFolder: string) {
		const htmlFiles = await this.getValidHtmlFiles(resolve(baseFolder));

		for (const file of htmlFiles) {
			const fileLocation = join(baseFolder, file.path);
			const originalHtml = await shim.fsDriver().readFile(fileLocation);
			const { svgs, html: updatedHtml } = this.extractSvgs(originalHtml, () => uuidgen(10));

			if (!svgs || !svgs.length) continue;

			await shim.fsDriver().writeFile(fileLocation, updatedHtml, 'utf8');
			await this.createSvgFiles(svgs, join(baseFolder, dirname(file.path)));
		}
	}

	/**
	 * Lists, recursively, the non-directory entries of `baseFolder` whose path
	 * ends with `.html`.
	 */
	private async getValidHtmlFiles(baseFolder: string) {
		const files = await shim.fsDriver().readDirStats(baseFolder, { recursive: true });
		const htmlFiles = files.filter(f => !f.isDirectory() && f.path.endsWith('.html'));
		return htmlFiles;
	}

	/**
	 * Writes each SVG to `svgBaseFolder`, using its title as the file name.
	 */
	private async createSvgFiles(svgs: SvgXml[], svgBaseFolder: string) {
		for (const svg of svgs) {
			await shim.fsDriver().writeFile(join(svgBaseFolder, svg.title), svg.content, 'utf8');
		}
	}

	/**
	 * Replaces every top-level `<svg>` element of `html` with an `<img>` that
	 * points to `./<title>.svg`, and returns the extracted SVGs together with
	 * the updated HTML.
	 *
	 * The `style` attribute of the SVG, when present, is moved to the `<img>`.
	 *
	 * @param html HTML to process.
	 * @param titleGenerator Called once per extracted SVG; its return value,
	 * with `.svg` appended, becomes the SVG's title.
	 * @returns The extracted SVGs and the serialized, updated HTML. When the
	 * input contains no SVG, the input string is returned as-is.
	 */
	public extractSvgs(html: string, titleGenerator: ()=> string): ExtractSvgsReturn {
		const htmlDocument = this.document.implementation.createHTMLDocument('htmlDocument');
		// Assigning innerHTML replaces all the children of the element, so
		// there is no need to create a <body> beforehand.
		const root = htmlDocument.createElement('html');
		root.innerHTML = html;

		const svgNodeList = root.querySelectorAll('svg');

		if (!svgNodeList || !svgNodeList.length) {
			return { svgs: [], html };
		}

		const svgs: SvgXml[] = [];

		for (const svgNode of svgNodeList) {
			// Only "top-level" SVGs are extracted: a nested SVG is part of
			// the serialized content of its ancestor, so extracting it too
			// would only create a file that nothing references.
			if (svgNode.parentElement?.closest('svg')) continue;

			const title = `${titleGenerator()}.svg`;
			const img = htmlDocument.createElement('img');
			// getAttribute() returns null when the attribute is missing, and
			// setAttribute() would turn that into the literal string "null".
			const style = svgNode.getAttribute('style');
			if (style !== null) {
				img.setAttribute('style', style);
			}
			img.setAttribute('src', `./${title}`);
			svgNode.removeAttribute('style');

			svgs.push({
				title,
				content: this.xmlSerializer.serializeToString(svgNode),
			});

			svgNode.parentElement.replaceChild(img, svgNode);
		}

		return {
			svgs,
			html: this.xmlSerializer.serializeToString(root),
		};
	}
}
|