1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-11-23 22:36:32 +02:00
Files
joplin/packages/lib/services/interop/InteropService_Importer_OneNote.ts

211 lines
7.4 KiB
TypeScript

import { ImportExportResult, ImportModuleOutputFormat, ImportOptions } from './types';
import InteropService_Importer_Base from './InteropService_Importer_Base';
import { NoteEntity } from '../database/types';
import { rtrimSlashes } from '../../path-utils';
import InteropService_Importer_Md from './InteropService_Importer_Md';
import { join, resolve, normalize, sep, dirname, extname, basename } from 'path';
import Logger from '@joplin/utils/Logger';
import { uuidgen } from '../../uuid';
import shim from '../../shim';
// Logger shared by everything in this module, created under the importer's name.
const logger = Logger.create('InteropService_Importer_OneNote');
/**
 * An SVG element that has been pulled out of an HTML page.
 */
export type SvgXml = {
// File name of the SVG (including the `.svg` extension). It is used both as
// the name of the file written to disk and as the `src` of the replacing <img>.
title: string;
// Serialized XML of the SVG element.
content: string;
};
/**
 * Result of extracting the SVG elements from an HTML string.
 */
type ExtractSvgsReturn = {
// The extracted SVGs; empty when the HTML contains no <svg> element.
svgs: SvgXml[];
// The HTML in which each extracted <svg> has been replaced with an <img>
// (or the unchanged input HTML when nothing was extracted).
html: string;
};
/**
 * Imports OneNote data (`.zip`, `.one` or `.onepkg`) into Joplin.
 *
 * The import runs in three stages:
 *
 * 1. The source is extracted/copied into a temporary directory.
 * 2. Each OneNote file is converted to HTML with `@joplin/onenote-converter`.
 * 3. Inline SVGs are moved to separate files and the resulting HTML is handed
 *    to `InteropService_Importer_Md` (in HTML mode).
 *
 * See onenote-converter README.md for more information
 */
export default class InteropService_Importer_OneNote extends InteropService_Importer_Base {
	protected importedNotes: Record<string, NoteEntity> = {};

	// Both are supplied through the import options in `init` and are needed to
	// parse the converted HTML and serialize the extracted SVGs.
	private document: Document = null;
	private xmlSerializer: XMLSerializer = null;

	/**
	 * Initializes the importer.
	 *
	 * @param sourcePath Path of the file to import.
	 * @param options Import options; must provide `document` and `xmlSerializer`.
	 * @throws Error when `document` or `xmlSerializer` is missing from `options`.
	 */
	public async init(sourcePath: string, options: ImportOptions) {
		await super.init(sourcePath, options);
		if (!options.document || !options.xmlSerializer) {
			throw new Error('OneNote importer requires document and XMLSerializer to be able to extract SVG from HTML.');
		}
		this.document = options.document;
		this.xmlSerializer = options.xmlSerializer;
	}

	/**
	 * Returns the first path segment of `entryName` once `unzippedPath` has
	 * been stripped from it.
	 */
	private getEntryDirectory(unzippedPath: string, entryName: string) {
		const withoutBasePath = entryName.replace(unzippedPath, '');
		return normalize(withoutBasePath).split(sep)[0];
	}

	/**
	 * Extracts (or copies) the source file into `targetPath` and returns the
	 * recursive listing of `targetPath`.
	 *
	 * @param sourcePath A `.zip`, `.one` or `.onepkg` file.
	 * @param targetPath Directory that receives the extracted files.
	 * @throws Error when the file extension is not one of the supported ones.
	 */
	private async extractFiles_(sourcePath: string, targetPath: string) {
		const fileExtension = extname(sourcePath).toLowerCase();
		const fileNameNoExtension = basename(sourcePath, extname(sourcePath));

		if (fileExtension === '.zip') {
			logger.info('Unzipping files...');
			await shim.fsDriver().zipExtract({ source: sourcePath, extractTo: targetPath });
		} else if (fileExtension === '.one') {
			logger.info('Copying file...');
			const outputDirectory = join(targetPath, fileNameNoExtension);
			await shim.fsDriver().mkdir(outputDirectory);
			await shim.fsDriver().copy(sourcePath, join(outputDirectory, basename(sourcePath)));
		} else if (fileExtension === '.onepkg') {
			// Change the file extension so that the archive can be extracted
			const archivePath = join(targetPath, `${fileNameNoExtension}.cab`);
			await shim.fsDriver().copy(sourcePath, archivePath);

			try {
				const extractPath = join(targetPath, fileNameNoExtension);
				await shim.fsDriver().mkdir(extractPath);
				await shim.fsDriver().cabExtract({
					source: archivePath,
					extractTo: extractPath,
					// Only the .one files are used--there's no need to extract
					// other files.
					fileNamePattern: '*.one',
				});
			} finally {
				// The .cab copy is only needed for the extraction. Leaving it in
				// targetPath would make it part of the listing returned below,
				// where it could be mistaken for the notebook's base entry.
				await shim.fsDriver().remove(archivePath);
			}
		} else {
			throw new Error(`Unknown file extension: ${fileExtension}`);
		}

		return await shim.fsDriver().readDirStats(targetPath, { recursive: true });
	}

	/**
	 * Runs the import using the given (already created) temporary directories.
	 *
	 * @param result Import result that is updated and returned.
	 * @param unzipTempDirectory Directory that receives the extracted source.
	 * @param tempOutputDirectory Directory that receives the converted HTML.
	 */
	private async execImpl_(result: ImportExportResult, unzipTempDirectory: string, tempOutputDirectory: string) {
		const sourcePath = rtrimSlashes(this.sourcePath_);
		const files = await this.extractFiles_(sourcePath, unzipTempDirectory);

		if (files.length === 0) {
			result.warnings.push('Zip file has no files.');
			return result;
		}

		const baseFolder = this.getEntryDirectory(unzipTempDirectory, files[0].path);
		const notebookBaseDir = join(unzipTempDirectory, baseFolder, sep);
		const outputDirectory2 = join(tempOutputDirectory, baseFolder);

		// .onetoc2 files (this includes OneNote_RecycleBin.onetoc2) are excluded
		// from the conversion.
		const notebookFiles = files.filter(e => extname(e.path) !== '.onetoc2');

		const { oneNoteConverter } = shim.requireDynamic('@joplin/onenote-converter');

		logger.info('Extracting OneNote to HTML');
		const skippedFiles: string[] = [];
		for (const notebookFile of notebookFiles) {
			const notebookFilePath = join(unzipTempDirectory, notebookFile.path);

			// In some cases, the OneNote zip file can include folders and other files
			// that shouldn't be imported directly. Skip these:
			if (!['.one', '.onetoc2'].includes(extname(notebookFilePath).toLowerCase())) {
				logger.info('Skipping non-OneNote file:', notebookFile.path);
				skippedFiles.push(notebookFile.path);
				continue;
			}

			try {
				await oneNoteConverter(notebookFilePath, resolve(outputDirectory2), notebookBaseDir);
			} catch (error) {
				// A file that fails to convert is reported, but does not abort the
				// import of the remaining files.
				this.options_.onError?.(error);
				logger.error('Failed to convert OneNote file:', notebookFile.path, error);
			}
		}

		if (skippedFiles.length === notebookFiles.length) {
			this.options_.onError?.(new Error(`None of the files appear to be from OneNote. Skipped files include: ${JSON.stringify(skippedFiles)}`));
		}

		logger.info('Extracting SVGs into files');
		await this.moveSvgToLocalFile(tempOutputDirectory);

		logger.info('Importing HTML into Joplin');
		const importer = new InteropService_Importer_Md();
		importer.setMetadata({ fileExtensions: ['html'] });
		await importer.init(tempOutputDirectory, {
			...this.options_,
			format: 'html',
			outputFormat: ImportModuleOutputFormat.Html,
		});

		logger.info('Finished');
		result = await importer.exec(result);
		return result;
	}

	/**
	 * Imports the source file. Two temporary directories are created for the
	 * duration of the import and removed afterwards, whether or not the import
	 * succeeded.
	 *
	 * @param result Import result that is updated and returned.
	 */
	public async exec(result: ImportExportResult) {
		const unzipTempDirectory = await this.temporaryDirectory_(true);
		try {
			const tempOutputDirectory = await this.temporaryDirectory_(true);
			try {
				return await this.execImpl_(result, unzipTempDirectory, tempOutputDirectory);
			} finally {
				await shim.fsDriver().remove(tempOutputDirectory);
			}
		} finally {
			// Nested so that this directory is removed even when creating or
			// removing the output directory fails.
			await shim.fsDriver().remove(unzipTempDirectory);
		}
	}

	/**
	 * For every HTML file below `baseFolder`, moves the inline SVGs to separate
	 * files stored next to the HTML file and rewrites the HTML to reference them.
	 * Files without SVGs are left untouched.
	 */
	private async moveSvgToLocalFile(baseFolder: string) {
		const htmlFiles = await this.getValidHtmlFiles(resolve(baseFolder));

		for (const file of htmlFiles) {
			const fileLocation = join(baseFolder, file.path);
			const originalHtml = await shim.fsDriver().readFile(fileLocation);
			const { svgs, html: updatedHtml } = this.extractSvgs(originalHtml, () => uuidgen(10));

			if (!svgs || !svgs.length) continue;

			await shim.fsDriver().writeFile(fileLocation, updatedHtml, 'utf8');
			await this.createSvgFiles(svgs, join(baseFolder, dirname(file.path)));
		}
	}

	/**
	 * Returns the entries below `baseFolder` (recursively) that are not
	 * directories and whose path ends with `.html`.
	 */
	private async getValidHtmlFiles(baseFolder: string) {
		const files = await shim.fsDriver().readDirStats(baseFolder, { recursive: true });
		const htmlFiles = files.filter(f => !f.isDirectory() && f.path.endsWith('.html'));
		return htmlFiles;
	}

	/**
	 * Writes each SVG to `svgBaseFolder`, using its title as the file name.
	 */
	private async createSvgFiles(svgs: SvgXml[], svgBaseFolder: string) {
		for (const svg of svgs) {
			await shim.fsDriver().writeFile(join(svgBaseFolder, svg.title), svg.content, 'utf8');
		}
	}

	/**
	 * Replaces each top-level `<svg>` element of `html` with an `<img>` that
	 * points to `./<title>.svg` and returns the serialized SVGs together with
	 * the updated HTML.
	 *
	 * The `style` attribute of the SVG, when present, is moved to the `<img>`.
	 *
	 * @param html HTML to process.
	 * @param titleGenerator Called once per extracted SVG; returns the file name
	 *   (without extension) to use for it.
	 * @returns The extracted SVGs and the updated HTML. When no SVG is found,
	 *   `svgs` is empty and `html` is the unmodified input.
	 */
	public extractSvgs(html: string, titleGenerator: ()=> string): ExtractSvgsReturn {
		const htmlDocument = this.document.implementation.createHTMLDocument('htmlDocument');
		const root = htmlDocument.createElement('html');
		// Assigning innerHTML replaces all the children of `root`, so there is
		// no need to create a <body> beforehand.
		root.innerHTML = html;

		// get all "top-level" SVGS (ignore nested): a nested SVG is already part
		// of the serialized content of its outermost SVG ancestor.
		const svgNodeList = Array.from(root.querySelectorAll('svg'))
			.filter(svgNode => !svgNode.parentElement?.closest('svg'));

		if (!svgNodeList.length) {
			return { svgs: [], html };
		}

		const svgs: SvgXml[] = [];

		for (const svgNode of svgNodeList) {
			const title = `${titleGenerator()}.svg`;
			const img = htmlDocument.createElement('img');

			// getAttribute returns null when the attribute is missing, and
			// setAttribute would turn that into the string "null".
			const style = svgNode.getAttribute('style');
			if (style !== null) {
				img.setAttribute('style', style);
			}
			img.setAttribute('src', `./${title}`);

			// The style now lives on the <img>; it is removed before serializing
			// so that it isn't also stored in the SVG file.
			svgNode.removeAttribute('style');
			svgs.push({
				title,
				content: this.xmlSerializer.serializeToString(svgNode),
			});

			svgNode.parentElement.replaceChild(img, svgNode);
		}

		return {
			svgs,
			html: this.xmlSerializer.serializeToString(root),
		};
	}
}