1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-11 18:24:43 +02:00

Desktop: Resolves #9293: Preserve nested tables in RTE

This commit is contained in:
Laurent Cozic 2023-11-13 14:34:30 +00:00
parent 9a06e59cfe
commit 9923e5c821
7 changed files with 54 additions and 15 deletions

View File

@ -36,6 +36,10 @@ describe('HtmlToMd', () => {
htmlToMdOptions.preserveImageTagsWithSize = true;
}
if (htmlFilename.indexOf('preserve_nested_tables') === 0) {
htmlToMdOptions.preserveNestedTables = true;
}
const html = await readFile(htmlPath, 'utf8');
let expectedMd = await readFile(mdPath, 'utf8');

View File

@ -0,0 +1,19 @@
<body>
<table border="5px" bordercolor="#8707B0">
<tr>
<td>Left side of the main table</td>
<td>
<table border="5px" bordercolor="#F35557">
<h4 align="center">Nested Table</h4>
<tr>
<td>nested table C1</td>
<td>nested table C2</td>
</tr>
<tr>
<td>nested table</td>
<td>nested table</td>
</tr>
</table>
</td>
</tr>
</table>

View File

@ -0,0 +1 @@
<table border="5px" bordercolor="#8707B0"><tbody><tr><td>Left side of the main table</td><td><h4 align="center">Nested Table</h4><table border="5px" bordercolor="#F35557"><tbody><tr><td>nested table C1</td><td>nested table C2</td></tr><tr><td>nested table</td><td>nested table</td></tr></tbody></table></td></tr></tbody></table>

View File

@ -9,7 +9,10 @@ export async function htmlToMarkdown(markupLanguage: number, html: string, origi
if (markupLanguage === MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN) {
const htmlToMd = new HtmlToMd();
newBody = htmlToMd.parse(html, { preserveImageTagsWithSize: true });
newBody = htmlToMd.parse(html, {
preserveImageTagsWithSize: true,
preserveNestedTables: true,
});
newBody = await Note.replaceResourceExternalToInternalLinks(newBody, { useAbsolutePaths: true });
} else {
newBody = await Note.replaceResourceExternalToInternalLinks(html, { useAbsolutePaths: true });

View File

@ -7,6 +7,7 @@ const pdfUrlRegex = /[\s\S]*?\.pdf$/i;
export interface ParseOptions {
anchorNames?: string[];
preserveImageTagsWithSize?: boolean;
preserveNestedTables?: boolean;
baseUrl?: string;
disableEscapeContent?: boolean;
convertEmbeddedPdfsToLinks?: boolean;
@ -20,6 +21,7 @@ export default class HtmlToMd {
anchorNames: options.anchorNames ? options.anchorNames.map(n => n.trim().toLowerCase()) : [],
codeBlockStyle: 'fenced',
preserveImageTagsWithSize: !!options.preserveImageTagsWithSize,
preserveNestedTables: !!options.preserveNestedTables,
bulletListMarker: '-',
emDelimiter: '*',
strongDelimiter: '**',

View File

@ -174,20 +174,29 @@ const nodeContains = (node, types) => {
return false;
}
const tableShouldBeHtml = (tableNode) => {
const tableShouldBeHtml = (tableNode, preserveNestedTables) => {
const possibleTags = [
'UL',
'OL',
'H1',
'H2',
'H3',
'H4',
'H5',
'H6',
'HR',
'BLOCKQUOTE',
];
// In general we should leave as HTML tables that include other tables. The
// exception is with the Web Clipper when we import a web page with a layout
// that's made of HTML tables. In that case we have this logic of removing the
// outer table and keeping only the inner ones. For the Rich Text editor
// however we always want to keep nested tables.
if (preserveNestedTables) possibleTags.push('TABLE');
return nodeContains(tableNode, 'code') ||
nodeContains(tableNode, [
'UL',
'OL',
'H1',
'H2',
'H3',
'H4',
'H5',
'H6',
'HR',
'BLOCKQUOTE',
]);
nodeContains(tableNode, possibleTags);
}
// Various conditions under which a table should be skipped - i.e. each cell
@ -240,7 +249,7 @@ export default function tables (turndownService) {
isCodeBlock_ = turndownService.isCodeBlock;
turndownService.keep(function (node) {
if (node.nodeName === 'TABLE' && tableShouldBeHtml(node)) return true;
if (node.nodeName === 'TABLE' && tableShouldBeHtml(node, turndownService.options.preserveNestedTables)) return true;
return false;
});
for (var key in rules) turndownService.addRule(key, rules[key])

View File

@ -39,6 +39,7 @@ export default function TurndownService (options) {
br: ' ',
disableEscapeContent: false,
preformattedCode: false,
preserveNestedTables: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},