1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-24 10:27:10 +02:00

Desktop, Cli: Improved bold formatting support in Enex import (#1708)

* Desktop,CLI: enex_to_md: add html/md test file pairs

* one pair for basic text formatting tags: strong, b, i, em
* and one using span tags with inline styles for bold formatting

Note: The html files include the Evernote-typical "linebreak tags inside of separate <div> tags"
to represent empty lines!

* Desktop,Cli: enex_to_md: support bold in span tags using inline styles

* function isSpanWithStyle() checks if further processing of a span tag
  makes sense
* function isSpanStyleBold() checks if bold formatting via styles is
  used - a similar function could be written for each span-inline-style-format
  that should be supported

* Desktop,Cli: enex_to_md: fix saving span attrs in state object

pushing attributes of span tag to state object now
happens outside of isSpanWithStyle()
This commit is contained in:
J0J0 T 2019-07-29 12:25:25 +02:00 committed by Laurent Cozic
parent 8c42ddf6c3
commit b47cb4e29a
5 changed files with 110 additions and 2 deletions

View File

@ -0,0 +1,19 @@
<div><strong>singleline strong text.</strong></div><div><br/></div>
<div><b>singleline bold text.</b></div><div><br/></div>
<div><strong>multiline strong
text.</strong></div><div><br/></div>
<div><b>multiline bold
text.</b></div><div><br/></div>
<div><em>singleline emphasized text.</em></div><div><br/></div>
<div><i>singleline italic text.</i></div><div><br/></div>
<div><em>multiline emphasized
text.</em><div><br/></div>
<div><i>multiline italic
text.</i><div><br/></div>
<div><b>singleline bold text</b> next to normal text with leading space.</div><div><br/></div>
<div><b>singleline bold text with trailing space </b>next to normal text.</div><div><br/></div>
<div><b>singleline bold text</b><b> next to more bold text with leading space.</b></div><div><br/></div>
<div><b>singleline bold text with trailing space </b><b>next to more bold text.</b></div><div><br/></div>

View File

@ -0,0 +1,23 @@
**singleline strong text.**
**singleline bold text.**
**multiline strong text.**
**multiline bold text.**
*singleline emphasized text.*
*singleline italic text.*
*multiline emphasized text.*
*multiline italic text.*
**singleline bold text** next to normal text with leading space.
**singleline bold text with trailing space **next to normal text.
**singleline bold text**** next to more bold text with leading space.**
**singleline bold text with trailing space ****next to more bold text.**

View File

@ -0,0 +1,11 @@
<div><span style="font-weight: bold;">singleline bold text with span style font-weight: bold;.</span></div><div><br/></div>
<div><span style="font-family: 'TimesNewRoman,Bold';">singleline bold text with span style font-family: 'TimesNewRoman,Bold';.</span></div><div><br/></div>
<div><span style="font-weight: bold;">multiline bold
text with span style font-weight: bold;.</span></div><div><br/></div>
<div><span style="font-family: 'TimesNewRoman,Bold';">multiline bold
text with span style font-family: 'TimesNewRoman,Bold';.</span></div><div><br/></div>
<div><span style="font-weight: bold;">singleline bold text with span style font-weight: bold;</span> next to normal text with leading space.</div><div><br/></div>
<div><span style="font-weight: bold;">singleline bold text with span style font-weight: bold; and with trailing space </span>next to normal text.</div><div><br/></div>
<div><span style="font-weight: bold;">singleline bold text with span style font-weight: bold;</span><span style="font-weight: bold;"> next to more bold text with span style font-weight: bold; and with leading space.</span></div><div><br/></div>
<div><span style="font-weight: bold;">singleline bold text with span style font-weight: bold; and with trailing space </span><span style="font-weight: bold;">next to more bold text with span style font-weight: bold;.</span></div>

View File

@ -0,0 +1,15 @@
**singleline bold text with span style font-weight: bold;.**
**singleline bold text with span style font-family: 'TimesNewRoman,Bold';.**
**multiline bold text with span style font-weight: bold;.**
**multiline bold text with span style font-family: 'TimesNewRoman,Bold';.**
**singleline bold text with span style font-weight: bold;** next to normal text with leading space.
**singleline bold text with span style font-weight: bold; and with trailing space **next to normal text.
**singleline bold text with span style font-weight: bold;**** next to more bold text with span style font-weight: bold; and with leading space.**
**singleline bold text with span style font-weight: bold; and with trailing space ****next to more bold text with span style font-weight: bold;.**

View File

@ -361,7 +361,7 @@ function isAnchor(n) {
}
/**
 * Tells whether the closing tag `n` can be skipped when converting to Markdown.
 *
 * "span" is intentionally absent from the list: a closing span may need to
 * emit a closing bold marker, so it must not be swallowed here.
 *
 * @param {string} n - Lower-case tag name.
 * @returns {boolean} True when the end tag produces no Markdown output.
 */
function isIgnoredEndTag(n) {
	const ignoredEndTags = [
		"en-note", "en-todo", "body", "html", "font", "br", 'hr', 'tbody',
		'sup', 'img', 'abbr', 'cite', 'thead', 'small', 'tt', 'sub',
		'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area',
	];
	return ignoredEndTags.indexOf(n) >= 0;
}
function isListTag(n) {
@ -396,6 +396,29 @@ function attributeToLowerCase(node) {
return output;
}
/**
 * Tells whether a span's attribute map carries an inline `style` attribute,
 * i.e. whether it is worth inspecting the span for style-based formatting.
 *
 * Always returns a strict boolean, including for a missing or non-object
 * `attributes` value (for which the `in` operator would otherwise be skipped
 * or throw).
 *
 * @param {Object|null|undefined} attributes - Attributes of the span tag.
 * @param {Object} [state] - Unused; kept so existing call sites stay valid.
 * @returns {boolean} True when `attributes` has a `style` key.
 */
function isSpanWithStyle(attributes, state) {
	// The `in` operator throws on primitives, so only objects are probed.
	if (attributes === null || typeof attributes !== 'object') {
		return false;
	}
	return 'style' in attributes;
}
/**
 * Tells whether the inline style of a span requests bold text.
 *
 * Two forms are recognised:
 *  - a `font-weight` declaration whose value is `bold`, `bolder`, or a numeric
 *    weight of 700, 800 or 900 (`bold` is the keyword for 700);
 *  - a `font-family` declaration whose value contains a ",Bold" face suffix,
 *    e.g. `font-family: 'TimesNewRoman,Bold';`.
 *
 * Matching ignores case and optional whitespace, and does not require a
 * trailing semicolon, so the last declaration of a style is handled too.
 *
 * @param {Object} attributes - Attributes of the span tag.
 * @returns {boolean} True when the style asks for bold; false otherwise,
 *   including when there is no string `style` attribute.
 */
function isSpanStyleBold(attributes) {
	const style = attributes ? attributes.style : undefined;
	if (typeof style !== 'string') {
		return false;
	}

	// The value must end at ';' or end of string so that e.g. "7000" is rejected.
	const boldWeight = /font-weight\s*:\s*(?:bold|bolder|[7-9]00)\s*(?:!important\s*)?(?:;|$)/i;
	if (boldWeight.test(style)) {
		return true;
	}

	// [^;]* keeps the match inside the font-family declaration, so a ",Bold"
	// appearing in a later, unrelated declaration is not mistaken for a bold face.
	const boldFamily = /font-family\s*:[^;]*,\s*Bold/i;
	return boldFamily.test(style);
}
function enexXmlToMdArray(stream, resources) {
let remainingResources = resources.slice();
@ -415,6 +438,7 @@ function enexXmlToMdArray(stream, resources) {
inQuote: false,
lists: [],
anchorAttributes: [],
spanAttributes: [],
};
let options = {};
@ -681,7 +705,15 @@ function enexXmlToMdArray(stream, resources) {
if (resource && !!resource.id) {
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
}
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
} else if (n == "span") {
if (isSpanWithStyle(nodeAttributes)) {
state.spanAttributes.push(nodeAttributes);
if (isSpanStyleBold(nodeAttributes)) {
//console.debug('Applying style found in span tag: bold')
section.lines.push("**");
}
}
} else if (["font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
// Inline tags that can be ignored in Markdown
} else {
console.warn("Unsupported start tag: " + n);
@ -862,6 +894,14 @@ function enexXmlToMdArray(stream, resources) {
state.lists.pop();
} else if (n == "en-media") {
// Skip
} else if (n == 'span') {
let attributes = state.spanAttributes.pop();
if (isSpanWithStyle(attributes)) {
if (isSpanStyleBold(attributes)) {
//console.debug('Applying style found in span tag (closing): bold')
section.lines.push("**");
}
}
} else if (isIgnoredEndTag(n)) {
// Skip
} else {