From b47cb4e29ada3dd41de7702a8f90a6b90d408cd4 Mon Sep 17 00:00:00 2001 From: J0J0 T <2733783+JOJ0@users.noreply.github.com> Date: Mon, 29 Jul 2019 12:25:25 +0200 Subject: [PATCH] Desktop, Cli: Improved bold formatting support in Enex import (#1708) * Desktop,CLI: enex_to_md: add html/md test file pairs * one pair for basic text formatting tags: strong, b, i, em * and one using span tags with inline styles for bold formatting Note: The html files include the Evernote-typical "linebreak tags inside of separate
tags" to represent empty lines! * Desktop,Cli: enex_to_md: support bold in span tags using inline styles * function isSpanWithStyle() checks if further processing of a span tag makes sense * function isSpanStyleBold() checks if bold formatting via styles is used - a similar function could be written for each span-inline-style-format that should be supported * Desktop,Cli: enex_to_md: fix saving span attrs in state object pushing attributes of span tag to state object now happens outside of isSpanWithStyle() --- .../tests/enex_to_md/text_formatting.html | 19 ++++++++ CliClient/tests/enex_to_md/text_formatting.md | 23 ++++++++++ .../enex_to_md/text_formatting_span_bold.html | 11 +++++ .../enex_to_md/text_formatting_span_bold.md | 15 +++++++ ReactNativeClient/lib/import-enex-md-gen.js | 44 ++++++++++++++++++- 5 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 CliClient/tests/enex_to_md/text_formatting.html create mode 100644 CliClient/tests/enex_to_md/text_formatting.md create mode 100644 CliClient/tests/enex_to_md/text_formatting_span_bold.html create mode 100644 CliClient/tests/enex_to_md/text_formatting_span_bold.md diff --git a/CliClient/tests/enex_to_md/text_formatting.html b/CliClient/tests/enex_to_md/text_formatting.html new file mode 100644 index 000000000..273a1f984 --- /dev/null +++ b/CliClient/tests/enex_to_md/text_formatting.html @@ -0,0 +1,19 @@ +
singleline strong text.

+
singleline bold text.

+
multiline strong + text.

+
multiline bold + text.

+ +
singleline emphasized text.

+
singleline italic text.

+
multiline emphasized + text.

+
multiline italic + text.

+ + +
singleline bold text next to normal text with leading space.

+
singleline bold text with trailing space next to normal text.

+
singleline bold text next to more bold text with leading space.

+
singleline bold text with trailing space next to more bold text.

diff --git a/CliClient/tests/enex_to_md/text_formatting.md b/CliClient/tests/enex_to_md/text_formatting.md new file mode 100644 index 000000000..6119db492 --- /dev/null +++ b/CliClient/tests/enex_to_md/text_formatting.md @@ -0,0 +1,23 @@ +**singleline strong text.** + +**singleline bold text.** + +**multiline strong text.** + +**multiline bold text.** + +*singleline emphasized text.* + +*singleline italic text.* + +*multiline emphasized text.* + +*multiline italic text.* + +**singleline bold text** next to normal text with leading space. + +**singleline bold text with trailing space **next to normal text. + +**singleline bold text**** next to more bold text with leading space.** + +**singleline bold text with trailing space ****next to more bold text.** \ No newline at end of file diff --git a/CliClient/tests/enex_to_md/text_formatting_span_bold.html b/CliClient/tests/enex_to_md/text_formatting_span_bold.html new file mode 100644 index 000000000..3719d6cb2 --- /dev/null +++ b/CliClient/tests/enex_to_md/text_formatting_span_bold.html @@ -0,0 +1,11 @@ +
singleline bold text with span style font-weight: bold;.

+
singleline bold text with span style font-family: 'TimesNewRoman,Bold';.

+
multiline bold + text with span style font-weight: bold;.

+
multiline bold + text with span style font-family: 'TimesNewRoman,Bold';.

+ +
singleline bold text with span style font-weight: bold; next to normal text with leading space.

+
singleline bold text with span style font-weight: bold; and with trailing space next to normal text.

+
singleline bold text with span style font-weight: bold; next to more bold text with span style font-weight: bold; and with leading space.

+
singleline bold text with span style font-weight: bold; and with trailing space next to more bold text with span style font-weight: bold;.
diff --git a/CliClient/tests/enex_to_md/text_formatting_span_bold.md b/CliClient/tests/enex_to_md/text_formatting_span_bold.md new file mode 100644 index 000000000..48b99b24c --- /dev/null +++ b/CliClient/tests/enex_to_md/text_formatting_span_bold.md @@ -0,0 +1,15 @@ +**singleline bold text with span style font-weight: bold;.** + +**singleline bold text with span style font-family: 'TimesNewRoman,Bold';.** + +**multiline bold text with span style font-weight: bold;.** + +**multiline bold text with span style font-family: 'TimesNewRoman,Bold';.** + +**singleline bold text with span style font-weight: bold;** next to normal text with leading space. + +**singleline bold text with span style font-weight: bold; and with trailing space **next to normal text. + +**singleline bold text with span style font-weight: bold;**** next to more bold text with span style font-weight: bold; and with leading space.** + +**singleline bold text with span style font-weight: bold; and with trailing space ****next to more bold text with span style font-weight: bold;.** \ No newline at end of file diff --git a/ReactNativeClient/lib/import-enex-md-gen.js b/ReactNativeClient/lib/import-enex-md-gen.js index d3b900ab2..d8ac3ad01 100644 --- a/ReactNativeClient/lib/import-enex-md-gen.js +++ b/ReactNativeClient/lib/import-enex-md-gen.js @@ -361,7 +361,7 @@ function isAnchor(n) { } function isIgnoredEndTag(n) { - return ["en-note", "en-todo", "span", "body", "html", "font", "br", 'hr', 'tbody', 'sup', 'img', 'abbr', 'cite', 'thead', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0; + return ["en-note", "en-todo", "body", "html", "font", "br", 'hr', 'tbody', 'sup', 'img', 'abbr', 'cite', 'thead', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0; } function isListTag(n) { @@ -396,6 +396,29 @@ function attributeToLowerCase(node) { return output; } +function isSpanWithStyle(attributes, state) { + if 
(attributes != undefined) { + if ('style' in attributes) { + return true; + } else { + return false; + } + } +} + +function isSpanStyleBold(attributes) { + let style = attributes.style; + if (style.includes('font-weight: bold;')) { + return true; + } else if (style.search( /font-family:.*,Bold.*;/ ) != -1) { + //console.debug('font-family regex matched'); + return true; + } else { + //console.debug('Found unsupported style(s) in span tag: %s', style); + return false; + } +} + function enexXmlToMdArray(stream, resources) { let remainingResources = resources.slice(); @@ -415,6 +438,7 @@ function enexXmlToMdArray(stream, resources) { inQuote: false, lists: [], anchorAttributes: [], + spanAttributes: [], }; let options = {}; @@ -681,7 +705,15 @@ function enexXmlToMdArray(stream, resources) { if (resource && !!resource.id) { section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt); } - } else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) { + } else if (n == "span") { + if (isSpanWithStyle(nodeAttributes)) { + state.spanAttributes.push(nodeAttributes); + if (isSpanStyleBold(nodeAttributes)) { + //console.debug('Applying style found in span tag: bold') + section.lines.push("**"); + } + } + } else if (["font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) { // Inline tags that can be ignored in Markdown } else { console.warn("Unsupported start tag: " + n); @@ -862,6 +894,14 @@ function enexXmlToMdArray(stream, resources) { state.lists.pop(); } else if (n == "en-media") { // Skip + } else if (n == 'span') { + let attributes = state.spanAttributes.pop(); + if (isSpanWithStyle(attributes)) { + if (isSpanStyleBold(attributes)) { + //console.debug('Applying style found in span tag (closing): bold') + section.lines.push("**"); + } + } } else if (isIgnoredEndTag(n)) { // Skip } else {