1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-03-03 15:32:30 +02:00

Desktop: Fixes #8448: Merge changes from upstream turndown project (#8468)

This commit is contained in:
Henry Heino 2023-07-17 04:19:34 -07:00 committed by GitHub
parent 4d7399973e
commit 1e2e8ed099
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 173 additions and 126 deletions

View File

@ -0,0 +1 @@
<p><b>&nbsp; &nbsp;A test...&nbsp;</b>Test</p>

View File

@ -0,0 +1 @@
&nbsp; &nbsp;**A test...**&nbsp;Test

View File

@ -1 +1 @@
Some pictures: ![](https://static2.cbrimages.com/wp-content/uploads/2018/09/Die-01-cvrA.jpg?q=35&w=588&h=900&fit=crop&dpr=1.5) ![](http://example.com/test.gif)
Some pictures: ![](https://static2.cbrimages.com/wp-content/uploads/2018/09/Die-01-cvrA.jpg?q=35&w=588&h=900&fit=crop&dpr=1.5)![](http://example.com/test.gif)

View File

@ -5,5 +5,6 @@
| [Source](https://github.com/nim-lang/nim) | The github project |
| [nimble](https://github.com/nim-lang/nimble) | The nim package manager |
| [choosenim](https://github.com/dom96/choosenim) | Toolchain installer |
| | |
| **Community** | |
| [Forums](https://forum.nim-lang.org) | An async discussion board |

View File

@ -19,6 +19,9 @@ Convert HTML into Markdown with JavaScript.
- Allow a rule to specify whether it wants its content escaped or not
- Handle [non-OL ordered lists](https://developer.mozilla.org/en-US/docs/Web/CSS/list-style-type)
- Added option `preserveImageTagsWithSize` to keep `<img/>` tags as HTML (no Markdown conversion) if they have width or height attributes
- Added support for replacing unicode nonbreaking spaces with `&nbsp;` in output markdown.
The `src/` folder of this fork is currently based on commit `97e4535ca76bb2e70d9caa2aa4d4686956b06d44` of the [upstream Turndown project](https://github.com/mixmark-io/turndown). The `test` and `config` folders are based on an earlier commit.
### to-markdown has been renamed to Turndown. See the [migration guide](https://github.com/domchristie/to-markdown/wiki/Migrating-from-to-markdown-to-Turndown) for details.

View File

@ -49,7 +49,7 @@ function collapseWhitespace (options) {
if (!element.firstChild || isPre(element)) return
var prevText = null
var prevVoid = false
var keepLeadingWs = false
var prev = null
var node = next(prev, element, isPre)
@ -58,13 +58,12 @@ function collapseWhitespace (options) {
// added, which results in multiple spaces. These spaces are then incorrectly interpreted as a code block by renderers.
// So by keeping track of this, we make sure that only one space at most is added.
var prevTextIsOnlySpaces = false;
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ')
if ((!prevText || / $/.test(prevText.data)) &&
!prevVoid && text[0] === ' ') {
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1)
}
@ -87,11 +86,14 @@ function collapseWhitespace (options) {
}
prevText = null
prevVoid = false
} else if (isVoid(node)) {
// Avoid trimming space around non-block, non-BR void elements.
keepLeadingWs = false
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null
prevVoid = true
keepLeadingWs = true
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false
}
} else {
node = remove(node)

View File

@ -215,11 +215,25 @@ rules.fencedCodeBlock = {
var className = handledNode.className || ''
var language = (className.match(/language-(\S+)/) || [null, ''])[1]
var code = content
var fenceChar = options.fence.charAt(0)
var fenceSize = 3
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm')
var match
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1
}
}
var fence = repeat(fenceChar, fenceSize)
return (
'\n\n' + options.fence + language + '\n' +
content +
'\n' + options.fence + '\n\n'
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
}
@ -407,19 +421,15 @@ rules.code = {
},
replacement: function (content) {
if (!content.trim()) return ''
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ')
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : ''
var delimiter = '`'
var leadingSpace = ''
var trailingSpace = ''
var matches = content.match(/`+/gm)
if (matches) {
if (/^`/.test(content)) leadingSpace = ' '
if (/`$/.test(content)) trailingSpace = ' '
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`'
}
var matches = content.match(/`+/gm) || []
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`'
return delimiter + leadingSpace + content + trailingSpace + delimiter
return delimiter + extraSpace + content + extraSpace + delimiter
}
}

View File

@ -1,42 +1,56 @@
import { isBlock, isVoid, hasVoid, isCodeBlock } from './utilities'
import { isBlock, isVoid, hasVoid, isCodeBlock, isMeaningfulWhenBlank, hasMeaningfulWhenBlank } from './utilities'
export default function Node (node) {
export default function Node (node, options) {
node.isBlock = isBlock(node)
node.isCode = node.nodeName.toLowerCase() === 'code' || node.parentNode.isCode || isCodeBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode || isCodeBlock(node);
node.isBlank = isBlank(node)
node.flankingWhitespace = flankingWhitespace(node)
node.flankingWhitespace = flankingWhitespace(node, options)
return node
}
function isBlank (node) {
return (
['A', 'TH', 'TD'].indexOf(node.nodeName) === -1 &&
/^\s*$/i.test(node.textContent) &&
!isVoid(node) &&
!hasVoid(node)
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node) {
var leading = ''
var trailing = ''
if (!node.isBlock) {
var hasLeading = /^[ \r\n\t]/.test(node.textContent)
var hasTrailing = /[ \r\n\t]$/.test(node.textContent)
if (hasLeading && !isFlankedByWhitespace('left', node)) {
leading = ' '
}
if (hasTrailing && !isFlankedByWhitespace('right', node)) {
trailing = ' '
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
return { leading: leading, trailing: trailing }
var edges = edgeWhitespace(node.textContent)
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii
}
return { leading: edges.leading, trailing: edges.trailing }
}
function isFlankedByWhitespace (side, node) {
/**
 * Breaks the whitespace found at both ends of a string into its parts.
 *
 * "ASCII" whitespace here means space, tab, CR and LF; any other character
 * matched by `\s` is reported in the `*NonAscii` fields.
 *
 * @param {String} string The text to inspect
 * @returns {Object} `leading`/`trailing` hold the full whitespace run on each
 * side; `leadingAscii`, `leadingNonAscii`, `trailingNonAscii` and
 * `trailingAscii` hold the outer ASCII run and the inner remainder.
 */
function edgeWhitespace (string) {
  var pattern = /^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/
  var groups = string.match(pattern)
  var edges = {}

  // A whitespace-only string is captured entirely by the leading groups...
  edges.leading = groups[1]
  edges.leadingAscii = groups[2]
  edges.leadingNonAscii = groups[3]

  // ...which leaves the trailing groups empty in that case.
  edges.trailing = groups[4]
  edges.trailingNonAscii = groups[5]
  edges.trailingAscii = groups[6]

  return edges
}
function isFlankedByWhitespace (side, node, options) {
var sibling
var regExp
var isFlanked
@ -52,6 +66,8 @@ function isFlankedByWhitespace (side, node) {
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue)
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent)
}

View File

@ -2,7 +2,7 @@ import collapseWhitespace from './collapse-whitespace'
import HTMLParser from './html-parser'
import { isBlock, isVoid } from './utilities'
export default function RootNode (input) {
export default function RootNode (input, options) {
var root
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
@ -19,7 +19,8 @@ export default function RootNode (input) {
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
})
return root
@ -30,3 +31,7 @@ function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser()
return _htmlParser
}
/**
 * Tells whether a node is a PRE or CODE element, judged by its `nodeName`.
 *
 * @param {Object} node The node to test
 * @returns {Boolean} true when `node.nodeName` is 'PRE' or 'CODE'
 */
function isPreOrCode (node) {
  var name = node.nodeName
  if (name === 'PRE') return true
  return name === 'CODE'
}

View File

@ -1,11 +1,24 @@
import COMMONMARK_RULES from './commonmark-rules'
import Rules from './rules'
import { extend, isCodeBlock } from './utilities'
import { extend, isCodeBlock, trimLeadingNewlines, trimTrailingNewlines } from './utilities'
import RootNode from './root-node'
import Node from './node'
var reduce = Array.prototype.reduce
var leadingNewLinesRegExp = /^\n*/
var trailingNewLinesRegExp = /\n*$/
// Table of Markdown escaping rules. Each entry is a [pattern, replacement]
// pair; `escape` passes the input through every pair in array order, feeding
// the result of one replacement into the next.
// The backslash rule is listed first, so it runs before any other rule has
// inserted backslashes of its own.
// None of the patterns use the `m` flag, so the ones anchored with `^` only
// match at the very start of the string being escaped.
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
]
export default function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
@ -23,7 +36,9 @@ export default function TurndownService (options) {
linkReferenceStyle: 'full',
anchorNames: [],
br: ' ',
nonbreakingSpace: '&nbsp;',
disableEscapeContent: false,
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
@ -56,7 +71,7 @@ TurndownService.prototype = {
if (input === '') return ''
var output = process.call(this, new RootNode(input))
var output = process.call(this, new RootNode(input, this.options))
return postProcess.call(this, output)
},
@ -128,48 +143,9 @@ TurndownService.prototype = {
*/
escape: function (string) {
return (
string
// Escape backslash escapes!
.replace(/\\(\S)/g, '\\\\$1')
// Escape headings
.replace(/^(#{1,6} )/gm, '\\$1')
// Escape hr
.replace(/^([-*_] *){3,}$/gm, function (match, character) {
return match.split(character).join('\\' + character)
})
// Escape ol bullet points
.replace(/^(\W* {0,3})(\d+)\. /gm, '$1$2\\. ')
// Escape ul bullet points
.replace(/^([^\\\w]*)[*+-] /gm, function (match) {
return match.replace(/([*+-])/g, '\\$1')
})
// Escape blockquote indents
.replace(/^(\W* {0,3})> /gm, '$1\\> ')
// Escape em/strong *
.replace(/\*+(?![*\s\W]).+?\*+/g, function (match) {
return match.replace(/\*/g, '\\*')
})
// Escape em/strong _
.replace(/_+(?![_\s\W]).+?_+/g, function (match) {
return match.replace(/_/g, '\\_')
})
// Escape code _
.replace(/`+(?![`\s\W]).+?`+/g, function (match) {
return match.replace(/`/g, '\\`')
})
// Escape link brackets
.replace(/[\[\]]/g, '\\$&') // eslint-disable-line no-useless-escape
)
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
}
@ -186,7 +162,7 @@ function process (parentNode, escapeContent = 'auto') {
var self = this
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node)
node = new Node(node, self.options)
var replacement = ''
if (node.nodeType === 3) {
@ -239,39 +215,35 @@ function replacementForNode (node) {
var content = process.call(this, node, rule.escapeContent ? rule.escapeContent() : 'auto')
var whitespace = node.flankingWhitespace
if (whitespace.leading || whitespace.trailing) content = content.trim()
const replaceNonbreakingSpaces = space => {
// \u{00A0} is a nonbreaking space
return space.replace(/\u{00A0}/ug, this.options.nonbreakingSpace);
};
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
replaceNonbreakingSpaces(whitespace.leading) +
replaceNonbreakingSpaces(rule.replacement(content, node, this.options)) +
replaceNonbreakingSpaces(whitespace.trailing)
)
}
/**
* Determines the new lines between the current output and the replacement
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns The whitespace to separate the current output and the replacement
* @returns Joined output
* @type String
*/
function separatingNewlines (output, replacement) {
var newlines = [
output.match(trailingNewLinesRegExp)[0],
replacement.match(leadingNewLinesRegExp)[0]
].sort()
var maxNewlines = newlines[newlines.length - 1]
return maxNewlines.length < 2 ? maxNewlines : '\n\n'
}
function join (output, replacement) {
var s1 = trimTrailingNewlines(output)
var s2 = trimLeadingNewlines(replacement)
var nls = Math.max(output.length - s1.length, replacement.length - s2.length)
var separator = '\n\n'.substring(0, nls)
function join (string1, string2) {
var separator = separatingNewlines(string1, string2)
// Remove trailing/leading newlines and replace with separator
string1 = string1.replace(trailingNewLinesRegExp, '')
string2 = string2.replace(leadingNewLinesRegExp, '')
return string1 + separator + string2
return s1 + separator + s2
}
/**

View File

@ -14,31 +14,67 @@ export function repeat (character, count) {
return Array(count + 1).join(character)
}
/**
 * Removes every '\n' character from the start of a string.
 *
 * @param {String} string The text to trim
 * @returns {String} The text without its leading newlines
 */
export function trimLeadingNewlines (string) {
  var indexStart = 0
  while (indexStart < string.length && string[indexStart] === '\n') indexStart++
  return string.substring(indexStart)
}
/**
 * Removes every '\n' character from the end of a string.
 *
 * @param {String} string The text to trim
 * @returns {String} The text without its trailing newlines
 */
export function trimTrailingNewlines (string) {
  // avoid match-at-end regexp bottleneck, see #370
  var cut
  for (cut = string.length; cut > 0; cut--) {
    if (string.charAt(cut - 1) !== '\n') break
  }
  return string.slice(0, cut)
}
export var blockElements = [
'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
]
export function isBlock (node) {
return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
return is(node, blockElements)
}
export var voidElements = [
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
]
export function isVoid (node) {
return voidElements.indexOf(node.nodeName.toLowerCase()) !== -1
return is(node, voidElements)
}
var voidSelector = voidElements.join()
export function hasVoid (node) {
return node.querySelector && node.querySelector(voidSelector)
return has(node, voidElements)
}
// Element names that `isMeaningfulWhenBlank` and `hasMeaningfulWhenBlank`
// look for. They are compared against `nodeName` / passed to
// `getElementsByTagName` as written here.
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
]
/**
 * Tells whether the node itself is one of the `meaningfulWhenBlankElements`.
 *
 * @param {Object} node The node to test
 * @returns {Boolean} Result of matching `node.nodeName` against the list
 */
export function isMeaningfulWhenBlank (node) {
  var tagNames = meaningfulWhenBlankElements
  return is(node, tagNames)
}
/**
 * Tells whether the node contains any of the `meaningfulWhenBlankElements`.
 *
 * @param {Object} node The node whose descendants are searched
 * @returns Whatever `has` returns for this node and the element list
 */
export function hasMeaningfulWhenBlank (node) {
  var tagNames = meaningfulWhenBlankElements
  return has(node, tagNames)
}
/**
 * Checks a node's `nodeName` against a list of tag names (exact match).
 *
 * @param {Object} node The node to test
 * @param {Array} tagNames Candidate names
 * @returns {Boolean} true when `node.nodeName` appears in `tagNames`
 */
function is (node, tagNames) {
  var position = tagNames.indexOf(node.nodeName)
  return position !== -1
}
/**
 * Checks whether a node contains at least one element with one of the given
 * tag names, using `node.getElementsByTagName`.
 *
 * @param {Object} node The node whose descendants are searched
 * @param {Array} tagNames Tag names to look for
 * @returns true if any lookup yields a non-empty result, false if none does;
 * when the node has no `getElementsByTagName`, that falsy value is returned
 * as-is.
 */
function has (node, tagNames) {
  // Nodes without the lookup method: hand back the falsy value unchanged.
  if (!node.getElementsByTagName) return node.getElementsByTagName

  for (var i = 0; i < tagNames.length; i++) {
    if (node.getElementsByTagName(tagNames[i]).length) return true
  }
  return false
}
// To handle code that is presented as below (see https://github.com/laurent22/joplin/issues/573)