mirror of
https://github.com/laurent22/joplin.git
synced 2025-03-03 15:32:30 +02:00
This commit is contained in:
parent
4d7399973e
commit
1e2e8ed099
@ -0,0 +1 @@
|
||||
<p><b> A test... </b>Test</p>
|
@ -0,0 +1 @@
|
||||
**A test...** Test
|
@ -1 +1 @@
|
||||
Some pictures:  
|
||||
Some pictures: 
|
@ -5,5 +5,6 @@
|
||||
| [Source](https://github.com/nim-lang/nim) | The github project |
|
||||
| [nimble](https://github.com/nim-lang/nimble) | The nim package manager |
|
||||
| [choosenim](https://github.com/dom96/choosenim) | Toolchain installer |
|
||||
| | |
|
||||
| **Community** | |
|
||||
| [Forums](https://forum.nim-lang.org) | An async discussion board |
|
@ -19,6 +19,9 @@ Convert HTML into Markdown with JavaScript.
|
||||
- Allow a rule to specify whether it wants its content escaped or not
|
||||
- Handle [non-OL ordered lists](https://developer.mozilla.org/en-US/docs/Web/CSS/list-style-type)
|
||||
- Added option `preserveImageTagsWithSize` to keep `<img/>` tags as HTML (no Markdown conversion) if they have width or height attributes
|
||||
- Added support for replacing unicode nonbreaking spaces with `&nbsp;` in output markdown.
|
||||
|
||||
The `src/` folder of this fork is currently based on commit `97e4535ca76bb2e70d9caa2aa4d4686956b06d44` of the [upstream Turndown project](https://github.com/mixmark-io/turndown). The `test` and `config` folders are based on an earlier commit.
|
||||
|
||||
### to-markdown has been renamed to Turndown. See the [migration guide](https://github.com/domchristie/to-markdown/wiki/Migrating-from-to-markdown-to-Turndown) for details.
|
||||
|
||||
|
@ -49,7 +49,7 @@ function collapseWhitespace (options) {
|
||||
if (!element.firstChild || isPre(element)) return
|
||||
|
||||
var prevText = null
|
||||
var prevVoid = false
|
||||
var keepLeadingWs = false
|
||||
|
||||
var prev = null
|
||||
var node = next(prev, element, isPre)
|
||||
@ -58,13 +58,12 @@ function collapseWhitespace (options) {
|
||||
// added, which results in multiple spaces. These spaces are then incorrectly interpreted as a code block by renderers.
|
||||
// So by keeping track of this, we make sure that only one space at most is added.
|
||||
var prevTextIsOnlySpaces = false;
|
||||
|
||||
while (node !== element) {
|
||||
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
|
||||
var text = node.data.replace(/[ \r\n\t]+/g, ' ')
|
||||
|
||||
if ((!prevText || / $/.test(prevText.data)) &&
|
||||
!prevVoid && text[0] === ' ') {
|
||||
!keepLeadingWs && text[0] === ' ') {
|
||||
text = text.substr(1)
|
||||
}
|
||||
|
||||
@ -87,11 +86,14 @@ function collapseWhitespace (options) {
|
||||
}
|
||||
|
||||
prevText = null
|
||||
prevVoid = false
|
||||
} else if (isVoid(node)) {
|
||||
// Avoid trimming space around non-block, non-BR void elements.
|
||||
keepLeadingWs = false
|
||||
} else if (isVoid(node) || isPre(node)) {
|
||||
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
|
||||
prevText = null
|
||||
prevVoid = true
|
||||
keepLeadingWs = true
|
||||
} else if (prevText) {
|
||||
// Drop protection if set previously.
|
||||
keepLeadingWs = false
|
||||
}
|
||||
} else {
|
||||
node = remove(node)
|
||||
|
@ -215,11 +215,25 @@ rules.fencedCodeBlock = {
|
||||
|
||||
var className = handledNode.className || ''
|
||||
var language = (className.match(/language-(\S+)/) || [null, ''])[1]
|
||||
var code = content
|
||||
|
||||
var fenceChar = options.fence.charAt(0)
|
||||
var fenceSize = 3
|
||||
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm')
|
||||
|
||||
var match
|
||||
while ((match = fenceInCodeRegex.exec(code))) {
|
||||
if (match[0].length >= fenceSize) {
|
||||
fenceSize = match[0].length + 1
|
||||
}
|
||||
}
|
||||
|
||||
var fence = repeat(fenceChar, fenceSize)
|
||||
|
||||
return (
|
||||
'\n\n' + options.fence + language + '\n' +
|
||||
content +
|
||||
'\n' + options.fence + '\n\n'
|
||||
'\n\n' + fence + language + '\n' +
|
||||
code.replace(/\n$/, '') +
|
||||
'\n' + fence + '\n\n'
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -407,19 +421,15 @@ rules.code = {
|
||||
},
|
||||
|
||||
replacement: function (content) {
|
||||
if (!content.trim()) return ''
|
||||
if (!content) return ''
|
||||
content = content.replace(/\r?\n|\r/g, ' ')
|
||||
|
||||
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : ''
|
||||
var delimiter = '`'
|
||||
var leadingSpace = ''
|
||||
var trailingSpace = ''
|
||||
var matches = content.match(/`+/gm)
|
||||
if (matches) {
|
||||
if (/^`/.test(content)) leadingSpace = ' '
|
||||
if (/`$/.test(content)) trailingSpace = ' '
|
||||
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`'
|
||||
}
|
||||
var matches = content.match(/`+/gm) || []
|
||||
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`'
|
||||
|
||||
return delimiter + leadingSpace + content + trailingSpace + delimiter
|
||||
return delimiter + extraSpace + content + extraSpace + delimiter
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,42 +1,56 @@
|
||||
import { isBlock, isVoid, hasVoid, isCodeBlock } from './utilities'
|
||||
import { isBlock, isVoid, hasVoid, isCodeBlock, isMeaningfulWhenBlank, hasMeaningfulWhenBlank } from './utilities'
|
||||
|
||||
export default function Node (node) {
|
||||
export default function Node (node, options) {
|
||||
node.isBlock = isBlock(node)
|
||||
node.isCode = node.nodeName.toLowerCase() === 'code' || node.parentNode.isCode || isCodeBlock(node);
|
||||
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode || isCodeBlock(node);
|
||||
node.isBlank = isBlank(node)
|
||||
node.flankingWhitespace = flankingWhitespace(node)
|
||||
node.flankingWhitespace = flankingWhitespace(node, options)
|
||||
return node
|
||||
}
|
||||
|
||||
function isBlank (node) {
|
||||
return (
|
||||
['A', 'TH', 'TD'].indexOf(node.nodeName) === -1 &&
|
||||
/^\s*$/i.test(node.textContent) &&
|
||||
!isVoid(node) &&
|
||||
!hasVoid(node)
|
||||
!isMeaningfulWhenBlank(node) &&
|
||||
/^\s*$/i.test(node.textContent) &&
|
||||
!hasVoid(node) &&
|
||||
!hasMeaningfulWhenBlank(node)
|
||||
)
|
||||
}
|
||||
|
||||
function flankingWhitespace (node) {
|
||||
var leading = ''
|
||||
var trailing = ''
|
||||
|
||||
if (!node.isBlock) {
|
||||
var hasLeading = /^[ \r\n\t]/.test(node.textContent)
|
||||
var hasTrailing = /[ \r\n\t]$/.test(node.textContent)
|
||||
|
||||
if (hasLeading && !isFlankedByWhitespace('left', node)) {
|
||||
leading = ' '
|
||||
}
|
||||
if (hasTrailing && !isFlankedByWhitespace('right', node)) {
|
||||
trailing = ' '
|
||||
}
|
||||
function flankingWhitespace (node, options) {
|
||||
if (node.isBlock || (options.preformattedCode && node.isCode)) {
|
||||
return { leading: '', trailing: '' }
|
||||
}
|
||||
|
||||
return { leading: leading, trailing: trailing }
|
||||
var edges = edgeWhitespace(node.textContent)
|
||||
|
||||
// abandon leading ASCII WS if left-flanked by ASCII WS
|
||||
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
|
||||
edges.leading = edges.leadingNonAscii
|
||||
}
|
||||
|
||||
// abandon trailing ASCII WS if right-flanked by ASCII WS
|
||||
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
|
||||
edges.trailing = edges.trailingNonAscii
|
||||
}
|
||||
|
||||
return { leading: edges.leading, trailing: edges.trailing }
|
||||
}
|
||||
|
||||
function isFlankedByWhitespace (side, node) {
|
||||
function edgeWhitespace (string) {
|
||||
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/)
|
||||
return {
|
||||
leading: m[1], // whole string for whitespace-only strings
|
||||
leadingAscii: m[2],
|
||||
leadingNonAscii: m[3],
|
||||
trailing: m[4], // empty for whitespace-only strings
|
||||
trailingNonAscii: m[5],
|
||||
trailingAscii: m[6]
|
||||
}
|
||||
}
|
||||
|
||||
function isFlankedByWhitespace (side, node, options) {
|
||||
var sibling
|
||||
var regExp
|
||||
var isFlanked
|
||||
@ -52,6 +66,8 @@ function isFlankedByWhitespace (side, node) {
|
||||
if (sibling) {
|
||||
if (sibling.nodeType === 3) {
|
||||
isFlanked = regExp.test(sibling.nodeValue)
|
||||
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
|
||||
isFlanked = false
|
||||
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
|
||||
isFlanked = regExp.test(sibling.textContent)
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ import collapseWhitespace from './collapse-whitespace'
|
||||
import HTMLParser from './html-parser'
|
||||
import { isBlock, isVoid } from './utilities'
|
||||
|
||||
export default function RootNode (input) {
|
||||
export default function RootNode (input, options) {
|
||||
var root
|
||||
if (typeof input === 'string') {
|
||||
var doc = htmlParser().parseFromString(
|
||||
@ -19,7 +19,8 @@ export default function RootNode (input) {
|
||||
collapseWhitespace({
|
||||
element: root,
|
||||
isBlock: isBlock,
|
||||
isVoid: isVoid
|
||||
isVoid: isVoid,
|
||||
isPre: options.preformattedCode ? isPreOrCode : null
|
||||
})
|
||||
|
||||
return root
|
||||
@ -30,3 +31,7 @@ function htmlParser () {
|
||||
_htmlParser = _htmlParser || new HTMLParser()
|
||||
return _htmlParser
|
||||
}
|
||||
|
||||
function isPreOrCode (node) {
|
||||
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
|
||||
}
|
@ -1,11 +1,24 @@
|
||||
import COMMONMARK_RULES from './commonmark-rules'
|
||||
import Rules from './rules'
|
||||
import { extend, isCodeBlock } from './utilities'
|
||||
import { extend, isCodeBlock, trimLeadingNewlines, trimTrailingNewlines } from './utilities'
|
||||
import RootNode from './root-node'
|
||||
import Node from './node'
|
||||
var reduce = Array.prototype.reduce
|
||||
var leadingNewLinesRegExp = /^\n*/
|
||||
var trailingNewLinesRegExp = /\n*$/
|
||||
var escapes = [
|
||||
[/\\/g, '\\\\'],
|
||||
[/\*/g, '\\*'],
|
||||
[/^-/g, '\\-'],
|
||||
[/^\+ /g, '\\+ '],
|
||||
[/^(=+)/g, '\\$1'],
|
||||
[/^(#{1,6}) /g, '\\$1 '],
|
||||
[/`/g, '\\`'],
|
||||
[/^~~~/g, '\\~~~'],
|
||||
[/\[/g, '\\['],
|
||||
[/\]/g, '\\]'],
|
||||
[/^>/g, '\\>'],
|
||||
[/_/g, '\\_'],
|
||||
[/^(\d+)\. /g, '$1\\. ']
|
||||
]
|
||||
|
||||
export default function TurndownService (options) {
|
||||
if (!(this instanceof TurndownService)) return new TurndownService(options)
|
||||
@ -23,7 +36,9 @@ export default function TurndownService (options) {
|
||||
linkReferenceStyle: 'full',
|
||||
anchorNames: [],
|
||||
br: ' ',
|
||||
nonbreakingSpace: ' ',
|
||||
disableEscapeContent: false,
|
||||
preformattedCode: false,
|
||||
blankReplacement: function (content, node) {
|
||||
return node.isBlock ? '\n\n' : ''
|
||||
},
|
||||
@ -56,7 +71,7 @@ TurndownService.prototype = {
|
||||
|
||||
if (input === '') return ''
|
||||
|
||||
var output = process.call(this, new RootNode(input))
|
||||
var output = process.call(this, new RootNode(input, this.options))
|
||||
return postProcess.call(this, output)
|
||||
},
|
||||
|
||||
@ -128,48 +143,9 @@ TurndownService.prototype = {
|
||||
*/
|
||||
|
||||
escape: function (string) {
|
||||
return (
|
||||
string
|
||||
// Escape backslash escapes!
|
||||
.replace(/\\(\S)/g, '\\\\$1')
|
||||
|
||||
// Escape headings
|
||||
.replace(/^(#{1,6} )/gm, '\\$1')
|
||||
|
||||
// Escape hr
|
||||
.replace(/^([-*_] *){3,}$/gm, function (match, character) {
|
||||
return match.split(character).join('\\' + character)
|
||||
})
|
||||
|
||||
// Escape ol bullet points
|
||||
.replace(/^(\W* {0,3})(\d+)\. /gm, '$1$2\\. ')
|
||||
|
||||
// Escape ul bullet points
|
||||
.replace(/^([^\\\w]*)[*+-] /gm, function (match) {
|
||||
return match.replace(/([*+-])/g, '\\$1')
|
||||
})
|
||||
|
||||
// Escape blockquote indents
|
||||
.replace(/^(\W* {0,3})> /gm, '$1\\> ')
|
||||
|
||||
// Escape em/strong *
|
||||
.replace(/\*+(?![*\s\W]).+?\*+/g, function (match) {
|
||||
return match.replace(/\*/g, '\\*')
|
||||
})
|
||||
|
||||
// Escape em/strong _
|
||||
.replace(/_+(?![_\s\W]).+?_+/g, function (match) {
|
||||
return match.replace(/_/g, '\\_')
|
||||
})
|
||||
|
||||
// Escape code _
|
||||
.replace(/`+(?![`\s\W]).+?`+/g, function (match) {
|
||||
return match.replace(/`/g, '\\`')
|
||||
})
|
||||
|
||||
// Escape link brackets
|
||||
.replace(/[\[\]]/g, '\\$&') // eslint-disable-line no-useless-escape
|
||||
)
|
||||
return escapes.reduce(function (accumulator, escape) {
|
||||
return accumulator.replace(escape[0], escape[1])
|
||||
}, string)
|
||||
}
|
||||
}
|
||||
|
||||
@ -186,7 +162,7 @@ function process (parentNode, escapeContent = 'auto') {
|
||||
|
||||
var self = this
|
||||
return reduce.call(parentNode.childNodes, function (output, node) {
|
||||
node = new Node(node)
|
||||
node = new Node(node, self.options)
|
||||
|
||||
var replacement = ''
|
||||
if (node.nodeType === 3) {
|
||||
@ -239,39 +215,35 @@ function replacementForNode (node) {
|
||||
var content = process.call(this, node, rule.escapeContent ? rule.escapeContent() : 'auto')
|
||||
var whitespace = node.flankingWhitespace
|
||||
if (whitespace.leading || whitespace.trailing) content = content.trim()
|
||||
|
||||
const replaceNonbreakingSpaces = space => {
|
||||
// \u{00A0} is a nonbreaking space
|
||||
return space.replace(/\u{00A0}/ug, this.options.nonbreakingSpace);
|
||||
};
|
||||
|
||||
return (
|
||||
whitespace.leading +
|
||||
rule.replacement(content, node, this.options) +
|
||||
whitespace.trailing
|
||||
replaceNonbreakingSpaces(whitespace.leading) +
|
||||
replaceNonbreakingSpaces(rule.replacement(content, node, this.options)) +
|
||||
replaceNonbreakingSpaces(whitespace.trailing)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the new lines between the current output and the replacement
|
||||
* Joins replacement to the current output with appropriate number of new lines
|
||||
* @private
|
||||
* @param {String} output The current conversion output
|
||||
* @param {String} replacement The string to append to the output
|
||||
* @returns The whitespace to separate the current output and the replacement
|
||||
* @returns Joined output
|
||||
* @type String
|
||||
*/
|
||||
|
||||
function separatingNewlines (output, replacement) {
|
||||
var newlines = [
|
||||
output.match(trailingNewLinesRegExp)[0],
|
||||
replacement.match(leadingNewLinesRegExp)[0]
|
||||
].sort()
|
||||
var maxNewlines = newlines[newlines.length - 1]
|
||||
return maxNewlines.length < 2 ? maxNewlines : '\n\n'
|
||||
}
|
||||
function join (output, replacement) {
|
||||
var s1 = trimTrailingNewlines(output)
|
||||
var s2 = trimLeadingNewlines(replacement)
|
||||
var nls = Math.max(output.length - s1.length, replacement.length - s2.length)
|
||||
var separator = '\n\n'.substring(0, nls)
|
||||
|
||||
function join (string1, string2) {
|
||||
var separator = separatingNewlines(string1, string2)
|
||||
|
||||
// Remove trailing/leading newlines and replace with separator
|
||||
string1 = string1.replace(trailingNewLinesRegExp, '')
|
||||
string2 = string2.replace(leadingNewLinesRegExp, '')
|
||||
|
||||
return string1 + separator + string2
|
||||
return s1 + separator + s2
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -14,31 +14,67 @@ export function repeat (character, count) {
|
||||
return Array(count + 1).join(character)
|
||||
}
|
||||
|
||||
export function trimLeadingNewlines (string) {
|
||||
return string.replace(/^\n*/, '')
|
||||
}
|
||||
|
||||
export function trimTrailingNewlines (string) {
|
||||
// avoid match-at-end regexp bottleneck, see #370
|
||||
var indexEnd = string.length
|
||||
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--
|
||||
return string.substring(0, indexEnd)
|
||||
}
|
||||
|
||||
export var blockElements = [
|
||||
'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
|
||||
'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
|
||||
'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||
'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
|
||||
'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
|
||||
'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
|
||||
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
|
||||
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
|
||||
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
|
||||
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
|
||||
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
|
||||
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
|
||||
]
|
||||
|
||||
export function isBlock (node) {
|
||||
return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
|
||||
return is(node, blockElements)
|
||||
}
|
||||
|
||||
export var voidElements = [
|
||||
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
|
||||
'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
|
||||
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
|
||||
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
|
||||
]
|
||||
|
||||
export function isVoid (node) {
|
||||
return voidElements.indexOf(node.nodeName.toLowerCase()) !== -1
|
||||
return is(node, voidElements)
|
||||
}
|
||||
|
||||
var voidSelector = voidElements.join()
|
||||
export function hasVoid (node) {
|
||||
return node.querySelector && node.querySelector(voidSelector)
|
||||
return has(node, voidElements)
|
||||
}
|
||||
|
||||
var meaningfulWhenBlankElements = [
|
||||
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
|
||||
'AUDIO', 'VIDEO'
|
||||
]
|
||||
|
||||
export function isMeaningfulWhenBlank (node) {
|
||||
return is(node, meaningfulWhenBlankElements)
|
||||
}
|
||||
|
||||
export function hasMeaningfulWhenBlank (node) {
|
||||
return has(node, meaningfulWhenBlankElements)
|
||||
}
|
||||
|
||||
function is (node, tagNames) {
|
||||
return tagNames.indexOf(node.nodeName) >= 0
|
||||
}
|
||||
|
||||
function has (node, tagNames) {
|
||||
return (
|
||||
node.getElementsByTagName &&
|
||||
tagNames.some(function (tagName) {
|
||||
return node.getElementsByTagName(tagName).length
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
// To handle code that is presented as below (see https://github.com/laurent22/joplin/issues/573)
|
||||
|
Loading…
x
Reference in New Issue
Block a user