mirror of
https://github.com/laurent22/joplin.git
synced 2024-12-24 10:27:10 +02:00
Clipper: Improved Html To Md conversion
This commit is contained in:
parent
7ed9c2770c
commit
7cf267254f
26
CliClient/package-lock.json
generated
26
CliClient/package-lock.json
generated
@ -885,6 +885,19 @@
|
|||||||
"integrity": "sha1-vMl5rh+f0FcB5F5S5l06XWPxok4=",
|
"integrity": "sha1-vMl5rh+f0FcB5F5S5l06XWPxok4=",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"joplin-turndown": {
|
||||||
|
"version": "4.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/joplin-turndown/-/joplin-turndown-4.0.3.tgz",
|
||||||
|
"integrity": "sha512-WbAXje8wq4/ZLNtPDUFBEtG5zKEbz7Wth5N3vB4Nw7k+PUs3mMF49LVEPP7Kc6H4Ui671qdjpSShvdsmiLY2gA==",
|
||||||
|
"requires": {
|
||||||
|
"jsdom": "^11.9.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"joplin-turndown-plugin-gfm": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/joplin-turndown-plugin-gfm/-/joplin-turndown-plugin-gfm-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-GRXmjHFrEyUnXOYzOZvUGGtKxPm5LuK98+73ZADqQYdGzMWp/o8Qx22YYAeIBsOV2WtVsRxe2IpUGBG4foSRyQ=="
|
||||||
|
},
|
||||||
"jpeg-js": {
|
"jpeg-js": {
|
||||||
"version": "0.1.2",
|
"version": "0.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/jpeg-js/-/jpeg-js-0.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/jpeg-js/-/jpeg-js-0.1.2.tgz",
|
||||||
@ -2451,19 +2464,6 @@
|
|||||||
"safe-buffer": "^5.0.1"
|
"safe-buffer": "^5.0.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"turndown": {
|
|
||||||
"version": "4.0.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/turndown/-/turndown-4.0.2.tgz",
|
|
||||||
"integrity": "sha512-pqZ6WrHFGnxXC9q2xJ3Qa7EoLAwrojgFRajWZjxTKwbz9vnNnyi8lLjiD5h86UTPOcMlEyHjm6NMhjEDdlc25A==",
|
|
||||||
"requires": {
|
|
||||||
"jsdom": "^11.9.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"turndown-plugin-gfm": {
|
|
||||||
"version": "1.0.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
|
|
||||||
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
|
|
||||||
},
|
|
||||||
"tweetnacl": {
|
"tweetnacl": {
|
||||||
"version": "0.14.5",
|
"version": "0.14.5",
|
||||||
"resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz",
|
"resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz",
|
||||||
|
@ -36,6 +36,8 @@
|
|||||||
"fs-extra": "^5.0.0",
|
"fs-extra": "^5.0.0",
|
||||||
"html-entities": "^1.2.1",
|
"html-entities": "^1.2.1",
|
||||||
"html-minifier": "^3.5.15",
|
"html-minifier": "^3.5.15",
|
||||||
|
"joplin-turndown": "^4.0.3",
|
||||||
|
"joplin-turndown-plugin-gfm": "^1.0.2",
|
||||||
"jssha": "^2.3.0",
|
"jssha": "^2.3.0",
|
||||||
"levenshtein": "^1.0.5",
|
"levenshtein": "^1.0.5",
|
||||||
"lodash": "^4.17.4",
|
"lodash": "^4.17.4",
|
||||||
@ -60,8 +62,6 @@
|
|||||||
"tar": "^4.4.0",
|
"tar": "^4.4.0",
|
||||||
"tcp-port-used": "^0.1.2",
|
"tcp-port-used": "^0.1.2",
|
||||||
"tkwidgets": "^0.5.26",
|
"tkwidgets": "^0.5.26",
|
||||||
"turndown": "^4.0.2",
|
|
||||||
"turndown-plugin-gfm": "^1.0.2",
|
|
||||||
"url-parse": "^1.2.0",
|
"url-parse": "^1.2.0",
|
||||||
"uuid": "^3.0.1",
|
"uuid": "^3.0.1",
|
||||||
"valid-url": "^1.0.9",
|
"valid-url": "^1.0.9",
|
||||||
|
@ -24,7 +24,7 @@ describe('HtmlToMd', function() {
|
|||||||
done();
|
done();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should convert from Enex to Markdown', asyncTest(async () => {
|
it('should convert from Html to Markdown', asyncTest(async () => {
|
||||||
const basePath = __dirname + '/html_to_md';
|
const basePath = __dirname + '/html_to_md';
|
||||||
const files = await shim.fsDriver().readDirStats(basePath);
|
const files = await shim.fsDriver().readDirStats(basePath);
|
||||||
const htmlToMd = new HtmlToMd();
|
const htmlToMd = new HtmlToMd();
|
||||||
@ -36,7 +36,7 @@ describe('HtmlToMd', function() {
|
|||||||
const htmlPath = basePath + '/' + htmlFilename;
|
const htmlPath = basePath + '/' + htmlFilename;
|
||||||
const mdPath = basePath + '/' + filename(htmlFilename) + '.md';
|
const mdPath = basePath + '/' + filename(htmlFilename) + '.md';
|
||||||
|
|
||||||
if (htmlFilename !== 'table_no_header.html') continue;
|
// if (htmlFilename !== 'anchor_with_newlines.html') continue;
|
||||||
|
|
||||||
const html = await shim.fsDriver().readFile(htmlPath);
|
const html = await shim.fsDriver().readFile(htmlPath);
|
||||||
const expectedMd = await shim.fsDriver().readFile(mdPath);
|
const expectedMd = await shim.fsDriver().readFile(mdPath);
|
||||||
@ -47,6 +47,8 @@ describe('HtmlToMd', function() {
|
|||||||
console.info('');
|
console.info('');
|
||||||
console.info('Error converting file: ' + htmlFilename);
|
console.info('Error converting file: ' + htmlFilename);
|
||||||
console.info('--------------------------------- Got:');
|
console.info('--------------------------------- Got:');
|
||||||
|
console.info(actualMd);
|
||||||
|
console.info('--------------------------------- Raw:');
|
||||||
console.info(actualMd.split('\n'));
|
console.info(actualMd.split('\n'));
|
||||||
console.info('--------------------------------- Expected:');
|
console.info('--------------------------------- Expected:');
|
||||||
console.info(expectedMd.split('\n'));
|
console.info(expectedMd.split('\n'));
|
||||||
|
1
CliClient/tests/html_to_md/anchor_with_inner_tags.html
Normal file
1
CliClient/tests/html_to_md/anchor_with_inner_tags.html
Normal file
@ -0,0 +1 @@
|
|||||||
|
<a href="https://joplin.cozic.net"><h1 id="joplin"><img class="title-icon" src="https://joplin.cozic.net/images/Icon512.png">oplin</h1></a>
|
1
CliClient/tests/html_to_md/anchor_with_inner_tags.md
Normal file
1
CliClient/tests/html_to_md/anchor_with_inner_tags.md
Normal file
@ -0,0 +1 @@
|
|||||||
|
[# ![](https://joplin.cozic.net/images/Icon512.png)oplin](https://joplin.cozic.net)
|
1
CliClient/tests/html_to_md/anchor_with_newlines.html
Normal file
1
CliClient/tests/html_to_md/anchor_with_newlines.html
Normal file
@ -0,0 +1 @@
|
|||||||
|
<a href="http://example.com"><p>That</p><p>Shouldn't be allowed</p></a>
|
1
CliClient/tests/html_to_md/anchor_with_newlines.md
Normal file
1
CliClient/tests/html_to_md/anchor_with_newlines.md
Normal file
@ -0,0 +1 @@
|
|||||||
|
[That<br>Shouldn't be allowed](http://example.com)
|
10
CliClient/tests/html_to_md/table_with_empty_cells.html
Normal file
10
CliClient/tests/html_to_md/table_with_empty_cells.html
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td></td>
|
||||||
|
<td>Previous is empty</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Next is empty</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
4
CliClient/tests/html_to_md/table_with_empty_cells.md
Normal file
4
CliClient/tests/html_to_md/table_with_empty_cells.md
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
| | |
|
||||||
|
| --- | --- |
|
||||||
|
| | Previous is empty |
|
||||||
|
| Next is empty | |
|
13
CliClient/tests/html_to_md/table_with_empty_row.html
Normal file
13
CliClient/tests/html_to_md/table_with_empty_row.html
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td>One</td><td>Two</td>
|
||||||
|
</tr>
|
||||||
|
<tr></tr>
|
||||||
|
<tr>
|
||||||
|
<td>One</td><td>Two</td>
|
||||||
|
</tr>
|
||||||
|
<tr></tr>
|
||||||
|
<tr>
|
||||||
|
<td>One</td><td>Two</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
5
CliClient/tests/html_to_md/table_with_empty_row.md
Normal file
5
CliClient/tests/html_to_md/table_with_empty_row.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
| | |
|
||||||
|
| --- | --- |
|
||||||
|
| One | Two |
|
||||||
|
| One | Two |
|
||||||
|
| One | Two |
|
6
CliClient/tests/html_to_md/table_with_newlines.html
Normal file
6
CliClient/tests/html_to_md/table_with_newlines.html
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td><p>Some paragraph</p><p>inside a table cell</p></td>
|
||||||
|
<td>Second column</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
3
CliClient/tests/html_to_md/table_with_newlines.md
Normal file
3
CliClient/tests/html_to_md/table_with_newlines.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
| | |
|
||||||
|
| --- | --- |
|
||||||
|
| Some paragraph<br><br>inside a table cell | Second column |
|
16
CliClient/tests/html_to_md/table_within_table.html
Normal file
16
CliClient/tests/html_to_md/table_within_table.html
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<!--
|
||||||
|
The inner table is rendered but not the outer one.
|
||||||
|
Basically if any table contains another table, it is rendered as plain text
|
||||||
|
-->
|
||||||
|
<table>
|
||||||
|
<tr><td>
|
||||||
|
First column, and an inner table:
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr><td>One</td><td>Two</td></tr>
|
||||||
|
<tr><td>One</td><td>Two</td></tr>
|
||||||
|
</table>
|
||||||
|
</td>
|
||||||
|
<td>Second column</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
8
CliClient/tests/html_to_md/table_within_table.md
Normal file
8
CliClient/tests/html_to_md/table_within_table.md
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
First column, and an inner table:
|
||||||
|
|
||||||
|
| | |
|
||||||
|
| --- | --- |
|
||||||
|
| One | Two |
|
||||||
|
| One | Two |
|
||||||
|
|
||||||
|
Second column
|
2227
ElectronClient/app/package-lock.json
generated
2227
ElectronClient/app/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -87,6 +87,8 @@
|
|||||||
"fs-extra": "^5.0.0",
|
"fs-extra": "^5.0.0",
|
||||||
"highlight.js": "^9.12.0",
|
"highlight.js": "^9.12.0",
|
||||||
"html-entities": "^1.2.1",
|
"html-entities": "^1.2.1",
|
||||||
|
"joplin-turndown": "^4.0.3",
|
||||||
|
"joplin-turndown-plugin-gfm": "^1.0.5",
|
||||||
"jssha": "^2.3.1",
|
"jssha": "^2.3.1",
|
||||||
"katex": "^0.9.0-beta1",
|
"katex": "^0.9.0-beta1",
|
||||||
"levenshtein": "^1.0.5",
|
"levenshtein": "^1.0.5",
|
||||||
@ -116,8 +118,6 @@
|
|||||||
"string-to-stream": "^1.1.0",
|
"string-to-stream": "^1.1.0",
|
||||||
"tar": "^4.4.0",
|
"tar": "^4.4.0",
|
||||||
"tcp-port-used": "^0.1.2",
|
"tcp-port-used": "^0.1.2",
|
||||||
"turndown": "^4.0.2",
|
|
||||||
"turndown-plugin-gfm": "^1.0.2",
|
|
||||||
"url-parse": "^1.2.0",
|
"url-parse": "^1.2.0",
|
||||||
"uuid": "^3.1.0",
|
"uuid": "^3.1.0",
|
||||||
"valid-url": "^1.0.9",
|
"valid-url": "^1.0.9",
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
const TurndownService = require('turndown')
|
const TurndownService = require('joplin-turndown')
|
||||||
|
|
||||||
class HtmlToMd {
|
class HtmlToMd {
|
||||||
|
|
||||||
parse(html) {
|
parse(html) {
|
||||||
const turndownPluginGfm = require('turndown-plugin-gfm').gfm
|
const turndownPluginGfm = require('joplin-turndown-plugin-gfm').gfm
|
||||||
const turndown = new TurndownService()
|
const turndown = new TurndownService({
|
||||||
|
headingStyle: 'atx',
|
||||||
|
})
|
||||||
turndown.use(turndownPluginGfm)
|
turndown.use(turndownPluginGfm)
|
||||||
turndown.remove('script');
|
turndown.remove('script');
|
||||||
let markdown = turndown.turndown(html)
|
let markdown = turndown.turndown(html)
|
||||||
|
@ -2,5 +2,7 @@
|
|||||||
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
|
|
||||||
cd "$ROOT_DIR/CliClient/node_modules"
|
cd "$ROOT_DIR/CliClient/node_modules"
|
||||||
rm -rf tkwidgets
|
rm -rf tkwidgets joplin-turndown joplin-turndown-plugin-gfm
|
||||||
ln -s /mnt/d/Docs/PROGS/Node/tkwidgets/src tkwidgets
|
ln -s /mnt/d/Docs/PROGS/Node/tkwidgets/src tkwidgets
|
||||||
|
ln -s /mnt/d/Temp/turndown-plugin-gfm joplin-turndown-plugin-gfm
|
||||||
|
ln -s /mnt/d/Temp/turndown joplin-turndown
|
||||||
|
Loading…
Reference in New Issue
Block a user