mirror of
https://github.com/laurent22/joplin.git
synced 2024-12-24 10:27:10 +02:00
Clipper: Improved Html To Md conversion
This commit is contained in:
parent
7ed9c2770c
commit
7cf267254f
26
CliClient/package-lock.json
generated
26
CliClient/package-lock.json
generated
@ -885,6 +885,19 @@
|
||||
"integrity": "sha1-vMl5rh+f0FcB5F5S5l06XWPxok4=",
|
||||
"dev": true
|
||||
},
|
||||
"joplin-turndown": {
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/joplin-turndown/-/joplin-turndown-4.0.3.tgz",
|
||||
"integrity": "sha512-WbAXje8wq4/ZLNtPDUFBEtG5zKEbz7Wth5N3vB4Nw7k+PUs3mMF49LVEPP7Kc6H4Ui671qdjpSShvdsmiLY2gA==",
|
||||
"requires": {
|
||||
"jsdom": "^11.9.0"
|
||||
}
|
||||
},
|
||||
"joplin-turndown-plugin-gfm": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/joplin-turndown-plugin-gfm/-/joplin-turndown-plugin-gfm-1.0.2.tgz",
|
||||
"integrity": "sha512-GRXmjHFrEyUnXOYzOZvUGGtKxPm5LuK98+73ZADqQYdGzMWp/o8Qx22YYAeIBsOV2WtVsRxe2IpUGBG4foSRyQ=="
|
||||
},
|
||||
"jpeg-js": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/jpeg-js/-/jpeg-js-0.1.2.tgz",
|
||||
@ -2451,19 +2464,6 @@
|
||||
"safe-buffer": "^5.0.1"
|
||||
}
|
||||
},
|
||||
"turndown": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/turndown/-/turndown-4.0.2.tgz",
|
||||
"integrity": "sha512-pqZ6WrHFGnxXC9q2xJ3Qa7EoLAwrojgFRajWZjxTKwbz9vnNnyi8lLjiD5h86UTPOcMlEyHjm6NMhjEDdlc25A==",
|
||||
"requires": {
|
||||
"jsdom": "^11.9.0"
|
||||
}
|
||||
},
|
||||
"turndown-plugin-gfm": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
|
||||
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
|
||||
},
|
||||
"tweetnacl": {
|
||||
"version": "0.14.5",
|
||||
"resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz",
|
||||
|
@ -36,6 +36,8 @@
|
||||
"fs-extra": "^5.0.0",
|
||||
"html-entities": "^1.2.1",
|
||||
"html-minifier": "^3.5.15",
|
||||
"joplin-turndown": "^4.0.3",
|
||||
"joplin-turndown-plugin-gfm": "^1.0.2",
|
||||
"jssha": "^2.3.0",
|
||||
"levenshtein": "^1.0.5",
|
||||
"lodash": "^4.17.4",
|
||||
@ -60,8 +62,6 @@
|
||||
"tar": "^4.4.0",
|
||||
"tcp-port-used": "^0.1.2",
|
||||
"tkwidgets": "^0.5.26",
|
||||
"turndown": "^4.0.2",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"url-parse": "^1.2.0",
|
||||
"uuid": "^3.0.1",
|
||||
"valid-url": "^1.0.9",
|
||||
|
@ -24,7 +24,7 @@ describe('HtmlToMd', function() {
|
||||
done();
|
||||
});
|
||||
|
||||
it('should convert from Enex to Markdown', asyncTest(async () => {
|
||||
it('should convert from Html to Markdown', asyncTest(async () => {
|
||||
const basePath = __dirname + '/html_to_md';
|
||||
const files = await shim.fsDriver().readDirStats(basePath);
|
||||
const htmlToMd = new HtmlToMd();
|
||||
@ -36,7 +36,7 @@ describe('HtmlToMd', function() {
|
||||
const htmlPath = basePath + '/' + htmlFilename;
|
||||
const mdPath = basePath + '/' + filename(htmlFilename) + '.md';
|
||||
|
||||
if (htmlFilename !== 'table_no_header.html') continue;
|
||||
// if (htmlFilename !== 'anchor_with_newlines.html') continue;
|
||||
|
||||
const html = await shim.fsDriver().readFile(htmlPath);
|
||||
const expectedMd = await shim.fsDriver().readFile(mdPath);
|
||||
@ -47,6 +47,8 @@ describe('HtmlToMd', function() {
|
||||
console.info('');
|
||||
console.info('Error converting file: ' + htmlFilename);
|
||||
console.info('--------------------------------- Got:');
|
||||
console.info(actualMd);
|
||||
console.info('--------------------------------- Raw:');
|
||||
console.info(actualMd.split('\n'));
|
||||
console.info('--------------------------------- Expected:');
|
||||
console.info(expectedMd.split('\n'));
|
||||
|
1
CliClient/tests/html_to_md/anchor_with_inner_tags.html
Normal file
1
CliClient/tests/html_to_md/anchor_with_inner_tags.html
Normal file
@ -0,0 +1 @@
|
||||
<a href="https://joplin.cozic.net"><h1 id="joplin"><img class="title-icon" src="https://joplin.cozic.net/images/Icon512.png">oplin</h1></a>
|
1
CliClient/tests/html_to_md/anchor_with_inner_tags.md
Normal file
1
CliClient/tests/html_to_md/anchor_with_inner_tags.md
Normal file
@ -0,0 +1 @@
|
||||
[# ![](https://joplin.cozic.net/images/Icon512.png)oplin](https://joplin.cozic.net)
|
1
CliClient/tests/html_to_md/anchor_with_newlines.html
Normal file
1
CliClient/tests/html_to_md/anchor_with_newlines.html
Normal file
@ -0,0 +1 @@
|
||||
<a href="http://example.com"><p>That</p><p>Shouldn't be allowed</p></a>
|
1
CliClient/tests/html_to_md/anchor_with_newlines.md
Normal file
1
CliClient/tests/html_to_md/anchor_with_newlines.md
Normal file
@ -0,0 +1 @@
|
||||
[That<br>Shouldn't be allowed](http://example.com)
|
@ -1,4 +1,4 @@
|
||||
| | |
|
||||
| --- | --- |
|
||||
| No | header |
|
||||
| No | header |
|
||||
| And no | suprises |
|
10
CliClient/tests/html_to_md/table_with_empty_cells.html
Normal file
10
CliClient/tests/html_to_md/table_with_empty_cells.html
Normal file
@ -0,0 +1,10 @@
|
||||
<table>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>Previous is empty</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Next is empty</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
4
CliClient/tests/html_to_md/table_with_empty_cells.md
Normal file
4
CliClient/tests/html_to_md/table_with_empty_cells.md
Normal file
@ -0,0 +1,4 @@
|
||||
| | |
|
||||
| --- | --- |
|
||||
| | Previous is empty |
|
||||
| Next is empty | |
|
13
CliClient/tests/html_to_md/table_with_empty_row.html
Normal file
13
CliClient/tests/html_to_md/table_with_empty_row.html
Normal file
@ -0,0 +1,13 @@
|
||||
<table>
|
||||
<tr>
|
||||
<td>One</td><td>Two</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td>One</td><td>Two</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td>One</td><td>Two</td>
|
||||
</tr>
|
||||
</table>
|
5
CliClient/tests/html_to_md/table_with_empty_row.md
Normal file
5
CliClient/tests/html_to_md/table_with_empty_row.md
Normal file
@ -0,0 +1,5 @@
|
||||
| | |
|
||||
| --- | --- |
|
||||
| One | Two |
|
||||
| One | Two |
|
||||
| One | Two |
|
6
CliClient/tests/html_to_md/table_with_newlines.html
Normal file
6
CliClient/tests/html_to_md/table_with_newlines.html
Normal file
@ -0,0 +1,6 @@
|
||||
<table>
|
||||
<tr>
|
||||
<td><p>Some paragraph</p><p>inside a table cell</p></td>
|
||||
<td>Second column</td>
|
||||
</tr>
|
||||
</table>
|
3
CliClient/tests/html_to_md/table_with_newlines.md
Normal file
3
CliClient/tests/html_to_md/table_with_newlines.md
Normal file
@ -0,0 +1,3 @@
|
||||
| | |
|
||||
| --- | --- |
|
||||
| Some paragraph<br><br>inside a table cell | Second column |
|
16
CliClient/tests/html_to_md/table_within_table.html
Normal file
16
CliClient/tests/html_to_md/table_within_table.html
Normal file
@ -0,0 +1,16 @@
|
||||
<!--
|
||||
The inner table is rendered but not the outer one.
|
||||
Basically if any table contains another table, it is rendered as plain text
|
||||
-->
|
||||
<table>
|
||||
<tr><td>
|
||||
First column, and an inner table:
|
||||
|
||||
<table>
|
||||
<tr><td>One</td><td>Two</td></tr>
|
||||
<tr><td>One</td><td>Two</td></tr>
|
||||
</table>
|
||||
</td>
|
||||
<td>Second column</td>
|
||||
</tr>
|
||||
</table>
|
8
CliClient/tests/html_to_md/table_within_table.md
Normal file
8
CliClient/tests/html_to_md/table_within_table.md
Normal file
@ -0,0 +1,8 @@
|
||||
First column, and an inner table:
|
||||
|
||||
| | |
|
||||
| --- | --- |
|
||||
| One | Two |
|
||||
| One | Two |
|
||||
|
||||
Second column
|
2227
ElectronClient/app/package-lock.json
generated
2227
ElectronClient/app/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -87,6 +87,8 @@
|
||||
"fs-extra": "^5.0.0",
|
||||
"highlight.js": "^9.12.0",
|
||||
"html-entities": "^1.2.1",
|
||||
"joplin-turndown": "^4.0.3",
|
||||
"joplin-turndown-plugin-gfm": "^1.0.5",
|
||||
"jssha": "^2.3.1",
|
||||
"katex": "^0.9.0-beta1",
|
||||
"levenshtein": "^1.0.5",
|
||||
@ -116,8 +118,6 @@
|
||||
"string-to-stream": "^1.1.0",
|
||||
"tar": "^4.4.0",
|
||||
"tcp-port-used": "^0.1.2",
|
||||
"turndown": "^4.0.2",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"url-parse": "^1.2.0",
|
||||
"uuid": "^3.1.0",
|
||||
"valid-url": "^1.0.9",
|
||||
|
@ -1,10 +1,12 @@
|
||||
const TurndownService = require('turndown')
|
||||
const TurndownService = require('joplin-turndown')
|
||||
|
||||
class HtmlToMd {
|
||||
|
||||
parse(html) {
|
||||
const turndownPluginGfm = require('turndown-plugin-gfm').gfm
|
||||
const turndown = new TurndownService()
|
||||
const turndownPluginGfm = require('joplin-turndown-plugin-gfm').gfm
|
||||
const turndown = new TurndownService({
|
||||
headingStyle: 'atx',
|
||||
})
|
||||
turndown.use(turndownPluginGfm)
|
||||
turndown.remove('script');
|
||||
let markdown = turndown.turndown(html)
|
||||
|
@ -2,5 +2,7 @@
|
||||
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
cd "$ROOT_DIR/CliClient/node_modules"
|
||||
rm -rf tkwidgets
|
||||
ln -s /mnt/d/Docs/PROGS/Node/tkwidgets/src tkwidgets
|
||||
rm -rf tkwidgets joplin-turndown joplin-turndown-plugin-gfm
|
||||
ln -s /mnt/d/Docs/PROGS/Node/tkwidgets/src tkwidgets
|
||||
ln -s /mnt/d/Temp/turndown-plugin-gfm joplin-turndown-plugin-gfm
|
||||
ln -s /mnt/d/Temp/turndown joplin-turndown
|
||||
|
Loading…
Reference in New Issue
Block a user