1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-11 18:24:43 +02:00

Clipper: Improved download of images and conversion to resources

This commit is contained in:
Laurent Cozic 2018-05-23 14:25:59 +01:00
parent 3c5eb99c59
commit a8da469523
14 changed files with 255 additions and 147 deletions

View File

@ -457,6 +457,11 @@
"iconv-lite": "~0.4.13" "iconv-lite": "~0.4.13"
} }
}, },
"es6-promise-pool": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/es6-promise-pool/-/es6-promise-pool-2.5.0.tgz",
"integrity": "sha1-FHxhKza0fxBQJ/nSv1SlmKmdnMs="
},
"escape-string-regexp": { "escape-string-regexp": {
"version": "1.0.5", "version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
@ -536,6 +541,11 @@
"format": "^0.2.2" "format": "^0.2.2"
} }
}, },
"file-type": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/file-type/-/file-type-4.4.0.tgz",
"integrity": "sha1-G2AOX8ofvcboDApwxxyNul95BsU="
},
"follow-redirects": { "follow-redirects": {
"version": "1.2.5", "version": "1.2.5",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.2.5.tgz", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.2.5.tgz",
@ -756,6 +766,14 @@
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.19.tgz", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.19.tgz",
"integrity": "sha512-oTZqweIP51xaGPI4uPa56/Pri/480R+mo7SeU+YETByQNhDG55ycFyNLIgta9vXhILrxXDmF7ZGhqZIcuN0gJQ==" "integrity": "sha512-oTZqweIP51xaGPI4uPa56/Pri/480R+mo7SeU+YETByQNhDG55ycFyNLIgta9vXhILrxXDmF7ZGhqZIcuN0gJQ=="
}, },
"image-type": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/image-type/-/image-type-3.0.0.tgz",
"integrity": "sha1-FQKvMTX5BuEiyHfDHpSve3qRRsU=",
"requires": {
"file-type": "^4.1.0"
}
},
"inflight": { "inflight": {
"version": "1.0.6", "version": "1.0.6",
"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
@ -1361,6 +1379,15 @@
"resolved": "https://registry.npmjs.org/querystringify/-/querystringify-1.0.0.tgz", "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-1.0.0.tgz",
"integrity": "sha1-YoYkIRLFtxL6ZU5SZlK/ahP/Bcs=" "integrity": "sha1-YoYkIRLFtxL6ZU5SZlK/ahP/Bcs="
}, },
"read-chunk": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/read-chunk/-/read-chunk-2.1.0.tgz",
"integrity": "sha1-agTAkoAF7Z1C4aasVgDhnLx/9lU=",
"requires": {
"pify": "^3.0.0",
"safe-buffer": "^5.1.1"
}
},
"readable-stream": { "readable-stream": {
"version": "2.3.3", "version": "2.3.3",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.3.tgz", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.3.tgz",

View File

@ -31,11 +31,13 @@
"async-mutex": "^0.1.3", "async-mutex": "^0.1.3",
"base-64": "^0.1.0", "base-64": "^0.1.0",
"compare-version": "^0.1.2", "compare-version": "^0.1.2",
"es6-promise-pool": "^2.5.0",
"follow-redirects": "^1.2.4", "follow-redirects": "^1.2.4",
"form-data": "^2.1.4", "form-data": "^2.1.4",
"fs-extra": "^5.0.0", "fs-extra": "^5.0.0",
"html-entities": "^1.2.1", "html-entities": "^1.2.1",
"html-minifier": "^3.5.15", "html-minifier": "^3.5.15",
"image-type": "^3.0.0",
"joplin-turndown": "^4.0.3", "joplin-turndown": "^4.0.3",
"joplin-turndown-plugin-gfm": "^1.0.2", "joplin-turndown-plugin-gfm": "^1.0.2",
"jssha": "^2.3.0", "jssha": "^2.3.0",
@ -50,6 +52,7 @@
"promise": "^7.1.1", "promise": "^7.1.1",
"proper-lockfile": "^2.0.1", "proper-lockfile": "^2.0.1",
"query-string": "4.3.4", "query-string": "4.3.4",
"read-chunk": "^2.1.0",
"redux": "^3.7.2", "redux": "^3.7.2",
"sax": "^1.2.2", "sax": "^1.2.2",
"server-destroy": "^1.0.1", "server-destroy": "^1.0.1",

View File

@ -0,0 +1 @@
<a href="javascript:alert('js')">Some text</a>

View File

@ -0,0 +1 @@
[Some text]()

View File

@ -34,4 +34,21 @@ describe('markdownUtils', function() {
done(); done();
}); });
it('should extract image URLs', async (done) => {
	// Each case pairs a Markdown snippet with the image URLs that are expected
	// to be extracted from it, in order of first appearance.
	const testCases = [
		['![something](http://test.com/img.png)', ['http://test.com/img.png']],
		['![something](http://test.com/img.png) ![something2](http://test.com/img2.png)', ['http://test.com/img.png', 'http://test.com/img2.png']],
		['![something](http://test.com/img.png "Some description")', ['http://test.com/img.png']],
	];

	for (const [md, expected] of testCases) {
		// Compare the arrays element by element. Joining both sides into a single
		// string would also accept results whose elements are split at the wrong
		// boundaries (e.g. ['ab', 'c'] versus ['a', 'bc']).
		expect(markdownUtils.extractImageUrls(md)).toEqual(expected);
	}

	done();
});
}); });

View File

@ -67,7 +67,7 @@ class AppComponent extends Component {
let msg = ''; let msg = '';
if (operation.uploading) { if (operation.uploading) {
msg = 'Sending to Joplin...'; msg = 'Processing note... The note will be available in Joplin as soon as the web page and images have been downloaded and converted. In the meantime you may close this popup.';
} else if (operation.success) { } else if (operation.success) {
msg = 'Note was successfully created!'; msg = 'Note was successfully created!';
} else { } else {

View File

@ -2256,6 +2256,11 @@
"resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.1.1.tgz", "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.1.1.tgz",
"integrity": "sha512-OaU1hHjgJf+b0NzsxCg7NdIYERD6Hy/PEmFLTjw+b65scuisG3Kt4QoTvJ66BBkPZ581gr0kpoVzKnxniM8nng==" "integrity": "sha512-OaU1hHjgJf+b0NzsxCg7NdIYERD6Hy/PEmFLTjw+b65scuisG3Kt4QoTvJ66BBkPZ581gr0kpoVzKnxniM8nng=="
}, },
"es6-promise-pool": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/es6-promise-pool/-/es6-promise-pool-2.5.0.tgz",
"integrity": "sha1-FHxhKza0fxBQJ/nSv1SlmKmdnMs="
},
"escape-string-regexp": { "escape-string-regexp": {
"version": "1.0.5", "version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
@ -2446,6 +2451,11 @@
"pend": "1.2.0" "pend": "1.2.0"
} }
}, },
"file-type": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/file-type/-/file-type-4.4.0.tgz",
"integrity": "sha1-G2AOX8ofvcboDApwxxyNul95BsU="
},
"filename-regex": { "filename-regex": {
"version": "2.0.1", "version": "2.0.1",
"resolved": "https://registry.npmjs.org/filename-regex/-/filename-regex-2.0.1.tgz", "resolved": "https://registry.npmjs.org/filename-regex/-/filename-regex-2.0.1.tgz",
@ -2612,8 +2622,8 @@
"dev": true, "dev": true,
"optional": true, "optional": true,
"requires": { "requires": {
"delegates": "^1.0.0", "delegates": "1.0.0",
"readable-stream": "^2.0.6" "readable-stream": "2.2.9"
} }
}, },
"asn1": { "asn1": {
@ -2834,10 +2844,10 @@
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"requires": { "requires": {
"graceful-fs": "^4.1.2", "graceful-fs": "4.1.11",
"inherits": "~2.0.0", "inherits": "2.0.3",
"mkdirp": ">=0.5 0", "mkdirp": "0.5.1",
"rimraf": "2" "rimraf": "2.6.1"
} }
}, },
"fstream-ignore": { "fstream-ignore": {
@ -2857,14 +2867,14 @@
"dev": true, "dev": true,
"optional": true, "optional": true,
"requires": { "requires": {
"aproba": "^1.0.3", "aproba": "1.1.1",
"console-control-strings": "^1.0.0", "console-control-strings": "1.1.0",
"has-unicode": "^2.0.0", "has-unicode": "2.0.1",
"object-assign": "^4.1.0", "object-assign": "4.1.1",
"signal-exit": "^3.0.0", "signal-exit": "3.0.2",
"string-width": "^1.0.1", "string-width": "1.0.2",
"strip-ansi": "^3.0.1", "strip-ansi": "3.0.1",
"wide-align": "^1.1.0" "wide-align": "1.1.2"
} }
}, },
"getpass": { "getpass": {
@ -2889,12 +2899,12 @@
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"requires": { "requires": {
"fs.realpath": "^1.0.0", "fs.realpath": "1.0.0",
"inflight": "^1.0.4", "inflight": "1.0.6",
"inherits": "2", "inherits": "2.0.3",
"minimatch": "^3.0.4", "minimatch": "3.0.4",
"once": "^1.3.0", "once": "1.4.0",
"path-is-absolute": "^1.0.0" "path-is-absolute": "1.0.1"
} }
}, },
"graceful-fs": { "graceful-fs": {
@ -2946,9 +2956,9 @@
"dev": true, "dev": true,
"optional": true, "optional": true,
"requires": { "requires": {
"assert-plus": "^0.2.0", "assert-plus": "0.2.0",
"jsprim": "^1.2.2", "jsprim": "1.4.0",
"sshpk": "^1.7.0" "sshpk": "1.13.0"
} }
}, },
"inflight": { "inflight": {
@ -2956,8 +2966,8 @@
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"requires": { "requires": {
"once": "^1.3.0", "once": "1.4.0",
"wrappy": "1" "wrappy": "1.0.2"
} }
}, },
"inherits": { "inherits": {
@ -3132,10 +3142,10 @@
"dev": true, "dev": true,
"optional": true, "optional": true,
"requires": { "requires": {
"are-we-there-yet": "~1.1.2", "are-we-there-yet": "1.1.4",
"console-control-strings": "~1.1.0", "console-control-strings": "1.1.0",
"gauge": "~2.7.3", "gauge": "2.7.4",
"set-blocking": "~2.0.0" "set-blocking": "2.0.0"
} }
}, },
"number-is-nan": { "number-is-nan": {
@ -3160,7 +3170,7 @@
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"requires": { "requires": {
"wrappy": "1" "wrappy": "1.0.2"
} }
}, },
"os-homedir": { "os-homedir": {
@ -3238,13 +3248,13 @@
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"requires": { "requires": {
"buffer-shims": "~1.0.0", "buffer-shims": "1.0.0",
"core-util-is": "~1.0.0", "core-util-is": "1.0.2",
"inherits": "~2.0.1", "inherits": "2.0.3",
"isarray": "~1.0.0", "isarray": "1.0.0",
"process-nextick-args": "~1.0.6", "process-nextick-args": "1.0.7",
"string_decoder": "~1.0.0", "string_decoder": "1.0.1",
"util-deprecate": "~1.0.1" "util-deprecate": "1.0.2"
} }
}, },
"request": { "request": {
@ -3253,28 +3263,28 @@
"dev": true, "dev": true,
"optional": true, "optional": true,
"requires": { "requires": {
"aws-sign2": "~0.6.0", "aws-sign2": "0.6.0",
"aws4": "^1.2.1", "aws4": "1.6.0",
"caseless": "~0.12.0", "caseless": "0.12.0",
"combined-stream": "~1.0.5", "combined-stream": "1.0.5",
"extend": "~3.0.0", "extend": "3.0.1",
"forever-agent": "~0.6.1", "forever-agent": "0.6.1",
"form-data": "~2.1.1", "form-data": "2.1.4",
"har-validator": "~4.2.1", "har-validator": "4.2.1",
"hawk": "~3.1.3", "hawk": "3.1.3",
"http-signature": "~1.1.0", "http-signature": "1.1.1",
"is-typedarray": "~1.0.0", "is-typedarray": "1.0.0",
"isstream": "~0.1.2", "isstream": "0.1.2",
"json-stringify-safe": "~5.0.1", "json-stringify-safe": "5.0.1",
"mime-types": "~2.1.7", "mime-types": "2.1.15",
"oauth-sign": "~0.8.1", "oauth-sign": "0.8.2",
"performance-now": "^0.2.0", "performance-now": "0.2.0",
"qs": "~6.4.0", "qs": "6.4.0",
"safe-buffer": "^5.0.1", "safe-buffer": "5.0.1",
"stringstream": "~0.0.4", "stringstream": "0.0.5",
"tough-cookie": "~2.3.0", "tough-cookie": "2.3.2",
"tunnel-agent": "^0.6.0", "tunnel-agent": "0.6.0",
"uuid": "^3.0.0" "uuid": "3.0.1"
} }
}, },
"rimraf": { "rimraf": {
@ -3282,7 +3292,7 @@
"bundled": true, "bundled": true,
"dev": true, "dev": true,
"requires": { "requires": {
"glob": "^7.0.5" "glob": "7.1.2"
} }
}, },
"safe-buffer": { "safe-buffer": {
@ -3461,7 +3471,7 @@
"dev": true, "dev": true,
"optional": true, "optional": true,
"requires": { "requires": {
"string-width": "^1.0.2" "string-width": "1.0.2"
} }
}, },
"wrappy": { "wrappy": {
@ -3726,6 +3736,14 @@
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.19.tgz", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.19.tgz",
"integrity": "sha512-oTZqweIP51xaGPI4uPa56/Pri/480R+mo7SeU+YETByQNhDG55ycFyNLIgta9vXhILrxXDmF7ZGhqZIcuN0gJQ==" "integrity": "sha512-oTZqweIP51xaGPI4uPa56/Pri/480R+mo7SeU+YETByQNhDG55ycFyNLIgta9vXhILrxXDmF7ZGhqZIcuN0gJQ=="
}, },
"image-type": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/image-type/-/image-type-3.0.0.tgz",
"integrity": "sha1-FQKvMTX5BuEiyHfDHpSve3qRRsU=",
"requires": {
"file-type": "4.4.0"
}
},
"import-lazy": { "import-lazy": {
"version": "2.1.0", "version": "2.1.0",
"resolved": "https://registry.npmjs.org/import-lazy/-/import-lazy-2.1.0.tgz", "resolved": "https://registry.npmjs.org/import-lazy/-/import-lazy-2.1.0.tgz",
@ -4009,9 +4027,9 @@
"integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=" "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo="
}, },
"joplin-turndown": { "joplin-turndown": {
"version": "4.0.3", "version": "4.0.4",
"resolved": "https://registry.npmjs.org/joplin-turndown/-/joplin-turndown-4.0.3.tgz", "resolved": "https://registry.npmjs.org/joplin-turndown/-/joplin-turndown-4.0.4.tgz",
"integrity": "sha512-WbAXje8wq4/ZLNtPDUFBEtG5zKEbz7Wth5N3vB4Nw7k+PUs3mMF49LVEPP7Kc6H4Ui671qdjpSShvdsmiLY2gA==", "integrity": "sha512-Qgi9DvLGT2r86yiQjKO83tvGYF9FabjVSnP6S9ts/+jaVWvwmBGhcGklFfMArlxY3doKZmdIPspiESsLcpn2Jg==",
"requires": { "requires": {
"jsdom": "11.10.0" "jsdom": "11.10.0"
} }
@ -4860,8 +4878,7 @@
"pify": { "pify": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz",
"integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=", "integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY="
"dev": true
}, },
"pinkie": { "pinkie": {
"version": "2.0.4", "version": "2.0.4",
@ -5150,6 +5167,15 @@
"prop-types": "15.6.0" "prop-types": "15.6.0"
} }
}, },
"read-chunk": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/read-chunk/-/read-chunk-2.1.0.tgz",
"integrity": "sha1-agTAkoAF7Z1C4aasVgDhnLx/9lU=",
"requires": {
"pify": "3.0.0",
"safe-buffer": "5.1.1"
}
},
"read-config-file": { "read-config-file": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/read-config-file/-/read-config-file-3.0.0.tgz", "resolved": "https://registry.npmjs.org/read-config-file/-/read-config-file-3.0.0.tgz",

View File

@ -82,12 +82,14 @@
"electron-context-menu": "^0.9.1", "electron-context-menu": "^0.9.1",
"electron-is-dev": "^0.3.0", "electron-is-dev": "^0.3.0",
"electron-window-state": "^4.1.1", "electron-window-state": "^4.1.1",
"es6-promise-pool": "^2.5.0",
"follow-redirects": "^1.2.5", "follow-redirects": "^1.2.5",
"form-data": "^2.3.1", "form-data": "^2.3.1",
"fs-extra": "^5.0.0", "fs-extra": "^5.0.0",
"highlight.js": "^9.12.0", "highlight.js": "^9.12.0",
"html-entities": "^1.2.1", "html-entities": "^1.2.1",
"joplin-turndown": "^4.0.3", "image-type": "^3.0.0",
"joplin-turndown": "^4.0.4",
"joplin-turndown-plugin-gfm": "^1.0.5", "joplin-turndown-plugin-gfm": "^1.0.5",
"jssha": "^2.3.1", "jssha": "^2.3.1",
"katex": "^0.9.0-beta1", "katex": "^0.9.0-beta1",
@ -109,6 +111,7 @@
"react-datetime": "^2.11.0", "react-datetime": "^2.11.0",
"react-dom": "^16.0.0", "react-dom": "^16.0.0",
"react-redux": "^5.0.6", "react-redux": "^5.0.6",
"read-chunk": "^2.1.0",
"readability-node": "^0.1.0", "readability-node": "^0.1.0",
"redux": "^3.7.2", "redux": "^3.7.2",
"smalltalk": "^2.5.1", "smalltalk": "^2.5.1",

View File

@ -387,20 +387,21 @@ class BaseApplication {
} }
async testing() { async testing() {
const markdownUtils = require('lib/markdownUtils');
const ClipperServer = require('lib/ClipperServer'); const ClipperServer = require('lib/ClipperServer');
const server = new ClipperServer(); const server = new ClipperServer();
const HtmlToMd = require('lib/HtmlToMd'); const HtmlToMd = require('lib/HtmlToMd');
const service = new HtmlToMd(); const service = new HtmlToMd();
const html = await shim.fsDriver().readFile('/mnt/d/test.html'); const html = await shim.fsDriver().readFile('/mnt/d/test.html');
let markdown = service.parse(html); let markdown = service.parse(html, { baseUrl: 'https://duckduckgo.com/' });
console.info(markdown); console.info(markdown);
console.info('--------------------------------------------------'); console.info('--------------------------------------------------');
const imageUrls = server.extractImageUrls(markdown); const imageUrls = markdownUtils.extractImageUrls(markdown);
let result = await server.downloadImages(imageUrls); let result = await server.downloadImages_(imageUrls);
result = await server.createResourcesFromPaths(result); result = await server.createResourcesFromPaths_(result);
console.info(result); console.info(result);
markdown = server.replaceImageUrlByResources(markdown, result); markdown = server.replaceImageUrlsByResources_(markdown, result);
console.info('--------------------------------------------------'); console.info('--------------------------------------------------');
console.info(markdown); console.info(markdown);
console.info('--------------------------------------------------'); console.info('--------------------------------------------------');
@ -492,7 +493,11 @@ class BaseApplication {
// await this.testing();process.exit(); // await this.testing();process.exit();
const clipperLogger = new Logger();
clipperLogger.addTarget('file', { path: profileDir + '/log-clipper.txt' });
clipperLogger.addTarget('console');
this.clipperServer_ = new ClipperServer(); this.clipperServer_ = new ClipperServer();
this.clipperServer_.setLogger(clipperLogger);
this.clipperServer_.start(); this.clipperServer_.start();
return argv; return argv;

View File

@ -6,9 +6,10 @@ const Resource = require('lib/models/Resource');
const Setting = require('lib/models/Setting'); const Setting = require('lib/models/Setting');
const { shim } = require('lib/shim'); const { shim } = require('lib/shim');
const md5 = require('md5'); const md5 = require('md5');
const { fileExtension, safeFileExtension, filename } = require('lib/path-utils'); const { fileExtension, safeFileExtension, safeFilename, filename } = require('lib/path-utils');
const HtmlToMd = require('lib/HtmlToMd'); const HtmlToMd = require('lib/HtmlToMd');
const { Logger } = require('lib/logger.js'); const { Logger } = require('lib/logger.js');
const markdownUtils = require('lib/markdownUtils');
class ClipperServer { class ClipperServer {
@ -37,8 +38,6 @@ class ClipperServer {
}; };
if (requestNote.bodyHtml) { if (requestNote.bodyHtml) {
console.info(requestNote.bodyHtml);
// Parsing will not work if the HTML is not wrapped in a top level tag, which is not guaranteed // Parsing will not work if the HTML is not wrapped in a top level tag, which is not guaranteed
// when getting the content from elsewhere. So here wrap it - it won't change anything to the final // when getting the content from elsewhere. So here wrap it - it won't change anything to the final
// rendering but it makes sure everything will be parsed. // rendering but it makes sure everything will be parsed.
@ -58,39 +57,45 @@ class ClipperServer {
return output; return output;
} }
extractImageUrls_(md) { async downloadImage_(url) {
// ![some text](http://path/to/image) const tempDir = Setting.value('tempDir');
const regex = new RegExp(/!\[.*?\]\((http[s]?:\/\/.*?)\)/, 'g') const name = filename(url);
let match = regex.exec(md); let fileExt = safeFileExtension(fileExtension(url).toLowerCase());
const output = []; if (fileExt) fileExt = '.' + fileExt;
while (match) { let imagePath = tempDir + '/' + safeFilename(name) + fileExt;
const url = match[1]; if (await shim.fsDriver().exists(imagePath)) imagePath = tempDir + '/' + safeFilename(name) + '_' + md5(Math.random() + '_' + Date.now()).substr(0,10) + fileExt;
if (output.indexOf(url) < 0) output.push(url);
match = regex.exec(md); try {
const result = await shim.fetchBlob(url, { path: imagePath });
return imagePath;
} catch (error) {
this.logger().warn('Cannot download image at ' + url, error);
return '';
} }
return output;
} }
async downloadImages_(urls) { async downloadImages_(urls) {
const tempDir = Setting.value('tempDir'); const PromisePool = require('es6-promise-pool')
const output = {}; const output = {};
for (let i = 0; i < urls.length; i++) { let urlIndex = 0;
const url = urls[i]; const promiseProducer = () => {
const name = filename(url); if (urlIndex >= urls.length) return null;
let fileExt = safeFileExtension(fileExtension(url).toLowerCase());
if (fileExt) fileExt = '.' + fileExt;
let imagePath = tempDir + '/' + name + fileExt;
if (await shim.fsDriver().exists(imagePath)) imagePath = tempDir + '/' + name + '_' + md5(Math.random() + '_' + Date.now()).substr(0,10) + fileExt;
try { const url = urls[urlIndex++];
const result = await shim.fetchBlob(url, { path: imagePath });
output[url] = { path: imagePath }; return new Promise(async (resolve, reject) => {
} catch (error) { const imagePath = await this.downloadImage_(url);
this.logger().warn('ClipperServer: Cannot download image at ' + url, error); if (imagePath) output[url] = { path: imagePath };
} resolve();
});
} }
const concurrency = 3
const pool = new PromisePool(promiseProducer, concurrency)
await pool.start()
return output; return output;
} }
@ -102,16 +107,28 @@ class ClipperServer {
const resource = await shim.createResourceFromPath(urlInfo.path); const resource = await shim.createResourceFromPath(urlInfo.path);
urlInfo.resource = resource; urlInfo.resource = resource;
} catch (error) { } catch (error) {
this.logger().warn('ClipperServer: Cannot create resource for ' + url, error); this.logger().warn('Cannot create resource for ' + url, error);
} }
} }
return urls; return urls;
} }
async removeTempFiles_(urls) {
for (let url in urls) {
if (!urls.hasOwnProperty(url)) continue;
const urlInfo = urls[url];
try {
await shim.fsDriver().remove(urlInfo.path);
} catch (error) {
this.logger().warn('Cannot remove ' + urlInfo.path, error);
}
}
}
replaceImageUrlsByResources_(md, urls) { replaceImageUrlsByResources_(md, urls) {
let output = md.replace(/(!\[.*?\]\()(http[s]?:\/\/.*?)(\))/g, (match, before, imageUrl, after) => { let output = md.replace(/(!\[.*?\]\()([^\s\)]+)(.*?\))/g, (match, before, imageUrl, after) => {
const urlInfo = urls[imageUrl]; const urlInfo = urls[imageUrl];
if (!urlInfo || !urlInfo.resource) return imageUrl; if (!urlInfo || !urlInfo.resource) return before + imageUrl + after;
const resourceUrl = Resource.internalUrl(urlInfo.resource); const resourceUrl = Resource.internalUrl(urlInfo.resource);
return before + resourceUrl + after; return before + resourceUrl + after;
}); });
@ -121,7 +138,10 @@ class ClipperServer {
async start() { async start() {
const port = await netUtils.findAvailablePort([9967, 8967, 8867], 0); // TODO: Make it shared with OneDrive server const port = await netUtils.findAvailablePort([9967, 8967, 8867], 0); // TODO: Make it shared with OneDrive server
if (!port) throw new Error('All potential ports are in use or not available.'); if (!port) {
this.logger().error('All potential ports are in use or not available.');
return;
}
const server = require('http').createServer(); const server = require('http').createServer();
@ -142,7 +162,8 @@ class ClipperServer {
response.end(); response.end();
} }
console.info('GOT REQUEST', request.method + ' ' + request.url); const requestId = Date.now();
this.logger().info('Request (' + requestId + '): ' + request.method + ' ' + request.url);
if (request.method === 'POST') { if (request.method === 'POST') {
const url = urlParser.parse(request.url, true); const url = urlParser.parse(request.url, true);
@ -159,19 +180,17 @@ class ClipperServer {
const requestNote = JSON.parse(body); const requestNote = JSON.parse(body);
let note = await this.requestNoteToNote(requestNote); let note = await this.requestNoteToNote(requestNote);
// TODO: Provide way to check status (importing image x/y) const imageUrls = markdownUtils.extractImageUrls(note.body);
// TODO: Delete temp file after import
// TODO: Download multiple images at once
const imageUrls = this.extractImageUrls_(note.body);
let result = await this.downloadImages_(imageUrls); let result = await this.downloadImages_(imageUrls);
result = await this.createResourcesFromPaths_(result); result = await this.createResourcesFromPaths_(result);
await this.removeTempFiles_(result);
note.body = this.replaceImageUrlsByResources_(note.body, result); note.body = this.replaceImageUrlsByResources_(note.body, result);
note = await Note.save(note); note = await Note.save(note);
this.logger().info('Request (' + requestId + '): Created note ' + note.id);
return writeResponseJson(200, note); return writeResponseJson(200, note);
} catch (error) { } catch (error) {
console.warn(error); this.logger().error(error);
return writeResponseJson(400, { errorCode: 'exception', errorMessage: error.message }); return writeResponseJson(400, { errorCode: 'exception', errorMessage: error.message });
} }
}); });
@ -190,7 +209,7 @@ class ClipperServer {
}); });
console.info('Starting Clipper server on port ' + port); this.logger().info('Starting Clipper server on port ' + port);
server.listen(port); server.listen(port);
} }

View File

@ -7,7 +7,7 @@ class Logger {
constructor() { constructor() {
this.targets_ = []; this.targets_ = [];
this.level_ = Logger.LEVEL_ERROR; this.level_ = Logger.LEVEL_INFO;
this.fileAppendQueue_ = [] this.fileAppendQueue_ = []
this.lastDbCleanup_ = time.unixMs(); this.lastDbCleanup_ = time.unixMs();
} }

View File

@ -19,6 +19,19 @@ const markdownUtils = {
}); });
}, },
extractImageUrls(md) {
// ![some text](http://path/to/image)
const regex = new RegExp(/!\[.*?\]\(([^\s\)]+).*?\)/, 'g')
let match = regex.exec(md);
const output = [];
while (match) {
const url = match[1];
if (output.indexOf(url) < 0) output.push(url);
match = regex.exec(md);
}
return output;
},
}; };
module.exports = markdownUtils; module.exports = markdownUtils;

View File

@ -40,6 +40,12 @@ function safeFileExtension(e) {
return e.replace(/[^a-zA-Z0-9]/g, '') return e.replace(/[^a-zA-Z0-9]/g, '')
} }
/**
 * Converts a string into a name that is safe to use as a file name.
 *
 * Every character other than ASCII letters, digits, "-", "_", "(", ")" and "."
 * is replaced with "_", and the result is cut to at most `maxLength` characters.
 *
 * @param {string} e - The candidate file name.
 * @param {number} [maxLength=32] - Maximum length of the returned name.
 * @returns {string} The sanitised name, or an empty string when `e` is falsy
 *   or has no `replace` method (i.e. is not string-like).
 */
function safeFilename(e, maxLength = 32) {
	if (!e || !e.replace) return '';
	const sanitised = e.replace(/[^a-zA-Z0-9\-_\(\)\.]/g, '_');
	// substring(0, n) yields the same result as the deprecated substr(0, n).
	return sanitised.substring(0, maxLength);
}
function toSystemSlashes(path, os = null) { function toSystemSlashes(path, os = null) {
if (os === null) os = process.platform; if (os === null) os = process.platform;
if (os === 'win32') return path.replace(/\//g, "\\"); if (os === 'win32') return path.replace(/\//g, "\\");
@ -54,4 +60,4 @@ function ltrimSlashes(path) {
return path.replace(/^\/+/, ''); return path.replace(/^\/+/, '');
} }
module.exports = { basename, dirname, filename, isHidden, fileExtension, safeFileExtension, toSystemSlashes, rtrimSlashes, ltrimSlashes }; module.exports = { basename, dirname, filename, isHidden, fileExtension, safeFilename, safeFileExtension, toSystemSlashes, rtrimSlashes, ltrimSlashes };

View File

@ -97,6 +97,9 @@ function shimInit() {
} }
shim.createResourceFromPath = async function(filePath) { shim.createResourceFromPath = async function(filePath) {
const readChunk = require('read-chunk');
const imageType = require('image-type');
const Resource = require('lib/models/Resource.js'); const Resource = require('lib/models/Resource.js');
const { uuid } = require('lib/uuid.js'); const { uuid } = require('lib/uuid.js');
const { basename, fileExtension, safeFileExtension } = require('lib/path-utils.js'); const { basename, fileExtension, safeFileExtension } = require('lib/path-utils.js');
@ -109,9 +112,22 @@ function shimInit() {
resource.id = uuid.create(); resource.id = uuid.create();
resource.mime = mime.getType(filePath); resource.mime = mime.getType(filePath);
resource.title = basename(filePath); resource.title = basename(filePath);
resource.file_extension = safeFileExtension(fileExtension(filePath));
if (!resource.mime) resource.mime = 'application/octet-stream'; let fileExt = safeFileExtension(fileExtension(filePath));
if (!resource.mime) {
const buffer = await readChunk(filePath, 0, 64);
const detectedType = imageType(buffer);
if (detectedType) {
fileExt = detectedType.ext;
resource.mime = detectedType.mime;
} else {
resource.mime = 'application/octet-stream';
}
}
resource.file_extension = fileExt;
let targetPath = Resource.fullPath(resource); let targetPath = Resource.fullPath(resource);
@ -130,35 +146,6 @@ function shimInit() {
} }
shim.attachFileToNote = async function(note, filePath, position = null) { shim.attachFileToNote = async function(note, filePath, position = null) {
// const Resource = require('lib/models/Resource.js');
// const { uuid } = require('lib/uuid.js');
// const { basename, fileExtension, safeFileExtension } = require('lib/path-utils.js');
// const mime = require('mime/lite');
// const Note = require('lib/models/Note.js');
// if (!(await fs.pathExists(filePath))) throw new Error(_('Cannot access %s', filePath));
// let resource = Resource.new();
// resource.id = uuid.create();
// resource.mime = mime.getType(filePath);
// resource.title = basename(filePath);
// resource.file_extension = safeFileExtension(fileExtension(filePath));
// if (!resource.mime) resource.mime = 'application/octet-stream';
// let targetPath = Resource.fullPath(resource);
// if (resource.mime == 'image/jpeg' || resource.mime == 'image/jpg' || resource.mime == 'image/png') {
// const result = await resizeImage_(filePath, targetPath, resource.mime);
// } else {
// const stat = await shim.fsDriver().stat(filePath);
// if (stat.size >= 10000000) throw new Error('Resources larger than 10 MB are not currently supported as they may crash the mobile applications. The issue is being investigated and will be fixed at a later time.');
// await fs.copy(filePath, targetPath, { overwrite: true });
// }
// await Resource.save(resource, { isNew: true });
const resource = shim.createResourceFromPath(filePath); const resource = shim.createResourceFromPath(filePath);
const newBody = []; const newBody = [];