1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-11-26 22:41:17 +02:00

Clipper: Resolves #6247: Clipper unable to pull and store PDFs (#6384)

This commit is contained in:
asrient
2022-06-20 18:26:54 +05:30
committed by GitHub
parent 0c50a5ab9b
commit c0bc4c38c3
7 changed files with 217 additions and 35 deletions

View File

@@ -69,7 +69,7 @@ const markdownUtils = {
},
// Returns the **encoded** URLs, so to be useful they should be decoded again before use.
extractFileUrls(md: string, onlyImage: boolean = false): Array<string> {
extractFileUrls(md: string, onlyType: string = null): Array<string> {
const markdownIt = new MarkdownIt();
markdownIt.validateLink = validateLinks; // Necessary to support file:/// links
@@ -77,10 +77,16 @@ const markdownUtils = {
const tokens = markdownIt.parse(md, env);
const output: string[] = [];
let linkType = onlyType;
if (linkType === 'pdf') linkType = 'link_open';
const searchUrls = (tokens: any[]) => {
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i];
if ((onlyImage === true && token.type === 'image') || (onlyImage === false && (token.type === 'image' || token.type === 'link_open'))) {
if ((!onlyType && (token.type === 'link_open' || token.type === 'image')) || (!!onlyType && token.type === onlyType) || (onlyType == 'pdf' && token.type === 'link_open')) {
// Pdf embeds are a special case, they are represented as 'link_open' tokens but are marked with 'embedded_pdf' as link name by the parser
// We are making sure if its in the proper pdf link format, only then we add it to the list
if (onlyType === 'pdf' && !(tokens.length > i + 1 && tokens[i + 1].type === 'text' && tokens[i + 1].content === 'embedded_pdf')) continue;
for (let j = 0; j < token.attrs.length; j++) {
const a = token.attrs[j];
if ((a[0] === 'src' || a[0] === 'href') && a.length >= 2 && a[1]) {
@@ -107,7 +113,11 @@ const markdownUtils = {
},
extractImageUrls(md: string) {
return markdownUtils.extractFileUrls(md,true);
return markdownUtils.extractFileUrls(md, 'image');
},
extractPdfUrls(md: string) {
return markdownUtils.extractFileUrls(md, 'pdf');
},
// The match results has 5 items