1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-21 09:38:01 +02:00

Chore: Move useful clipper logic to the lib package to be used in other places (#9053)

This commit is contained in:
pedr 2023-10-13 11:31:13 -03:00 committed by GitHub
parent b1e1db7831
commit 5733017637
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 275 additions and 126 deletions

View File

@ -575,6 +575,7 @@ packages/lib/WelcomeUtils.js
packages/lib/array.js
packages/lib/callbackUrlUtils.test.js
packages/lib/callbackUrlUtils.js
packages/lib/clipperUtils.js
packages/lib/commands/historyBackward.js
packages/lib/commands/historyForward.js
packages/lib/commands/index.js

1
.gitignore vendored
View File

@ -561,6 +561,7 @@ packages/lib/WelcomeUtils.js
packages/lib/array.js
packages/lib/callbackUrlUtils.test.js
packages/lib/callbackUrlUtils.js
packages/lib/clipperUtils.js
packages/lib/commands/historyBackward.js
packages/lib/commands/historyForward.js
packages/lib/commands/index.js

View File

@ -0,0 +1,114 @@
'use strict';
Object.defineProperty(exports, '__esModule', { value: true });
exports.getStyleSheets = exports.getImageSizes = void 0;
// Converts a URL found in the page into an absolute URL:
// - empty values, and URLs that already start with an "http", "https", "file"
//   or "data" scheme, are returned unchanged;
// - protocol-relative URLs ("//host/path") are prefixed with the page protocol;
// - root-relative URLs ("/path") are prefixed with the page protocol and host;
// - anything else is appended to baseUrl().
function absoluteUrl(url) {
	if (!url) { return url; }
	// A scheme can only be present when the URL contains a colon. Without this
	// check, a relative URL that is exactly "data", "file", "http" or "https"
	// would be mistaken for an absolute URL and returned as is.
	const colonIndex = url.indexOf(':');
	const protocol = colonIndex >= 0 ? url.substring(0, colonIndex).toLowerCase() : '';
	if (['http', 'https', 'file', 'data'].indexOf(protocol) >= 0) { return url; }
	if (url.indexOf('//') === 0) { return location.protocol + url; }
	if (url[0] === '/') { return `${location.protocol}//${location.host}${url}`; }
	return `${baseUrl()}/${url}`;
}
// Returns the origin of the current page (protocol + domain + port, for
// example https://example.com:8080). For file:// pages location.origin is
// browser dependent (Firefox in particular returns "null"), so a fixed
// "file://" prefix is used instead.
function pageLocationOrigin() {
	const isLocalFile = location.protocol === 'file:';
	return isLocalFile ? 'file://' : location.origin;
}
// Returns the URL of the "directory" that contains the current page: the page
// origin followed by the path, with the last path segment removed unless the
// path already ends with a slash.
function baseUrl() {
	const pageUrl = pageLocationOrigin() + location.pathname;
	if (pageUrl[pageUrl.length - 1] === '/') { return pageUrl; }
	const lastSlash = pageUrl.lastIndexOf('/');
	return lastSlash < 0 ? '' : pageUrl.substring(0, lastSlash);
}
// Returns the first class of the SVG element that starts with
// "joplin-clipper-svg-", or an empty string when it has none.
function getJoplinClipperSvgClassName(svg) {
	const classNames = Array.from(svg.classList);
	for (let i = 0; i < classNames.length; i++) {
		if (classNames[i].indexOf('joplin-clipper-svg-') === 0) { return classNames[i]; }
	}
	return '';
}
// Collects the dimensions of every <img> and <svg> element found under
// `element`. Elements that have the "joplin-clipper-hidden" class are skipped.
//
// The result is an object whose values are arrays with one size record per
// matching element:
// - images are keyed by their source URL (made absolute with absoluteUrl()
//   when `forceAbsoluteUrls` is true) and record width, height, naturalWidth
//   and naturalHeight;
// - SVGs are keyed by their "joplin-clipper-svg-*" class name and record the
//   width and height of their bounding client rectangle. SVGs without such a
//   class are reported with console.warn and left out.
function getImageSizes(element, forceAbsoluteUrls = false) {
	const output = {};
	for (const img of Array.from(element.getElementsByTagName('img'))) {
		if (img.classList && img.classList.contains('joplin-clipper-hidden')) { continue; }
		const rawSrc = imageSrc(img);
		const src = forceAbsoluteUrls ? absoluteUrl(rawSrc) : rawSrc;
		if (!output[src]) { output[src] = []; }
		output[src].push({
			width: img.width,
			height: img.height,
			naturalWidth: img.naturalWidth,
			naturalHeight: img.naturalHeight,
		});
	}
	for (const svg of Array.from(element.getElementsByTagName('svg'))) {
		if (svg.classList && svg.classList.contains('joplin-clipper-hidden')) { continue; }
		// The class name is read from the element's own classList, so there is
		// no need to add it back to the element.
		const className = getJoplinClipperSvgClassName(svg);
		if (!className) {
			console.warn('SVG without a Joplin class:', svg);
			continue;
		}
		const rect = svg.getBoundingClientRect();
		if (!output[className]) { output[className] = []; }
		output[className].push({
			width: rect.width,
			height: rect.height,
		});
	}
	return output;
}
exports.getImageSizes = getImageSizes;
// Returns the URL of the image that is actually being displayed. currentSrc is
// preferred because, within <picture> tags or with srcset, there can be several
// candidate sources and the one currently displayed is probably the best one.
// Falls back to src when currentSrc is not set.
function imageSrc(image) {
	return image.currentSrc || image.src;
}
// Given a document, returns the list of styles needed to render the page:
// - { type: 'text', value: <cssText> } for each CSS rule of a stylesheet whose
//   rules can be read;
// - { type: 'url', value: <href> } for each stylesheet whose rules cannot be
//   read.
// eslint-disable-next-line
function getStyleSheets(doc) {
	const output = [];
	for (const styleSheet of Array.from(doc.styleSheets)) {
		try {
			for (const rule of styleSheet.cssRules) {
				output.push({ type: 'text', value: rule.cssText });
			}
		} catch (error) {
			// Reading cssRules throws a CORS error on Chrome when the stylesheet
			// is hosted on a different domain. Such stylesheets are listed by
			// URL instead, so that the desktop application, which has no CORS
			// restrictions, can download them.
			// eslint-disable-next-line
			console.info('Could not retrieve stylesheet now:', styleSheet.href);
			// eslint-disable-next-line
			console.info('It will downloaded by the main application.');
			// eslint-disable-next-line
			console.info(error);
			output.push({ type: 'url', value: styleSheet.href });
		}
	}
	return output;
}
exports.getStyleSheets = getStyleSheets;
// # sourceMappingURL=clipperUtils.js.map

View File

@ -20,20 +20,6 @@
browserSupportsPromises_ = false;
}
// Converts a URL found in the page into an absolute URL. Empty values and URLs
// that already start with an "http", "https", "file" or "data" scheme are
// returned unchanged. Protocol-relative ("//host/path") and root-relative
// ("/path") URLs are completed from the page location, and any other URL is
// appended to baseUrl().
function absoluteUrl(url) {
	if (!url) return url;

	// Only the text before an actual colon can be a scheme. Without this check a
	// relative URL that is exactly "data", "file", "http" or "https" would be
	// treated as absolute and returned unchanged.
	const schemeEnd = url.indexOf(':');
	const protocol = schemeEnd < 0 ? '' : url.slice(0, schemeEnd).toLowerCase();
	if (['http', 'https', 'file', 'data'].indexOf(protocol) >= 0) return url;

	if (url.indexOf('//') === 0) {
		return location.protocol + url;
	} else if (url[0] === '/') {
		return `${location.protocol}//${location.host}${url}`;
	} else {
		return `${baseUrl()}/${url}`;
	}
}
function escapeHtml(s) {
return s
.replace(/&/g, '&amp;')
@ -49,85 +35,6 @@
return document.title.trim();
}
// Origin of the current page. location.origin normally gives the protocol,
// domain and port (eg. https://example.com:8080), but for file:// pages its
// value depends on the browser (Firefox returns "null"), so "file://" is
// returned for those instead.
function pageLocationOrigin() {
	let origin = 'file://';
	if (location.protocol !== 'file:') origin = location.origin;
	return origin;
}
// Builds the page origin followed by the page path, then drops the last path
// segment unless the result already ends with a slash.
function baseUrl() {
	const url = pageLocationOrigin() + location.pathname;
	const endsWithSlash = url.charAt(url.length - 1) === '/';
	if (endsWithSlash) return url;

	const parts = url.split('/');
	// Drop the last segment (the page file name)
	parts.length = parts.length - 1;
	return parts.join('/');
}
// Looks through the classes of the SVG element and returns the first one that
// starts with "joplin-clipper-svg-". Returns an empty string if there is none.
function getJoplinClipperSvgClassName(svg) {
	const prefix = 'joplin-clipper-svg-';
	const matches = Array.from(svg.classList).filter(name => name.indexOf(prefix) === 0);
	return matches.length ? matches[0] : '';
}
// Returns the sizes of the <img> and <svg> elements found under `element`, as
// an object mapping a key to an array of size records (one per element):
// - images use their source URL as key (converted with absoluteUrl() when
//   `forceAbsoluteUrls` is true) and record width, height, naturalWidth and
//   naturalHeight;
// - SVGs use their "joplin-clipper-svg-*" class name as key and record the
//   width and height of their bounding client rectangle.
// Elements with the "joplin-clipper-hidden" class are ignored, and SVGs without
// a "joplin-clipper-svg-*" class are logged with console.warn and ignored.
function getImageSizes(element, forceAbsoluteUrls = false) {
	const output = {};

	const images = Array.from(element.getElementsByTagName('img'));
	for (const img of images) {
		if (img.classList && img.classList.contains('joplin-clipper-hidden')) continue;

		const displayedSrc = imageSrc(img);
		const src = forceAbsoluteUrls ? absoluteUrl(displayedSrc) : displayedSrc;

		if (!output[src]) output[src] = [];
		output[src].push({
			width: img.width,
			height: img.height,
			naturalWidth: img.naturalWidth,
			naturalHeight: img.naturalHeight,
		});
	}

	const svgs = Array.from(element.getElementsByTagName('svg'));
	for (const svg of svgs) {
		if (svg.classList && svg.classList.contains('joplin-clipper-hidden')) continue;

		// The class name comes from the element's own classList, so it does not
		// need to be added back to the element.
		const className = getJoplinClipperSvgClassName(svg);
		if (!className) {
			console.warn('SVG without a Joplin class:', svg);
			continue;
		}

		const rect = svg.getBoundingClientRect();
		if (!output[className]) output[className] = [];
		output[className].push({
			width: rect.width,
			height: rect.height,
		});
	}

	return output;
}
function getAnchorNames(element) {
const output = [];
// Anchor names are normally in A tags but can be in SPAN too
@ -146,14 +53,6 @@
return output;
}
// Picks the source URL to use for an image. currentSrc wins when it is set
// because it is the image being displayed at the moment, which matters within
// <picture> tags or with srcset, where several sources can be available.
// Otherwise the plain src is used.
function imageSrc(image) {
	const displayedSrc = image.currentSrc;
	if (!displayedSrc) return image.src;
	return displayedSrc;
}
// Cleans up element by removing all its invisible children (which we don't want to render as Markdown)
// And hard-code the image dimensions so that the information can be used by the clipper server to
// display them at the right sizes in the notes.
@ -181,6 +80,7 @@
}
if (nodeName === 'img') {
// eslint-disable-next-line no-undef
const src = absoluteUrl(imageSrc(node));
node.setAttribute('src', src);
if (!(src in imageIndexes)) imageIndexes[src] = 0;
@ -199,6 +99,7 @@
}
if (nodeName === 'svg') {
// eslint-disable-next-line no-undef
const className = getJoplinClipperSvgClassName(node);
if (!(className in imageIndexes)) imageIndexes[className] = 0;
@ -216,11 +117,13 @@
}
if (nodeName === 'embed') {
// eslint-disable-next-line no-undef
const src = absoluteUrl(node.src);
node.setAttribute('src', src);
}
if (nodeName === 'object') {
// eslint-disable-next-line no-undef
const data = absoluteUrl(node.data);
node.setAttribute('data', data);
}
@ -300,6 +203,7 @@
let svgId = 0;
for (const svg of svgs) {
// eslint-disable-next-line no-undef
if (!getJoplinClipperSvgClassName(svg)) {
svg.classList.add(`joplin-clipper-svg-${svgId}`);
svgId++;
@ -307,30 +211,6 @@
}
}
// Given a document, lists the styles required to render the page. Each CSS
// rule that can be read becomes a { type: 'text', value: <cssText> } entry, and
// each stylesheet whose rules cannot be read becomes a single
// { type: 'url', value: <href> } entry.
function getStyleSheets(doc) {
	const output = [];

	let index = 0;
	while (index < doc.styleSheets.length) {
		const styleSheet = doc.styleSheets[index];
		index++;

		try {
			for (const rule of styleSheet.cssRules) {
				output.push({ type: 'text', value: rule.cssText });
			}
		} catch (error) {
			// On Chrome, accessing cssRules throws a CORS error if the stylesheet
			// comes from a different domain. When that happens the stylesheet URL
			// is recorded instead, and the desktop application (which has no CORS
			// restrictions) downloads it.
			console.info('Could not retrieve stylesheet now:', styleSheet.href);
			console.info('It will downloaded by the main application.');
			console.info(error);
			output.push({ type: 'url', value: styleSheet.href });
		}
	}

	return output;
}
function documentForReadability() {
// Readability directly change the passed document so clone it so as
// to preserve the original web page.
@ -372,7 +252,9 @@
name: shouldSendToJoplin ? 'sendContentToJoplin' : 'clippedContent',
title: title,
html: html,
// eslint-disable-next-line no-undef
base_url: baseUrl(),
// eslint-disable-next-line no-undef
url: pageLocationOrigin() + location.pathname + location.search,
parent_id: command.parent_id,
tags: command.tags || '',
@ -397,6 +279,7 @@
response.warning = 'Could not retrieve simplified version of page - full page has been saved instead.';
return response;
}
// eslint-disable-next-line no-undef
return clippedContentResponse(article.title, article.body, getImageSizes(document), getAnchorNames(document));
} else if (command.name === 'isProbablyReaderable') {
@ -408,6 +291,7 @@
} else if (command.name === 'completePageHtml') {
if (isPagePdf()) {
// eslint-disable-next-line no-undef
return clippedContentResponse(pageTitle(), embedPageUrl(), getImageSizes(document), getAnchorNames(document));
}
@ -417,10 +301,12 @@
// Because cleanUpElement is going to modify the DOM and remove elements we don't want to work
// directly on the document, so we make a copy of it first.
const cleanDocument = document.body.cloneNode(true);
// eslint-disable-next-line no-undef
const imageSizes = getImageSizes(document, true);
const imageIndexes = {};
cleanUpElement(convertToMarkup, cleanDocument, imageSizes, imageIndexes);
// eslint-disable-next-line no-undef
const stylesheets = convertToMarkup === 'html' ? getStyleSheets(document) : null;
// The <BODY> tag may have a style in the CSS stylesheets. This
@ -462,9 +348,11 @@
container.appendChild(range.cloneContents());
}
// eslint-disable-next-line no-undef
const imageSizes = getImageSizes(document, true);
const imageIndexes = {};
cleanUpElement(convertToMarkup, container, imageSizes, imageIndexes);
// eslint-disable-next-line no-undef
return clippedContentResponse(pageTitle(), container.innerHTML, getImageSizes(document), getAnchorNames(document));
} else if (command.name === 'screenshot') {
@ -567,6 +455,7 @@
const content = {
title: pageTitle(),
crop_rect: selectionArea,
// eslint-disable-next-line no-undef
url: pageLocationOrigin() + location.pathname + location.search,
parent_id: command.parent_id,
tags: command.tags,
@ -591,7 +480,9 @@
} else if (command.name === 'pageUrl') {
// eslint-disable-next-line no-undef
const url = pageLocationOrigin() + location.pathname + location.search;
// eslint-disable-next-line no-undef
return clippedContentResponse(pageTitle(), url, getImageSizes(document), getAnchorNames(document));
} else {

View File

@ -1,7 +1,7 @@
{
"manifest_version": 2,
"name": "Joplin Web Clipper [DEV]",
"version": "2.13.0",
"version": "2.13.1",
"description": "Capture and save web pages and screenshots from your browser to Joplin.",
"homepage_url": "https://joplinapp.org",
"content_security_policy": "script-src 'self'; object-src 'self'",

View File

@ -1,6 +1,8 @@
const fs = require('fs-extra');
const sourcePath = `${__dirname}/../../lib/randomClipperPort.js`;
const clipperUtilsPath = `${__dirname}/../../lib/clipperUtils.js`;
// Mozilla insists on building the clipper from a tarball, not from the repository
// so we add this check and only copy the file if it's present. Normally it rarely
@ -10,6 +12,10 @@ if (fs.pathExistsSync(sourcePath)) {
fs.copySync(sourcePath, `${__dirname}/src/randomClipperPort.js`);
}
if (fs.pathExistsSync(clipperUtilsPath)) {
fs.copySync(clipperUtilsPath, `${__dirname}/../content_scripts/clipperUtils.js`);
}
// These files give warnings when loading the extension in Chrome, in dev mode
fs.removeSync(`${__dirname}/node_modules/public-encrypt/test/test_key.pem`);
fs.removeSync(`${__dirname}/node_modules/public-encrypt/test/test_rsa_pubkey.pem`);

View File

@ -182,6 +182,7 @@ class AppComponent extends Component {
await bridge().tabsExecuteScript({ file: '/content_scripts/JSDOMParser.js' });
await bridge().tabsExecuteScript({ file: '/content_scripts/Readability.js' });
await bridge().tabsExecuteScript({ file: '/content_scripts/Readability-readerable.js' });
await bridge().tabsExecuteScript({ file: '/content_scripts/clipperUtils.js' });
await bridge().tabsExecuteScript({ file: '/content_scripts/index.js' });
}

View File

@ -0,0 +1,135 @@
// Converts a URL found in the page into an absolute URL:
// - empty values, and URLs that already start with an "http", "https", "file"
//   or "data" scheme, are returned unchanged;
// - protocol-relative URLs ("//host/path") are prefixed with the page protocol;
// - root-relative URLs ("/path") are prefixed with the page protocol and host;
// - anything else is appended to baseUrl().
function absoluteUrl(url: string): string {
	if (!url) return url;

	// A scheme can only be present when the URL contains a colon. Without this
	// check, a relative URL that is exactly "data", "file", "http" or "https"
	// would be mistaken for an absolute URL and returned as is.
	const colonIndex = url.indexOf(':');
	const protocol = colonIndex >= 0 ? url.substring(0, colonIndex).toLowerCase() : '';
	if (['http', 'https', 'file', 'data'].indexOf(protocol) >= 0) return url;

	if (url.startsWith('//')) return location.protocol + url;
	if (url[0] === '/') return `${location.protocol}//${location.host}${url}`;
	return `${baseUrl()}/${url}`;
}
// Returns the origin of the current page. location.origin normally contains
// the protocol, domain and port (eg. https://example.com:8080). For the file://
// protocol its value is browser dependent - Firefox in particular returns
// "null" - so "file://" is returned in that case.
function pageLocationOrigin() {
	if (location.protocol !== 'file:') return location.origin;
	return 'file://';
}
// Returns the page origin followed by the page path, without the last path
// segment. When the path already ends with a slash it is returned in full.
function baseUrl() {
	const pageUrl = pageLocationOrigin() + location.pathname;
	if (pageUrl.endsWith('/')) return pageUrl;

	const segments = pageUrl.split('/');
	return segments.slice(0, -1).join('/');
}
// Returns the first class of the SVG element that starts with
// "joplin-clipper-svg-", or an empty string when the element has no such class.
function getJoplinClipperSvgClassName(svg: SVGSVGElement) {
	const prefix = 'joplin-clipper-svg-';
	const match = Array.from(svg.classList).find(name => name.startsWith(prefix));
	return match || '';
}
// Size record produced by getImageSizes() for a single <img> or <svg> element.
type ImageObject = {
// For images these come from img.width / img.height, and for SVGs from the
// element's bounding client rectangle.
width: number;
height: number;
// Only filled in for <img> elements (img.naturalWidth / img.naturalHeight).
naturalWidth?: number;
naturalHeight?: number;
};
// Collects the dimensions of every <img> and <svg> element found under
// `element`. Elements that have the "joplin-clipper-hidden" class are skipped.
//
// The result maps a key to an array with one size record per matching element:
// - images are keyed by their source URL (made absolute with absoluteUrl()
//   when `forceAbsoluteUrls` is true) and record width, height, naturalWidth
//   and naturalHeight;
// - SVGs are keyed by their "joplin-clipper-svg-*" class name and record the
//   width and height of their bounding client rectangle. SVGs without such a
//   class are reported with console.warn and left out.
export function getImageSizes(element: HTMLElement, forceAbsoluteUrls = false): Record<string, ImageObject[]> {
	const output: Record<string, ImageObject[]> = {};

	for (const img of Array.from(element.getElementsByTagName('img'))) {
		if (img.classList && img.classList.contains('joplin-clipper-hidden')) continue;

		const rawSrc = imageSrc(img);
		const src = forceAbsoluteUrls ? absoluteUrl(rawSrc) : rawSrc;

		if (!output[src]) output[src] = [];
		output[src].push({
			width: img.width,
			height: img.height,
			naturalWidth: img.naturalWidth,
			naturalHeight: img.naturalHeight,
		});
	}

	for (const svg of Array.from(element.getElementsByTagName('svg'))) {
		if (svg.classList && svg.classList.contains('joplin-clipper-hidden')) continue;

		// The class name is read from the element's own classList, so there is
		// no need to add it back to the element.
		const className = getJoplinClipperSvgClassName(svg);
		if (!className) {
			console.warn('SVG without a Joplin class:', svg);
			continue;
		}

		const rect = svg.getBoundingClientRect();
		if (!output[className]) output[className] = [];
		output[className].push({
			width: rect.width,
			height: rect.height,
		});
	}

	return output;
}
// Returns the URL of the image that is actually being displayed. currentSrc is
// preferred because, within <picture> tags or with srcset, there can be several
// candidate sources and the one currently displayed is probably the best one.
// Falls back to src when currentSrc is not set.
function imageSrc(image: HTMLImageElement) {
	return image.currentSrc ? image.currentSrc : image.src;
}
// One entry of the list returned by getStyleSheets(): either the text of a CSS
// rule ('text') or the URL of a stylesheet whose rules could not be read ('url').
type StyleSheetEntry = {
	type: 'text' | 'url';
	value: string | null;
};

// Given a document, returns the list of styles needed to render the page:
// - { type: 'text', value: <cssText> } for each CSS rule of a stylesheet whose
//   rules can be read;
// - { type: 'url', value: <href> } for each stylesheet whose rules cannot be
//   read.
// eslint-disable-next-line
export function getStyleSheets(doc: Document): StyleSheetEntry[] {
	const output: StyleSheetEntry[] = [];
	for (const sheet of Array.from(doc.styleSheets)) {
		try {
			for (const cssRule of sheet.cssRules) {
				output.push({ type: 'text', value: cssRule.cssText });
			}
		} catch (error) {
			// Calling sheet.cssRules will throw a CORS error on Chrome if the stylesheet is on a different domain.
			// In that case, we skip it and add it to the list of stylesheet URLs. These URLs will be downloaded
			// by the desktop application, since it doesn't have CORS restrictions.
			// eslint-disable-next-line
			console.info('Could not retrieve stylesheet now:', sheet.href);
			// eslint-disable-next-line
			console.info('It will downloaded by the main application.');
			// eslint-disable-next-line
			console.info(error);
			output.push({ type: 'url', value: sheet.href });
		}
	}
	return output;
}