joplin/packages/lib/urlUtils.js

const { rtrimSlashes } = require('./path-utils');
const { urlDecode } = require('./string-utils');

// Namespace object that collects the URL helper functions defined in this module.
const urlUtils = {};

// Returns the part of `url` that follows its last '#' character, or an empty
// string when `url` contains no '#' at all.
urlUtils.hash = function(url) {
	const lastHashPos = url.lastIndexOf('#');
	if (lastHashPos === -1) return '';
	return url.slice(lastHashPos + 1);
};

// Reduces `url` to "<protocol>//<host>". Both parts are taken from the result
// of Node's url.parse() and interpolated as-is into the returned string.
urlUtils.urlWithoutPath = function(url) {
	const { protocol, host } = require('url').parse(url, true);
	return `${protocol}//${host}`;
};

// Returns the `protocol` field that Node's url.parse() reports for `url`.
// A falsy `url` short-circuits to an empty string without being parsed.
urlUtils.urlProtocol = function(url) {
	return url ? require('url').parse(url, true).protocol : '';
};

// Resolves `url` against `baseUrl` and returns the resulting URL string.
//
// - A missing (null, undefined or empty) `url` or `baseUrl` is treated as ''.
// - The trimmed `url` is returned unchanged when there is no base URL, when it
//   is a local anchor ("#...") or when it already has a scheme.
// - A protocol-relative URL ("//host/path") gets the protocol of `baseUrl`.
// - A URL starting with a single slash is resolved against the protocol and
//   host of `baseUrl`, ignoring the base URL's own path.
// - Anything else is appended to `baseUrl`, separated by a single slash.
urlUtils.prependBaseUrl = function(url, baseUrl) {
	// Normalise the arguments *before* calling string methods on them, so that
	// null/undefined values fall through to the guards below instead of throwing.
	const target = (url || '').trim();

	// Trim whitespace first, then trailing slashes: the other way round, a base
	// such as "http://example.com/ " would keep its final slash.
	// All the code below assumes that the base URL does not end with a slash.
	const base = rtrimSlashes((baseUrl || '').trim());

	if (!base) return target;
	if (target.startsWith('#')) return target; // Don't prepend if it's a local anchor
	if (urlUtils.urlProtocol(target)) return target; // Don't prepend the base URL if the URL already has a scheme

	// If it starts with // it's a protocol-relative URL
	if (target.startsWith('//')) return urlUtils.urlProtocol(base) + target;

	// If it starts with a slash, it's an absolute path so it should be relative
	// to the domain (and not to the full base URL)
	if (target.startsWith('/')) return urlUtils.urlWithoutPath(base) + target;

	return base + (target ? `/${target}` : '');
};

// Matches a complete Joplin item link, such as the target of a Markdown link.
// Capture groups:
//   1: the prefix, either "joplin://" or ":/"
//   2: the item ID, exactly 32 alphanumeric characters
//   3: an optional hash: "#" followed by non-whitespace characters (empty when absent)
//   4: an optional title: one whitespace character followed by a double-quoted
//      string (empty when absent)
const resourceRegex = /^(joplin:\/\/|:\/)([0-9a-zA-Z]{32})(|#[^\s]*)(|\s".*?")$/;

// Tells whether `url` is, in its entirety, a Joplin item link as described by
// `resourceRegex`. Always returns a boolean.
urlUtils.isResourceUrl = function(url) {
	const matched = url.match(resourceRegex);
	return Boolean(matched);
};

// Splits a Joplin item link into its components.
//
// Returns null when `url` does not match `resourceRegex`; otherwise returns an
// object `{ itemId, hash }` where `itemId` is the 32-character ID and `hash` is
// the URL-decoded anchor without its leading '#' ('' when the link has none).
urlUtils.parseResourceUrl = function(url) {
	const parts = url.match(resourceRegex);
	if (!parts) return null;

	const itemId = parts[2];
	const rawHash = parts[3].trim();

	// In general we want the hash to be decoded so that non-alphabetical
	// languages appear as-is rather than as %-encoded sequences.
	// Fixes https://github.com/laurent22/joplin/issues/1870
	const hash = rawHash
		? urlDecode(rawHash.slice(1)) // Drop the leading '#'
		: rawHash;

	return { itemId, hash };
};

// Scans `text` for references to Joplin items and returns them as an array of
// `{ itemId, hash }` objects, in this order: Markdown link targets first, then
// HTML <img src=":/..."> tags, then HTML <a href=":/..."> tags. Items found in
// HTML tags always get an empty hash. Duplicates are not removed.
urlUtils.extractResourceUrls = function(text) {
	const found = [];

	// Markdown: whatever sits between "](" and the next ")" is a candidate
	// target; only those that parse as an item link are kept.
	const markdownTargetRE = /\]\((.*?)\)/g;
	for (let m = markdownTargetRE.exec(text); m !== null; m = markdownTargetRE.exec(text)) {
		const parsed = urlUtils.parseResourceUrl(m[1]);
		if (parsed) found.push(parsed);
	}

	// HTML: the src/href attribute must be exactly ":/" followed by the item ID
	const htmlTagREs = [
		/<img[\s\S]*?src=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi,
		/<a[\s\S]*?href=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi,
	];

	for (const tagRE of htmlTagREs) {
		let m = tagRE.exec(text);
		while (m) {
			found.push({ itemId: m[1], hash: '' });
			m = tagRE.exec(text);
		}
	}

	return found;
};

// Serialises the own enumerable properties of `query` into a URL query string
// of the form "key1=value1&key2=value2", percent-encoding both keys and values
// with encodeURIComponent(). No leading "?" is added.
//
// Returns an empty string when `query` is falsy or has no own properties.
urlUtils.objectToQueryString = function(query) {
	if (!query) return '';

	// Object.keys() yields exactly the own enumerable keys. Unlike calling
	// query.hasOwnProperty() inside a for...in loop, it also works when `query`
	// has no prototype or has a key that shadows `hasOwnProperty`.
	return Object.keys(query)
		.map(key => `${encodeURIComponent(key)}=${encodeURIComponent(query[key])}`)
		.join('&');
};

// Expose the helper collection as this module's CommonJS export.
module.exports = urlUtils;
All: Use Lerna to manage monorepo 2020-11-05 18:58:23 +02:00			`const { rtrimSlashes } = require('./path-utils');`
			`const { urlDecode } = require('./string-utils');`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00
Prompt dialog and popup menu 2017-11-08 19:51:55 +02:00			`const urlUtils = {};`

			`urlUtils.hash = function(url) {`
			`const s = url.split('#');`
			`if (s.length <= 1) return '';`
			`return s[s.length - 1];`
First pass at linting lib dir 2019-07-29 15:43:53 +02:00			`};`
Prompt dialog and popup menu 2017-11-08 19:51:55 +02:00
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00			`urlUtils.urlWithoutPath = function(url) {`
			`const parsed = require('url').parse(url, true);`
Chore: Apply eslint rules 2019-09-19 23:51:18 +02:00			return `${parsed.protocol}//${parsed.host}`;
First pass at linting lib dir 2019-07-29 15:43:53 +02:00			`};`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00
			`urlUtils.urlProtocol = function(url) {`
Clipper: Fixes #1462: Allow importing images from local file with file:// URLs 2019-05-10 02:06:06 +02:00			`if (!url) return '';`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00			`const parsed = require('url').parse(url, true);`
			`return parsed.protocol;`
First pass at linting lib dir 2019-07-29 15:43:53 +02:00			`};`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00
			`urlUtils.prependBaseUrl = function(url, baseUrl) {`
			`baseUrl = rtrimSlashes(baseUrl).trim(); // All the code below assumes that the baseUrl does not end up with a slash`
			`url = url.trim();`

			`if (!url) url = '';`
			`if (!baseUrl) return url;`
Clipper: Fixes #1622: Import named anchors from clipped pages 2019-06-13 01:26:09 +02:00			`if (url.indexOf('#') === 0) return url; // Don't prepend if it's a local anchor`
Clipper: Set source URL and fixed issues with tables and urls 2018-05-24 13:44:13 +02:00			`if (urlUtils.urlProtocol(url)) return url; // Don't prepend the base URL if the URL already has a scheme`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00
First pass at linting lib dir 2019-07-29 15:43:53 +02:00			`if (url.length >= 2 && url.indexOf('//') === 0) {`
			`// If it starts with // it's a protcol-relative URL`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00			`return urlUtils.urlProtocol(baseUrl) + url;`
First pass at linting lib dir 2019-07-29 15:43:53 +02:00			`} else if (url && url[0] === '/') {`
			`// If it starts with a slash, it's an absolute URL so it should be relative to the domain (and not to the full baseUrl)`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00			`return urlUtils.urlWithoutPath(baseUrl) + url;`
			`} else {`
Chore: Apply eslint rules 2019-09-19 23:51:18 +02:00			return baseUrl + (url ? `/${url}` : '');
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00			`}`
First pass at linting lib dir 2019-07-29 15:43:53 +02:00			`};`
Clipper: Download images and convert them to resources 2018-05-23 13:14:38 +02:00
Desktop, CLI: Fixed import of notes that contain links with hashes 2019-09-12 23:48:10 +02:00			`const resourceRegex = /^(joplin:\/\/\|:\/)([0-9a-zA-Z]{32})(\|#[^\s])(\|\s".?")$/;`

All: Fixed link issue following last update 2019-09-10 10:25:58 +02:00			`urlUtils.isResourceUrl = function(url) {`
Desktop, CLI: Fixed import of notes that contain links with hashes 2019-09-12 23:48:10 +02:00			`return !!url.match(resourceRegex);`
All: Fixed link issue following last update 2019-09-10 10:25:58 +02:00			`};`

Desktop: Resolves #1490: Add support for anchor hashes in note links 2019-09-09 19:16:00 +02:00			`urlUtils.parseResourceUrl = function(url) {`
All: Fixed link issue following last update 2019-09-10 10:25:58 +02:00			`if (!urlUtils.isResourceUrl(url)) return null;`

Desktop, CLI: Fixed import of notes that contain links with hashes 2019-09-12 23:48:10 +02:00			`const match = url.match(resourceRegex);`
Desktop: Resolves #1490: Add support for anchor hashes in note links 2019-09-09 19:16:00 +02:00
Desktop, CLI: Fixed import of notes that contain links with hashes 2019-09-12 23:48:10 +02:00			`const itemId = match[2];`
			`let hash = match[3].trim();`
Desktop, Mobile: Fixes #1870: Support non-alphabetical characters in note link anchors 2019-09-12 22:57:23 +02:00
			`// In general we want the hash to be decoded so that non-alphabetical languages`
			`// appear as-is without being encoded with %.`
			`// Fixes https://github.com/laurent22/joplin/issues/1870`
Desktop, CLI: Fixed import of notes that contain links with hashes 2019-09-12 23:48:10 +02:00			`if (hash) hash = urlDecode(hash.substr(1)); // Remove the first #`

			`return {`
			`itemId: itemId,`
			`hash: hash,`
			`};`
			`};`

			`urlUtils.extractResourceUrls = function(text) {`
			`const markdownLinksRE = /\]\((.*?)\)/g;`
			`const output = [];`
			`let result = null;`

			`while ((result = markdownLinksRE.exec(text)) !== null) {`
			`const resourceUrlInfo = urlUtils.parseResourceUrl(result[1]);`
			`if (resourceUrlInfo) output.push(resourceUrlInfo);`
			`}`

			`const htmlRegexes = [`
			`/<img[\s\S]?src=["']:\/([a-zA-Z0-9]{32})["'][\s\S]?>/gi,`
			`/<a[\s\S]?href=["']:\/([a-zA-Z0-9]{32})["'][\s\S]?>/gi,`
			`];`

			`for (const htmlRegex of htmlRegexes) {`
			`while (true) {`
			`const m = htmlRegex.exec(text);`
			`if (!m) break;`
			`output.push({ itemId: m[1], hash: '' });`
			`}`
			`}`
Desktop: Resolves #1490: Add support for anchor hashes in note links 2019-09-09 19:16:00 +02:00
			`return output;`
			`};`

All: Fixed OneDrive authentication 2020-10-28 17:50:34 +02:00			`urlUtils.objectToQueryString = function(query) {`
			`if (!query) return '';`

			`let queryString = '';`
			`const s = [];`
			`for (const k in query) {`
			`if (!query.hasOwnProperty(k)) continue;`
			s.push(`${encodeURIComponent(k)}=${encodeURIComponent(query[k])}`);
			`}`
			`queryString = s.join('&');`

			`return queryString;`
			`};`

First pass at linting lib dir 2019-07-29 15:43:53 +02:00			`module.exports = urlUtils;`