joplin/ReactNativeClient/lib/services/SearchEngine.js

const { Logger } = require('lib/logger.js');
const { shim } = require('lib/shim.js');
const ItemChange = require('lib/models/ItemChange.js');
const Setting = require('lib/models/Setting.js');
const Note = require('lib/models/Note.js');
const BaseModel = require('lib/BaseModel.js');
const { pregQuote } = require('lib/string-utils.js');

/**
 * Full-text search over the `notes_fts` table.
 *
 * The engine parses a user query into per-column terms (see `parseQuery`),
 * runs it as an FTS `MATCH` through the database object supplied with
 * `setDb()`, and orders the rows by a weight computed from the FTS
 * `offsets()` output.
 */
class SearchEngine {

	constructor() {
		this.dispatch = (action) => {};
		this.logger_ = new Logger();
		this.db_ = null;
	}

	/**
	 * Returns the shared engine, creating it on first use.
	 *
	 * @returns {SearchEngine}
	 */
	static instance() {
		if (this.instance_) return this.instance_;
		this.instance_ = new SearchEngine();
		return this.instance_;
	}

	setLogger(logger) {
		this.logger_ = logger;
	}

	logger() {
		return this.logger_;
	}

	setDb(db) {
		this.db_ = db;
	}

	db() {
		return this.db_;
	}

	/**
	 * Counts the rows currently present in `notes_fts`.
	 *
	 * @returns {Promise<number>} The `total` column of the count query, or 0
	 *   when no row (or a falsy total) comes back.
	 */
	async countRows() {
		const sql = 'SELECT count(*) as total FROM notes_fts';
		const row = await this.db().selectOne(sql);
		return row && row['total'] ? row['total'] : 0;
	}

	/**
	 * Lists the distinct column indexes referenced by an offsets array.
	 *
	 * @param {number[]} offsets Flattened FTS offsets: four integers per
	 *   match, the first of each group being the column number.
	 * @returns {number[]} Each distinct column number minus one, in order of
	 *   first appearance. NOTE(review): the "- 1" presumably skips a leading
	 *   column of the FTS table - confirm against the table definition.
	 */
	columnIndexesFromOffsets_(offsets) {
		const occurenceCount = Math.floor(offsets.length / 4);
		const indexes = [];

		for (let i = 0; i < occurenceCount; i++) {
			const colIndex = offsets[i * 4] - 1;
			if (!indexes.includes(colIndex)) indexes.push(colIndex);
		}

		return indexes;
	}

	/**
	 * Computes the ranking weight of one result row; higher sorts first.
	 *
	 * Offset doc: https://www.sqlite.org/fts3.html#offsets
	 *
	 * - If there's only one term in the query string, the content with the
	 *   most matches goes on top.
	 * - If there are multiple terms, the result with the most occurrences
	 *   that are closest to each other goes on top.
	 *   eg. if query is "abcd efgh", "abcd efgh" will go before "abcd XX efgh".
	 *
	 * @param {number[]} offsets Flattened FTS offsets, four integers per match.
	 * @param {number} termCount Number of terms in the parsed query.
	 * @returns {number} Always a finite number for finite input offsets.
	 */
	calculateWeight_(offsets, termCount) {
		const occurenceCount = Math.floor(offsets.length / 4);

		if (termCount === 1) return occurenceCount;

		// Sum of the differences between the byte offsets (third integer of
		// each group) of consecutive matches.
		let spread = 0;
		let previousDist = null;
		for (let i = 0; i < occurenceCount; i++) {
			const dist = offsets[i * 4 + 2];

			if (previousDist !== null) {
				const delta = dist - previousDist;
				spread += delta;
			}

			previousDist = dist;
		}

		// With zero or one occurrence (or matches sharing the same offset) the
		// spread is 0 and the division below would yield NaN or Infinity. NaN
		// in particular compares false against everything, which makes the
		// sort in orderResults_ inconsistent. Treat it as a spread of 1.
		if (spread === 0) return occurenceCount;

		// Divide the number of occurrences by the spread so even if a note has
		// the searched terms many times but very spread apart, it is given a
		// lower weight than a note that has the terms once or twice but right
		// next to each other.
		return occurenceCount / spread;
	}

	/**
	 * Sets `row.weight` on every row and sorts `rows` in place, heaviest first.
	 *
	 * @param {Object[]} rows Result rows; each must carry `offsets` as the
	 *   space-separated string produced by the FTS `offsets()` function.
	 * @param {Object} parsedQuery Output of `parseQuery` (only `termCount` is read).
	 */
	orderResults_(rows, parsedQuery) {
		for (const row of rows) {
			const offsets = row.offsets.split(' ').map(o => Number(o));
			row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);
		}

		rows.sort((a, b) => {
			if (a.weight < b.weight) return +1;
			if (a.weight > b.weight) return -1;
			return 0;
		});
	}

	/**
	 * Converts a query term containing "*" wildcards to a regex source string.
	 *
	 * Leading "*" characters are dropped, the rest is escaped with
	 * `pregQuote`, and a trailing "*" becomes "any run of characters that are
	 * not one of the listed separators".
	 *
	 * https://stackoverflow.com/a/13818704/561309
	 *
	 * @param {string} term
	 * @returns {string} Regex source (not a RegExp object).
	 */
	queryTermToRegex(term) {
		while (term.length && term.indexOf('*') === 0) {
			term = term.slice(1);
		}

		let regexString = pregQuote(term);
		if (regexString[regexString.length - 1] === '*') {
			// Drops two characters: assumes pregQuote turned the trailing "*"
			// into a backslash-escaped pair - TODO confirm against pregQuote.
			regexString = regexString.slice(0, -2) + '[^' + pregQuote(' \t\n\r,.,+-*?!={}<>|:"\'()[]') + ']' + '*';
		}

		return regexString;
	}

	/**
	 * Splits a query string into terms grouped by column.
	 *
	 * Syntax handled: terms separated by spaces, `"quoted phrases"` kept as
	 * one term, and `col:term` prefixes that file the next term (or quoted
	 * phrase) under `col`. Terms without a prefix go under the `_` key.
	 * Terms containing "*" are replaced by `{ type: 'regex', value }`
	 * objects; a bare "*" is dropped.
	 *
	 * @param {string} query
	 * @returns {{termCount: number, keys: string[], terms: Object}}
	 *   `terms` maps column name to its list of terms, `keys` lists the
	 *   columns that have at least one term, `termCount` is the total number
	 *   of terms across all columns.
	 */
	parseQuery(query) {
		const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
		const terms = {_:[]};

		let inQuote = false;
		let currentCol = '_';
		let currentTerm = '';
		for (let i = 0; i < query.length; i++) {
			const c = query[i];

			if (c === '"') {
				if (inQuote) {
					// An empty phrase ("") carries no term.
					if (currentTerm) terms[currentCol].push(currentTerm);
					currentTerm = '';
					// The column prefix applies to this phrase only, exactly
					// as it does for an unquoted term.
					currentCol = '_';
					inQuote = false;
				} else {
					inQuote = true;
				}
				continue;
			}

			if (c === ' ' && !inQuote) {
				if (!currentTerm) continue;
				terms[currentCol].push(currentTerm);
				currentCol = '_';
				currentTerm = '';
				continue;
			}

			if (c === ':' && !inQuote) {
				currentCol = currentTerm;
				// Only create the list once so that a column named several
				// times ("title:a title:b") keeps all of its terms.
				if (!hasOwn(terms, currentCol)) terms[currentCol] = [];
				currentTerm = '';
				continue;
			}

			currentTerm += c;
		}

		if (currentTerm) terms[currentCol].push(currentTerm);

		// Filter terms:
		// - Convert wildcards to regex
		// - Remove columns with no results
		// - Add count of terms

		let termCount = 0;
		const keys = [];
		for (const col of Object.keys(terms)) {
			const colTerms = terms[col];

			// Iterate backwards because entries may be removed on the way.
			for (let i = colTerms.length - 1; i >= 0; i--) {
				const term = colTerms[i];

				// SQLite FTS doesn't allow "*" queries and neither shall we
				if (term === '*') {
					colTerms.splice(i, 1);
					continue;
				}

				if (term.indexOf('*') >= 0) {
					colTerms[i] = { type: 'regex', value: this.queryTermToRegex(term) };
				}
			}

			// Checked after the wildcard pass so that a column left empty by
			// it is removed as well.
			if (!colTerms.length) {
				delete terms[col];
				continue;
			}

			termCount += colTerms.length;
			keys.push(col);
		}

		return {
			termCount: termCount,
			keys: keys,
			terms: terms,
		};
	}

	/**
	 * Flattens the per-column terms of a parsed query into a single list.
	 *
	 * @param {Object} parsedQuery Output of `parseQuery`; may be null/undefined.
	 * @returns {Array} All terms of all columns; empty when there is no
	 *   parsed query or it has no terms.
	 */
	allParsedQueryTerms(parsedQuery) {
		if (!parsedQuery || !parsedQuery.termCount) return [];

		let output = [];
		for (const col of Object.keys(parsedQuery.terms)) {
			output = output.concat(parsedQuery.terms[col]);
		}
		return output;
	}

	/**
	 * Runs `query` against `notes_fts` and returns the rows, best match first.
	 *
	 * The raw query string is bound as the `MATCH` parameter; the parsed
	 * form is only used to weigh the results.
	 *
	 * @param {string} query
	 * @returns {Promise<Object[]>} Rows with `id`, `title`, `offsets` and the
	 *   computed `weight`.
	 */
	async search(query) {
		const parsedQuery = this.parseQuery(query);
		const sql = 'SELECT id, title, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?';
		const rows = await this.db().selectAll(sql, [query]);
		this.orderResults_(rows, parsedQuery);
		return rows;
	}

}

// Export the class itself; a shared instance is available through SearchEngine.instance().
module.exports = SearchEngine;
Started support for FTS search 2018-12-09 22:45:50 +02:00			`const { Logger } = require('lib/logger.js');`
Started integrating search engine to desktop app 2018-12-10 20:58:49 +02:00			`const { shim } = require('lib/shim.js');`
Nearly finished search engine backend 2018-12-10 20:54:46 +02:00			`const ItemChange = require('lib/models/ItemChange.js');`
			`const Setting = require('lib/models/Setting.js');`
			`const Note = require('lib/models/Note.js');`
			`const BaseModel = require('lib/BaseModel.js');`
Mobile: Fixes #382: Implemented new search engine for mobile and highlight searched words in notes 2018-12-16 19:32:42 +02:00			`const { pregQuote } = require('lib/string-utils.js');`
Started support for FTS search 2018-12-09 22:45:50 +02:00
			`class SearchEngine {`

			`constructor() {`
			`this.dispatch = (action) => {};`
			`this.logger_ = new Logger();`
			`this.db_ = null;`
			`}`
Nearly finished search engine backend 2018-12-10 20:54:46 +02:00
Started support for FTS search 2018-12-09 22:45:50 +02:00			`static instance() {`
			`if (this.instance_) return this.instance_;`
			`this.instance_ = new SearchEngine();`
			`return this.instance_;`
			`}`

			`setLogger(logger) {`
			`this.logger_ = logger;`
			`}`

			`logger() {`
			`return this.logger_;`
			`}`

			`setDb(db) {`
			`this.db_ = db;`
			`}`

			`db() {`
			`return this.db_;`
			`}`

Nearly finished search engine backend 2018-12-10 20:54:46 +02:00			`async countRows() {`
			`const sql = 'SELECT count(*) as total FROM notes_fts'`
			`const row = await this.db().selectOne(sql);`
			`return row && row['total'] ? row['total'] : 0;`
			`}`

			`columnIndexesFromOffsets_(offsets) {`
			`const occurenceCount = Math.floor(offsets.length / 4);`
			`const indexes = [];`

			`for (let i = 0; i < occurenceCount; i++) {`
			`const colIndex = offsets[i * 4] - 1;`
			`if (indexes.indexOf(colIndex) < 0) indexes.push(colIndex);`
			`}`

			`return indexes;`
			`}`

Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`calculateWeight_(offsets, termCount) {`
Nearly finished search engine backend 2018-12-10 20:54:46 +02:00			`// Offset doc: https://www.sqlite.org/fts3.html#offsets`
Started integrating search engine to desktop app 2018-12-10 20:58:49 +02:00
Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`// - If there's only one term in the query string, the content with the most matches goes on top`
			`// - If there are multiple terms, the result with the most occurences that are closest to each others go on top.`
			`// eg. if query is "abcd efgh", "abcd efgh" will go before "abcd XX efgh".`
Nearly finished search engine backend 2018-12-10 20:54:46 +02:00
			`const occurenceCount = Math.floor(offsets.length / 4);`

Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`if (termCount === 1) return occurenceCount;`

Nearly finished search engine backend 2018-12-10 20:54:46 +02:00			`let spread = 0;`
			`let previousDist = null;`
			`for (let i = 0; i < occurenceCount; i++) {`
			`const dist = offsets[i * 4 + 2];`

			`if (previousDist !== null) {`
			`const delta = dist - previousDist;`
			`spread += delta;`
			`}`

			`previousDist = dist;`
			`}`

			`// Divide the number of occurences by the spread so even if a note has many times the searched terms`
			`// but these terms are very spread appart, they'll be given a lower weight than a note that has the`
			`// terms once or twice but just next to each others.`
			`return occurenceCount / spread;`
			`}`

Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`orderResults_(rows, parsedQuery) {`
Nearly finished search engine backend 2018-12-10 20:54:46 +02:00			`for (let i = 0; i < rows.length; i++) {`
			`const row = rows[i];`
			`const offsets = row.offsets.split(' ').map(o => Number(o));`
Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);`
Nearly finished search engine backend 2018-12-10 20:54:46 +02:00			`// row.colIndexes = this.columnIndexesFromOffsets_(offsets);`
			`// row.offsets = offsets;`
			`}`

			`rows.sort((a, b) => {`
			`if (a.weight < b.weight) return +1;`
			`if (a.weight > b.weight) return -1;`
			`return 0;`
			`});`
			`}`

Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`// https://stackoverflow.com/a/13818704/561309`
			`queryTermToRegex(term) {`
Finished search engine integration with desktop app 2018-12-14 00:57:14 +02:00			`while (term.length && term.indexOf('*') === 0) {`
			`term = term.substr(1);`
			`}`

Mobile: Fixes #382: Implemented new search engine for mobile and highlight searched words in notes 2018-12-16 19:32:42 +02:00			`let regexString = pregQuote(term);`
Finished search engine integration with desktop app 2018-12-14 00:57:14 +02:00			`if (regexString[regexString.length - 1] === '*') {`
Mobile: Fixes #382: Implemented new search engine for mobile and highlight searched words in notes 2018-12-16 19:32:42 +02:00			`regexString = regexString.substr(0, regexString.length - 2) + '[^' + pregQuote(' \t\n\r,.,+-?!={}<>\|:"\'()[]') + ']' + '';`
Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`}`
Finished search engine integration with desktop app 2018-12-14 00:57:14 +02:00
			`return regexString;`
Nearly finished search engine backend 2018-12-10 20:54:46 +02:00			`}`

Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`parseQuery(query) {`
			`const terms = {_:[]};`

			`let inQuote = false;`
			`let currentCol = '_';`
			`let currentTerm = '';`
			`for (let i = 0; i < query.length; i++) {`
			`const c = query[i];`

			`if (c === '"') {`
			`if (inQuote) {`
			`terms[currentCol].push(currentTerm);`
			`currentTerm = '';`
			`inQuote = false;`
			`} else {`
			`inQuote = true;`
			`}`
			`continue;`
			`}`
Started integrating search engine to desktop app 2018-12-10 20:58:49 +02:00
Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`if (c === ' ' && !inQuote) {`
			`if (!currentTerm) continue;`
			`terms[currentCol].push(currentTerm);`
			`currentCol = '_';`
			`currentTerm = '';`
			`continue;`
			`}`
Started integrating search engine to desktop app 2018-12-10 20:58:49 +02:00
Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`if (c === ':' && !inQuote) {`
			`currentCol = currentTerm;`
			`terms[currentCol] = [];`
			`currentTerm = '';`
			`continue;`
			`}`

			`currentTerm += c;`
			`}`

			`if (currentTerm) terms[currentCol].push(currentTerm);`

			`// Filter terms:`
			`// - Convert wildcards to regex`
			`// - Remove columns with no results`
			`// - Add count of terms`

			`let termCount = 0;`
			`const keys = [];`
			`for (let col in terms) {`
			`if (!terms.hasOwnProperty(col)) continue;`

			`if (!terms[col].length) {`
			`delete terms[col];`
			`continue;`
			`}`

			`for (let i = terms[col].length - 1; i >= 0; i--) {`
			`const term = terms[col][i];`

			`// SQlLite FTS doesn't allow "*" queries and neither shall we`
			`if (term === '*') {`
			`terms[col].splice(i, 1);`
			`continue;`
			`}`

			`if (term.indexOf('*') >= 0) {`
Finished search engine integration with desktop app 2018-12-14 00:57:14 +02:00			`terms[col][i] = { type: 'regex', value: this.queryTermToRegex(term) };`
Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`}`
			`}`

			`termCount += terms[col].length;`

			`keys.push(col);`
			`}`

			`return {`
			`termCount: termCount,`
			`keys: keys,`
			`terms: terms,`
			`};`
Started integrating search engine to desktop app 2018-12-10 20:58:49 +02:00			`}`

Finished search engine integration with desktop app 2018-12-14 00:57:14 +02:00			`allParsedQueryTerms(parsedQuery) {`
			`if (!parsedQuery \|\| !parsedQuery.termCount) return [];`

			`let output = [];`
			`for (let col in parsedQuery.terms) {`
			`if (!parsedQuery.terms.hasOwnProperty(col)) continue;`
			`output = output.concat(parsedQuery.terms[col]);`
			`}`
			`return output;`
			`}`

Fixed logic to update search engine data 2018-12-12 23:40:05 +02:00			`async search(query) {`
			`const parsedQuery = this.parseQuery(query);`
			`const sql = 'SELECT id, title, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?'`
			`const rows = await this.db().selectAll(sql, [query]);`
			`this.orderResults_(rows, parsedQuery);`
			`return rows;`
			`}`

Started support for FTS search 2018-12-09 22:45:50 +02:00			`}`

			`module.exports = SearchEngine;`