mirror of https://github.com/laurent22/joplin.git synced 2024-11-27 08:21:03 +02:00

Merge branch 'full_text_search'

This commit is contained in:
Laurent Cozic 2018-12-14 19:56:21 +01:00
commit 38c42b7a15
14 changed files with 576 additions and 63 deletions

View File

@ -8,6 +8,7 @@ const Note = require('lib/models/Note.js');
const Tag = require('lib/models/Tag.js');
const NoteTag = require('lib/models/NoteTag.js');
const Resource = require('lib/models/Resource.js');
const ItemChange = require('lib/models/ItemChange.js');
const NoteResource = require('lib/models/NoteResource.js');
const ResourceService = require('lib/services/ResourceService.js');
const fs = require('fs-extra');
@ -124,4 +125,25 @@ describe('services_ResourceService', function() {
expect(!!(await Resource.load(resource1.id))).toBe(true);
}));
it('should not process twice the same change', asyncTest(async () => {
const service = new ResourceService();
let folder1 = await Folder.save({ title: "folder1" });
let note1 = await Note.save({ title: 'ma note', parent_id: folder1.id });
note1 = await shim.attachFileToNote(note1, __dirname + '/../tests/support/photo.jpg');
let resource1 = (await Resource.all())[0];
await service.indexNoteResources();
const before = (await NoteResource.all())[0];
await time.sleep(0.1);
await service.indexNoteResources();
const after = (await NoteResource.all())[0];
expect(before.last_seen_time).toBe(after.last_seen_time);
}));
});

View File

@ -0,0 +1,188 @@
require('app-module-path').addPath(__dirname);
const { time } = require('lib/time-utils.js');
const { fileContentEqual, setupDatabase, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync } = require('test-utils.js');
const SearchEngine = require('lib/services/SearchEngine');
const Note = require('lib/models/Note');
process.on('unhandledRejection', (reason, p) => {
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
});
let engine = null;
describe('services_SearchEngine', function() {
beforeEach(async (done) => {
await setupDatabaseAndSynchronizer(1);
await switchClient(1);
engine = new SearchEngine();
engine.setDb(db());
done();
});
it('should keep the content and FTS table in sync', async (done) => {
let rows, n1, n2, n3;
n1 = await Note.save({ title: "a" });
n2 = await Note.save({ title: "b" });
rows = await engine.search('a');
expect(rows.length).toBe(1);
expect(rows[0].title).toBe('a');
await Note.delete(n1.id);
rows = await engine.search('a');
expect(rows.length).toBe(0);
rows = await engine.search('b');
expect(rows[0].title).toBe('b');
await Note.save({ id: n2.id, title: 'c' });
rows = await engine.search('b');
expect(rows.length).toBe(0);
rows = await engine.search('c');
expect(rows[0].title).toBe('c');
await Note.save({ id: n2.id, encryption_applied: 1 });
rows = await engine.search('c');
expect(rows.length).toBe(0);
await Note.save({ id: n2.id, encryption_applied: 0 });
rows = await engine.search('c');
expect(rows.length).toBe(1);
done();
});
it('should order search results by relevance (1)', async (done) => {
const n1 = await Note.save({ title: "abcd efgh" }); // 3
const n2 = await Note.save({ title: "abcd aaaaa abcd abcd" }); // 1
const n3 = await Note.save({ title: "abcd aaaaa bbbb eeee abcd" }); // 2
const rows = await engine.search('abcd');
expect(rows[0].id).toBe(n2.id);
expect(rows[1].id).toBe(n3.id);
expect(rows[2].id).toBe(n1.id);
done();
});
it('should order search results by relevance (2)', async (done) => {
// 1
const n1 = await Note.save({ title: "abcd efgh", body: "XX abcd XX efgh" });
// 4
const n2 = await Note.save({ title: "abcd aaaaa bbbb eeee efgh" });
// 3
const n3 = await Note.save({ title: "abcd aaaaa efgh" });
// 2
const n4 = await Note.save({ title: "blablablabla blabla bla abcd X efgh" });
// 5
const n5 = await Note.save({ title: "occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh" });
const rows = await engine.search('abcd efgh');
expect(rows[0].id).toBe(n1.id);
expect(rows[1].id).toBe(n4.id);
expect(rows[2].id).toBe(n3.id);
expect(rows[3].id).toBe(n2.id);
expect(rows[4].id).toBe(n5.id);
done();
});
it('should support various query types', async (done) => {
let rows;
const n1 = await Note.save({ title: "abcd efgh ijkl", body: "aaaa bbbb" });
const n2 = await Note.save({ title: "iiii efgh bbbb", body: "aaaa bbbb" });
rows = await engine.search('abcd ijkl');
expect(rows.length).toBe(1);
rows = await engine.search('"abcd ijkl"');
expect(rows.length).toBe(0);
rows = await engine.search('"abcd efgh"');
expect(rows.length).toBe(1);
rows = await engine.search('title:abcd');
expect(rows.length).toBe(1);
rows = await engine.search('title:efgh');
expect(rows.length).toBe(2);
rows = await engine.search('body:abcd');
expect(rows.length).toBe(0);
rows = await engine.search('body:bbbb');
expect(rows.length).toBe(2);
rows = await engine.search('body:bbbb iiii');
expect(rows.length).toBe(1);
done();
});
it('should parse normal query strings', async (done) => {
let rows;
const testCases = [
['abcd efgh', { _: ['abcd', 'efgh'] }],
['abcd efgh', { _: ['abcd', 'efgh'] }],
['title:abcd efgh', { _: ['efgh'], title: ['abcd'] }],
['title:abcd', { title: ['abcd'] }],
['"abcd efgh"', { _: ['abcd efgh'] }],
];
for (let i = 0; i < testCases.length; i++) {
const t = testCases[i];
const input = t[0];
const expected = t[1];
const actual = engine.parseQuery(input);
expect(JSON.stringify(actual.terms._)).toBe(JSON.stringify(expected._));
expect(JSON.stringify(actual.terms.title)).toBe(JSON.stringify(expected.title));
expect(JSON.stringify(actual.terms.body)).toBe(JSON.stringify(expected.body));
}
done();
});
it('should parse query strings with wildcards', async (done) => {
let rows;
const testCases = [
['do*', ['do', 'dog', 'domino'], [] ],
// "*" is a wildcard only when used at the end (to searhc for documents with the specified prefix)
// If it's at the beginning, it's ignored, if it's in the middle, it's treated as a litteral "*".
['*an*', ['an', 'anneau'], ['piano', 'plan'] ],
['no*no', ['no*no'], ['nonono'] ],
];
for (let i = 0; i < testCases.length; i++) {
const t = testCases[i];
const input = t[0];
const shouldMatch = t[1];
const shouldNotMatch = t[2];
const regex = new RegExp(engine.parseQuery(input).terms._[0].value, 'gmi');
for (let j = 0; j < shouldMatch.length; j++) {
const r = shouldMatch[j].match(regex);
expect(!!r).toBe(true, '"' + input + '" should match "' + shouldMatch[j] + '"');
}
// for (let j = 0; j < shouldNotMatch.length; j++) {
// const r = shouldNotMatch[j].match(regex);
// // console.info(input, shouldNotMatch)
// expect(!!r).toBe(false, '"' + input + '" should not match "' + shouldNotMatch[j] + '"');
// }
}
expect(engine.parseQuery('*').termCount).toBe(0);
done();
});
});

View File

@ -4,6 +4,7 @@ const { DatabaseDriverNode } = require('lib/database-driver-node.js');
const BaseModel = require('lib/BaseModel.js');
const Folder = require('lib/models/Folder.js');
const Note = require('lib/models/Note.js');
const ItemChange = require('lib/models/ItemChange.js');
const Resource = require('lib/models/Resource.js');
const Tag = require('lib/models/Tag.js');
const NoteTag = require('lib/models/NoteTag.js');
@ -122,6 +123,8 @@ async function switchClient(id) {
async function clearDatabase(id = null) {
if (id === null) id = currentClient_;
await ItemChange.waitForAllSaved();
let queries = [
'DELETE FROM notes',
'DELETE FROM folders',

View File

@ -23,6 +23,7 @@ const DecryptionWorker = require('lib/services/DecryptionWorker');
const InteropService = require('lib/services/InteropService');
const InteropServiceHelper = require('./InteropServiceHelper.js');
const ResourceService = require('lib/services/ResourceService');
const SearchEngine = require('lib/services/SearchEngine');
const ClipperServer = require('lib/ClipperServer');
const ExternalEditWatcher = require('lib/services/ExternalEditWatcher');
const { bridge } = require('electron').remote.require('./bridge');
@ -793,6 +794,9 @@ class Application extends BaseApplication {
ResourceService.runInBackground();
SearchEngine.instance().setDb(reg.db());
SearchEngine.instance().setLogger(reg.logger());
if (Setting.value('env') === 'dev') {
AlarmService.updateAllNotifications();
} else {

View File

@ -13,6 +13,7 @@ const InteropService = require('lib/services/InteropService');
const InteropServiceHelper = require('../InteropServiceHelper.js');
const Search = require('lib/models/Search');
const Mark = require('mark.js/dist/mark.min.js');
const SearchEngine = require('lib/services/SearchEngine');
class NoteListComponent extends React.Component {
@ -234,8 +235,11 @@ class NoteListComponent extends React.Component {
let highlightedWords = [];
if (this.props.notesParentType === 'Search') {
const search = BaseModel.byId(this.props.searches, this.props.selectedSearchId);
highlightedWords = search ? Search.keywords(search.query_pattern) : [];
const query = BaseModel.byId(this.props.searches, this.props.selectedSearchId);
if (query) {
const parsedQuery = SearchEngine.instance().parseQuery(query.query_pattern);
highlightedWords = SearchEngine.instance().allParsedQueryTerms(parsedQuery);
}
}
let style = Object.assign({ width: width }, this.style().listItem);
@ -266,7 +270,18 @@ class NoteListComponent extends React.Component {
exclude: ['img'],
acrossElements: true,
});
mark.mark(highlightedWords);
mark.unmark();
for (let i = 0; i < highlightedWords.length; i++) {
const w = highlightedWords[i];
if (w.type === 'regex') {
mark.markRegExp(new RegExp(w.value, 'gmi'), { acrossElements: true });
} else {
mark.mark([w]);
}
}
// Note: in this case it is safe to use dangerouslySetInnerHTML because titleElement
// is a span tag that we created and that contains data that's been inserted as plain text

View File

@ -34,6 +34,7 @@ const ExternalEditWatcher = require('lib/services/ExternalEditWatcher');
const ResourceFetcher = require('lib/services/ResourceFetcher');
const { toSystemSlashes, safeFilename } = require('lib/path-utils');
const { clipboard } = require('electron');
const SearchEngine = require('lib/services/SearchEngine');
require('brace/mode/markdown');
// https://ace.c9.io/build/kitchen-sink.html
@ -84,7 +85,7 @@ class NoteTextComponent extends React.Component {
this.scheduleSaveTimeout_ = null;
this.restoreScrollTop_ = null;
this.lastSetHtml_ = '';
this.lastSetMarkers_ = [];
this.lastSetMarkers_ = '';
this.lastSetMarkersOptions_ = {};
this.selectionRange_ = null;
this.noteSearchBar_ = React.createRef();
@ -508,7 +509,7 @@ class NoteTextComponent extends React.Component {
}
this.lastSetHtml_ = '';
this.lastSetMarkers_ = [];
this.lastSetMarkers_ = '';
this.lastSetMarkersOptions_ = {};
this.setState(newState);
@ -1558,11 +1559,15 @@ class NoteTextComponent extends React.Component {
markerOptions.selectedIndex = this.state.localSearch.selectedIndex;
} else {
const search = BaseModel.byId(this.props.searches, this.props.selectedSearchId);
if (search) keywords = Search.keywords(search.query_pattern);
if (search) {
const parsedQuery = SearchEngine.instance().parseQuery(search.query_pattern);
keywords = SearchEngine.instance().allParsedQueryTerms(parsedQuery);
}
}
if (htmlHasChanged || !ArrayUtils.contentEquals(this.lastSetMarkers_, keywords) || !ObjectUtils.fieldsEqual(this.lastSetMarkersOptions_, markerOptions)) {
this.lastSetMarkers_ = keywords.slice();
const keywordHash = JSON.stringify(keywords);
if (htmlHasChanged || keywordHash !== this.lastSetMarkers_ || !ObjectUtils.fieldsEqual(this.lastSetMarkersOptions_, markerOptions)) {
this.lastSetMarkers_ = keywordHash;
this.lastSetMarkersOptions_ = Object.assign({}, markerOptions);
this.webview_.send('setMarkers', keywords, markerOptions);
}

View File

@ -36,6 +36,7 @@
<body id="body">
<div id="hlScriptContainer"></div>
<div id="markScriptContainer"></div>
<!-- START_OF_DOCUMENT -->
<div id="content" ondragstart="return false;" ondrop="return false;"></div>
<script>
@ -143,6 +144,8 @@
ipc.setHtml = (event) => {
const html = event.html;
markJsHackMarkerInserted_ = false;
updateBodyHeight();
contentElement.innerHTML = html;
@ -196,6 +199,29 @@
setPercentScroll(percent);
}
// HACK for Mark.js bug - https://github.com/julmot/mark.js/issues/127
let markJsHackMarkerInserted_ = false;
function addMarkJsSpaceHack(document) {
if (markJsHackMarkerInserted_) return;
const prepareElementsForMarkJs = (elements, type) => {
// const markJsHackMarker_ = '&#8203; &#8203;'
const markJsHackMarker_ = ' ';
for (let i = 0; i < elements.length; i++) {
if (!type) {
elements[i].innerHTML = elements[i].innerHTML + markJsHackMarker_;
} else if (type === 'insertBefore') {
elements[i].insertAdjacentHTML('beforeBegin', markJsHackMarker_);
}
}
}
prepareElementsForMarkJs(document.getElementsByTagName('p'));
prepareElementsForMarkJs(document.getElementsByTagName('div'));
prepareElementsForMarkJs(document.getElementsByTagName('br'), 'insertBefore');
markJsHackMarkerInserted_ = true;
}
let mark_ = null;
let markSelectedElement_ = null;
function setMarkers(keywords, options = null) {
@ -208,6 +234,8 @@
});
}
addMarkJsSpaceHack(document);
mark_.unmark()
if (markSelectedElement_) markSelectedElement_.classList.remove('mark-selected');
@ -215,20 +243,32 @@
let selectedElement = null;
let elementIndex = 0;
if (keywords.length) {
mark_.mark(keywords, {
each: (element) => {
if (!('selectedIndex' in options)) return;
const onEachElement = (element) => {
if (!('selectedIndex' in options)) return;
if (('selectedIndex' in options) && elementIndex === options.selectedIndex) {
markSelectedElement_ = element;
element.classList.add('mark-selected');
selectedElement = element;
}
elementIndex++;
}
});
if (('selectedIndex' in options) && elementIndex === options.selectedIndex) {
markSelectedElement_ = element;
element.classList.add('mark-selected');
selectedElement = element;
}
elementIndex++;
}
for (let i = 0; i < keywords.length; i++) {
const keyword = keywords[i];
if (keyword.type === 'regex') {
mark_.markRegExp(new RegExp(keyword.value, 'gmi'), {
each: onEachElement,
acrossElements: true,
});
} else {
mark_.mark([keyword], {
each: onEachElement,
accuracy: 'exactly',
});
}
}
ipcProxySendToHost('setMarkerCount', elementIndex);

View File

@ -34,6 +34,7 @@ const SyncTargetWebDAV = require('lib/SyncTargetWebDAV.js');
const SyncTargetDropbox = require('lib/SyncTargetDropbox.js');
const EncryptionService = require('lib/services/EncryptionService');
const ResourceFetcher = require('lib/services/ResourceFetcher');
const SearchEngine = require('lib/services/SearchEngine');
const DecryptionWorker = require('lib/services/DecryptionWorker');
const BaseService = require('lib/services/BaseService');
@ -218,12 +219,28 @@ class BaseApplication {
} else if (parentType === Tag.modelType()) {
notes = await Tag.notes(parentId, options);
} else if (parentType === BaseModel.TYPE_SEARCH) {
let fields = Note.previewFields();
let search = BaseModel.byId(state.searches, parentId);
notes = await Note.previews(null, {
fields: fields,
anywherePattern: '*' + search.query_pattern + '*',
});
const search = BaseModel.byId(state.searches, parentId);
const results = await SearchEngine.instance().search(search.query_pattern);
const noteIds = results.map(n => n.id);
const previewOptions = {
order: [],
fields: Note.previewFields(),
conditions: ['id IN ("' + noteIds.join('","') + '")'],
}
notes = await Note.previews(null, previewOptions);
// By default the notes are returned in reverse, or possibly arbitrary, order,
// so sort them here into the order produced by the search engine
// (which returns results in order of relevance).
const sortedNotes = [];
for (let i = 0; i < notes.length; i++) {
const idx = noteIds.indexOf(notes[i].id);
sortedNotes[idx] = notes[i];
}
notes = sortedNotes;
}
}

View File

@ -395,8 +395,6 @@ class MdToHtml {
previousToken = t;
}
output.unshift('<!-- START_OF_DOCUMENT -->');
// Insert the extra CSS at the top of the HTML
if (!ObjectUtils.isEmpty(extraCssBlocks)) {

View File

@ -219,6 +219,7 @@ class JoplinDatabase extends Database {
if (tableName == 'android_metadata') continue;
if (tableName == 'table_fields') continue;
if (tableName == 'sqlite_sequence') continue;
if (tableName.indexOf('notes_fts') === 0) continue;
chain.push(() => {
return this.selectAll('PRAGMA table_info("' + tableName + '")').then((pragmas) => {
for (let i = 0; i < pragmas.length; i++) {
@ -260,7 +261,7 @@ class JoplinDatabase extends Database {
// default value and thus might cause problems. In that case, the default value
// must be set in the synchronizer too.
const existingDatabaseVersions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14];
const existingDatabaseVersions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
let currentVersionIndex = existingDatabaseVersions.indexOf(fromVersion);
@ -445,6 +446,30 @@ class JoplinDatabase extends Database {
}));
}
if (targetVersion == 15) {
queries.push('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes", notindexed="id", id, title, body)');
queries.push('INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0');
// Keep the content table (notes) and the FTS table (notes_fts) in sync.
// More info at https://www.sqlite.org/fts3.html#_external_content_fts4_tables_
queries.push(`
CREATE TRIGGER notes_fts_before_update BEFORE UPDATE ON notes BEGIN
DELETE FROM notes_fts WHERE docid=old.rowid;
END;`);
queries.push(`
CREATE TRIGGER notes_fts_before_delete BEFORE DELETE ON notes BEGIN
DELETE FROM notes_fts WHERE docid=old.rowid;
END;`);
queries.push(`
CREATE TRIGGER notes_after_update AFTER UPDATE ON notes BEGIN
INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0 AND new.rowid = notes.rowid;
END;`);
queries.push(`
CREATE TRIGGER notes_after_insert AFTER INSERT ON notes BEGIN
INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0 AND new.rowid = notes.rowid;
END;`);
}
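// Illustrative sketch only (not part of the migration): once the virtual table and
// triggers above are in place, a full text query can be run directly against notes_fts,
// which is what lib/services/SearchEngine.js does below. For instance:
//
//     const rows = await this.selectAll(
//         'SELECT id, title, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?',
//         ['abcd efgh']
//     );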
queries.push({ sql: 'UPDATE version SET version = ?', params: [targetVersion] });
await this.transactionExecBatch(queries);

View File

@ -249,7 +249,7 @@ class Note extends BaseItem {
// is used to sort already loaded notes.
if (!options) options = {};
if (!options.order) options.order = [
if (!('order' in options)) options.order = [
{ by: 'user_updated_time', dir: 'DESC' },
{ by: 'user_created_time', dir: 'DESC' },
{ by: 'title', dir: 'DESC' },

View File

@ -157,6 +157,9 @@ class Setting extends BaseModel {
'net.ignoreTlsErrors': { value: false, type: Setting.TYPE_BOOL, show: (settings) => { return [SyncTargetRegistry.nameToId('nextcloud'), SyncTargetRegistry.nameToId('webdav')].indexOf(settings['sync.target']) >= 0 }, public: true, appTypes: ['desktop', 'cli'], label: () => _('Ignore TLS certificate errors') },
'api.token': { value: null, type: Setting.TYPE_STRING, public: false },
'resourceService.lastProcessedChangeId': { value: 0, type: Setting.TYPE_INT, public: false },
'searchEngine.lastProcessedChangeId': { value: 0, type: Setting.TYPE_INT, public: false },
};
return this.metadata_;
@ -459,27 +462,6 @@ class Setting extends BaseModel {
return output;
}
// Currently only supports objects with properties one level deep
// static object(key) {
// let output = {};
// let keys = this.keys();
// for (let i = 0; i < keys.length; i++) {
// let k = keys[i].split('.');
// if (k[0] == key) {
// output[k[1]] = this.value(keys[i]);
// }
// }
// return output;
// }
// Currently only supports objects with properties one level deep
// static setObject(key, object) {
// for (let n in object) {
// if (!object.hasOwnProperty(n)) continue;
// this.setValue(key + '.' + n, object[n]);
// }
// }
static async saveAll() {
if (!this.saveTimeoutId_) return Promise.resolve();

View File

@ -4,6 +4,7 @@ const Note = require('lib/models/Note');
const Resource = require('lib/models/Resource');
const BaseModel = require('lib/BaseModel');
const BaseService = require('lib/services/BaseService');
const Setting = require('lib/models/Setting');
const { shim } = require('lib/shim');
class ResourceService extends BaseService {
@ -11,10 +12,6 @@ class ResourceService extends BaseService {
async indexNoteResources() {
this.logger().info('ResourceService::indexNoteResources: Start');
let lastId = 0;
const processedChangeIds = [];
await ItemChange.waitForAllSaved();
while (true) {
@ -25,7 +22,7 @@ class ResourceService extends BaseService {
AND id > ?
ORDER BY id ASC
LIMIT 100
`, [BaseModel.TYPE_NOTE, lastId]);
`, [BaseModel.TYPE_NOTE, Setting.value('resourceService.lastProcessedChangeId')]);
if (!changes.length) break;
@ -61,15 +58,11 @@ class ResourceService extends BaseService {
throw new Error('Invalid change type: ' + change.type);
}
lastId = change.id;
processedChangeIds.push(change.id);
Setting.setValue('resourceService.lastProcessedChangeId', change.id);
}
}
if (lastId) {
await ItemChange.db().exec('DELETE FROM item_changes WHERE id <= ?', [lastId]);
}
await Setting.saveAll();
await NoteResource.addOrphanedResources();

View File

@ -0,0 +1,221 @@
const { Logger } = require('lib/logger.js');
const { shim } = require('lib/shim.js');
const ItemChange = require('lib/models/ItemChange.js');
const Setting = require('lib/models/Setting.js');
const Note = require('lib/models/Note.js');
const BaseModel = require('lib/BaseModel.js');
class SearchEngine {
constructor() {
this.dispatch = (action) => {};
this.logger_ = new Logger();
this.db_ = null;
}
static instance() {
if (this.instance_) return this.instance_;
this.instance_ = new SearchEngine();
return this.instance_;
}
setLogger(logger) {
this.logger_ = logger;
}
logger() {
return this.logger_;
}
setDb(db) {
this.db_ = db;
}
db() {
return this.db_;
}
async countRows() {
const sql = 'SELECT count(*) as total FROM notes_fts'
const row = await this.db().selectOne(sql);
return row && row['total'] ? row['total'] : 0;
}
columnIndexesFromOffsets_(offsets) {
const occurenceCount = Math.floor(offsets.length / 4);
const indexes = [];
for (let i = 0; i < occurenceCount; i++) {
const colIndex = offsets[i * 4] - 1;
if (indexes.indexOf(colIndex) < 0) indexes.push(colIndex);
}
return indexes;
}
calculateWeight_(offsets, termCount) {
// Offset doc: https://www.sqlite.org/fts3.html#offsets
// - If there's only one term in the query string, the content with the most matches goes on top
// - If there are multiple terms, the result with the most occurrences that are closest to each other goes on top.
// e.g. if the query is "abcd efgh", "abcd efgh" will go before "abcd XX efgh".
const occurenceCount = Math.floor(offsets.length / 4);
if (termCount === 1) return occurenceCount;
let spread = 0;
let previousDist = null;
for (let i = 0; i < occurenceCount; i++) {
const dist = offsets[i * 4 + 2];
if (previousDist !== null) {
const delta = dist - previousDist;
spread += delta;
}
previousDist = dist;
}
// Divide the number of occurrences by the spread, so that even if a note contains the searched terms
// many times, but those terms are spread far apart, it gets a lower weight than a note that has the
// terms only once or twice but right next to each other.
return occurenceCount / spread;
}
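// Worked example of the weighting above, with hypothetical offsets values. SQLite's
// offsets() returns four integers per match: column, term, byte offset and size.
// For the two-term query "abcd efgh":
//
//     // "abcd efgh"    -> matches at byte offsets 0 and 5 -> spread 5 -> weight 2 / 5 = 0.4
//     this.calculateWeight_([1, 0, 0, 4, 1, 1, 5, 4], 2);
//     // "abcd XX efgh" -> matches at byte offsets 0 and 8 -> spread 8 -> weight 2 / 8 = 0.25
//     this.calculateWeight_([1, 0, 0, 4, 1, 1, 8, 4], 2);
//
// so the tighter match is given the higher weight, as in the relevance tests above.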
orderResults_(rows, parsedQuery) {
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const offsets = row.offsets.split(' ').map(o => Number(o));
row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);
// row.colIndexes = this.columnIndexesFromOffsets_(offsets);
// row.offsets = offsets;
}
rows.sort((a, b) => {
if (a.weight < b.weight) return +1;
if (a.weight > b.weight) return -1;
return 0;
});
}
// https://stackoverflow.com/a/13818704/561309
queryTermToRegex(term) {
while (term.length && term.indexOf('*') === 0) {
term = term.substr(1);
}
const preg_quote = (str, delimiter) => {
return (str + '').replace(new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&');
} // [^ \t,\.,\+\-\\*?!={}<>\|:"\'\(\)[\]]
let regexString = preg_quote(term);
if (regexString[regexString.length - 1] === '*') {
regexString = regexString.substr(0, regexString.length - 2) + '[^' + preg_quote(' \t\n\r,.,+-*?!={}<>|:"\'()[]') + ']' + '*';
}
return regexString;
}
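// Rough usage sketch: a trailing "*" becomes a "match any non-separator characters"
// suffix while a leading "*" is stripped, so (as in the wildcard tests above) the
// regex built from 'do*' matches "do", "dog" and "domino":
//
//     new RegExp(this.queryTermToRegex('do*'), 'gmi').test('domino'); // true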
parseQuery(query) {
const terms = {_:[]};
let inQuote = false;
let currentCol = '_';
let currentTerm = '';
for (let i = 0; i < query.length; i++) {
const c = query[i];
if (c === '"') {
if (inQuote) {
terms[currentCol].push(currentTerm);
currentTerm = '';
inQuote = false;
} else {
inQuote = true;
}
continue;
}
if (c === ' ' && !inQuote) {
if (!currentTerm) continue;
terms[currentCol].push(currentTerm);
currentCol = '_';
currentTerm = '';
continue;
}
if (c === ':' && !inQuote) {
currentCol = currentTerm;
terms[currentCol] = [];
currentTerm = '';
continue;
}
currentTerm += c;
}
if (currentTerm) terms[currentCol].push(currentTerm);
// Filter terms:
// - Convert wildcards to regex
// - Remove columns with no terms
// - Add count of terms
let termCount = 0;
const keys = [];
for (let col in terms) {
if (!terms.hasOwnProperty(col)) continue;
if (!terms[col].length) {
delete terms[col];
continue;
}
for (let i = terms[col].length - 1; i >= 0; i--) {
const term = terms[col][i];
// SQLite FTS doesn't allow "*" queries and neither shall we
if (term === '*') {
terms[col].splice(i, 1);
continue;
}
if (term.indexOf('*') >= 0) {
terms[col][i] = { type: 'regex', value: this.queryTermToRegex(term) };
}
}
termCount += terms[col].length;
keys.push(col);
}
return {
termCount: termCount,
keys: keys,
terms: terms,
};
}
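// For example (mirroring the parser tests above), 'title:abcd efgh' parses to
// something like:
//
//     { termCount: 2, keys: ['_', 'title'], terms: { _: ['efgh'], title: ['abcd'] } }
//
// and a wildcard term such as 'do*' comes back as { type: 'regex', value: ... }
// rather than a plain string.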
allParsedQueryTerms(parsedQuery) {
if (!parsedQuery || !parsedQuery.termCount) return [];
let output = [];
for (let col in parsedQuery.terms) {
if (!parsedQuery.terms.hasOwnProperty(col)) continue;
output = output.concat(parsedQuery.terms[col]);
}
return output;
}
async search(query) {
const parsedQuery = this.parseQuery(query);
const sql = 'SELECT id, title, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?'
const rows = await this.db().selectAll(sql, [query]);
this.orderResults_(rows, parsedQuery);
return rows;
}
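// Typical usage (as wired up in app.js and BaseApplication.js in this commit):
// the singleton is given the database once, then queried whenever a saved search
// is selected:
//
//     SearchEngine.instance().setDb(reg.db());
//     const rows = await SearchEngine.instance().search('abcd efgh');
//     // rows are ordered by relevance; each has id, title, offsets and weight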
}
module.exports = SearchEngine;