Mirror of https://github.com/laurent22/joplin.git
Fixed logic to update search engine data
This commit is contained in:
parent 0a6f8b0cfe
commit 5ec7c16e3e
@@ -2,15 +2,8 @@ require('app-module-path').addPath(__dirname);
const { time } = require('lib/time-utils.js');
const { fileContentEqual, setupDatabase, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync } = require('test-utils.js');
const markdownUtils = require('lib/markdownUtils.js');
const SearchEngine = require('lib/services/SearchEngine');
const Folder = require('lib/models/Folder');
const Note = require('lib/models/Note');
const ItemChange = require('lib/models/ItemChange');
const Tag = require('lib/models/Tag');
const Resource = require('lib/models/Resource');

jasmine.DEFAULT_TIMEOUT_INTERVAL = 10000;

process.on('unhandledRejection', (reason, p) => {
	console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
@@ -26,45 +19,57 @@ describe('services_SearchEngine', function() {

		engine = new SearchEngine();
		engine.setDb(db());
		await engine.dropFtsTables();
		await engine.createFtsTables();

		done();
	});

	it('should create the FTS table', async (done) => {
		let rows;
	it('should keep the content and FTS table in sync', async (done) => {
		let rows, n1, n2, n3;

		await Note.save({ title: "abcd efgh" });
		rows = await engine.search('abcd efgh');
		n1 = await Note.save({ title: "a" });
		n2 = await Note.save({ title: "b" });
		rows = await engine.search('a');
		expect(rows.length).toBe(1);
		expect(rows[0].title).toBe('a');

		await Note.delete(n1.id);
		rows = await engine.search('a');
		expect(rows.length).toBe(0);
		rows = await engine.search('b');
		expect(rows[0].title).toBe('b');

		await Note.save({ id: n2.id, title: 'c' });
		rows = await engine.search('b');
		expect(rows.length).toBe(0);
		rows = await engine.search('c');
		expect(rows[0].title).toBe('c');

		await Note.save({ id: n2.id, encryption_applied: 1 });
		rows = await engine.search('c');
		expect(rows.length).toBe(0);

		rows = await engine.search('abcd efgh');
		expect(await engine.countRows()).toBe(1);
		await Note.save({ id: n2.id, encryption_applied: 0 });
		rows = await engine.search('c');
		expect(rows.length).toBe(1);

		done();
	});

	it('should update the FTS table', async (done) => {
		let rows;
	it('should order search results by relevance (1)', async (done) => {
		const n1 = await Note.save({ title: "abcd efgh" }); // 3
		const n2 = await Note.save({ title: "abcd aaaaa abcd abcd" }); // 1
		const n3 = await Note.save({ title: "abcd aaaaa bbbb eeee abcd" }); // 2

		expect(await engine.countRows()).toBe(0);
		const rows = await engine.search('abcd');

		await Note.save({ title: "abcd efgh" });
		await engine.updateFtsTables();
		expect(await engine.countRows()).toBe(1);

		await Note.save({ title: "abcd efgh" });
		await engine.updateFtsTables();
		expect(await engine.countRows()).toBe(2);

		await engine.updateFtsTables();
		expect(await engine.countRows()).toBe(2);
		expect(rows[0].id).toBe(n2.id);
		expect(rows[1].id).toBe(n3.id);
		expect(rows[2].id).toBe(n1.id);

		done();
	});

	it('should order search results by relevance', async (done) => {
	it('should order search results by relevance (2)', async (done) => {
		// 1
		const n1 = await Note.save({ title: "abcd efgh", body: "XX abcd XX efgh" });
		// 4
@@ -76,8 +81,6 @@ describe('services_SearchEngine', function() {
		// 5
		const n5 = await Note.save({ title: "occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh" });

		await engine.updateFtsTables();

		const rows = await engine.search('abcd efgh');

		expect(rows[0].id).toBe(n1.id);
@@ -95,8 +98,6 @@ describe('services_SearchEngine', function() {
		const n1 = await Note.save({ title: "abcd efgh ijkl", body: "aaaa bbbb" });
		const n2 = await Note.save({ title: "iiii efgh bbbb", body: "aaaa bbbb" });

		await engine.updateFtsTables();

		rows = await engine.search('abcd ijkl');
		expect(rows.length).toBe(1);

@@ -124,4 +125,54 @@ describe('services_SearchEngine', function() {
		done();
	});

	it('should parse normal query strings', async (done) => {
		let rows;

		const testCases = [
			['abcd efgh', { _: ['abcd', 'efgh'] }],
			['abcd efgh', { _: ['abcd', 'efgh'] }],
			['title:abcd efgh', { _: ['efgh'], title: ['abcd'] }],
			['title:abcd', { title: ['abcd'] }],
			['"abcd efgh"', { _: ['abcd efgh'] }],
		];

		for (let i = 0; i < testCases.length; i++) {
			const t = testCases[i];
			const input = t[0];
			const expected = t[1];
			const actual = engine.parseQuery(input);

			expect(JSON.stringify(actual.terms._)).toBe(JSON.stringify(expected._));
			expect(JSON.stringify(actual.terms.title)).toBe(JSON.stringify(expected.title));
			expect(JSON.stringify(actual.terms.body)).toBe(JSON.stringify(expected.body));
		}

		done();
	});

	it('should parse query strings with wildcards', async (done) => {
		let rows;

		const testCases = [
			['do*', ['do', 'dog', 'domino'] ],
			['*an*', ['an', 'piano', 'anneau', 'plan', 'PANIC'] ],
		];

		for (let i = 0; i < testCases.length; i++) {
			const t = testCases[i];
			const input = t[0];
			const expected = t[1];
			const regex = engine.parseQuery(input).terms._[0];

			for (let j = 0; j < expected.length; j++) {
				const r = expected[j].match(regex);
				expect(!!r).toBe(true);
			}
		}

		expect(engine.parseQuery('*').termCount).toBe(0);

		done();
	});

});
@@ -796,7 +796,6 @@ class Application extends BaseApplication {

		SearchEngine.instance().setDb(reg.db());
		SearchEngine.instance().setLogger(reg.logger());
		SearchEngine.runInBackground();

		if (Setting.value('env') === 'dev') {
			AlarmService.updateAllNotifications();

@@ -447,9 +447,27 @@ class JoplinDatabase extends Database {
			}

			if (targetVersion == 15) {
				// NOTE: Duplicated from SearchEngine.createFtsTables()
				queries.push('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes", notindexed="id", id, title, body)');
				queries.push('INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0');

				// Keep the content tables (notes) and the FTS table (notes_fts) in sync.
				// More info at https://www.sqlite.org/fts3.html#_external_content_fts4_tables_
				queries.push(`
					CREATE TRIGGER notes_fts_before_update BEFORE UPDATE ON notes BEGIN
						DELETE FROM notes_fts WHERE docid=old.rowid;
					END;`);
				queries.push(`
					CREATE TRIGGER notes_fts_before_delete BEFORE DELETE ON notes BEGIN
						DELETE FROM notes_fts WHERE docid=old.rowid;
					END;`);
				queries.push(`
					CREATE TRIGGER notes_after_update AFTER UPDATE ON notes BEGIN
						INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0 AND new.rowid = notes.rowid;
					END;`);
				queries.push(`
					CREATE TRIGGER notes_after_insert AFTER INSERT ON notes BEGIN
						INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0 AND new.rowid = notes.rowid;
					END;`);
			}

			queries.push({ sql: 'UPDATE version SET version = ?', params: [targetVersion] });

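The comment above points at SQLite's external-content FTS4 tables: notes_fts only stores the index, and the four triggers keep it aligned with the notes table. A minimal sketch of the intended behaviour, assuming a simplified notes schema and the generic db.exec()/db.selectAll() helpers used elsewhere in this diff (not code from the commit):

// Hypothetical walkthrough of the trigger behaviour (illustrative only).
async function demoTriggerSync(db) {
	// Assumes a simplified notes schema; the real table has more columns.
	// notes_after_insert copies the new row into notes_fts, unless it is a
	// conflict or its content is still encrypted.
	await db.exec("INSERT INTO notes (id, title, body, is_conflict, encryption_applied) VALUES ('n1', 'abcd efgh', '', 0, 0)");
	let rows = await db.selectAll('SELECT id FROM notes_fts WHERE notes_fts MATCH ?', ['abcd']);
	// rows.length === 1

	// notes_fts_before_update drops the old FTS row, then notes_after_update
	// re-inserts it only if it is still searchable. Flagging the note as
	// encrypted therefore removes it from the index.
	await db.exec("UPDATE notes SET encryption_applied = 1 WHERE id = 'n1'");
	rows = await db.selectAll('SELECT id FROM notes_fts WHERE notes_fts MATCH ?', ['abcd']);
	// rows.length === 0
}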
@@ -13,67 +13,6 @@ class SearchEngine {
		this.db_ = null;
	}

	// Note: Duplicated in JoplinDatabase migration 15
	async createFtsTables() {
		await this.db().exec('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes", notindexed="id", id, title, body)');
		await this.db().exec('INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0');
	}

	async dropFtsTables() {
		await this.db().exec('DROP TABLE IF EXISTS notes_fts');
	}

	async updateFtsTables() {

		// await this.db().exec('DELETE FROM notes_fts');
		// await this.db().exec('INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0');
		// return;

		this.logger().info('SearchEngine: Updating FTS table...');

		await ItemChange.waitForAllSaved();

		const startTime = Date.now();

		let lastChangeId = Setting.value('searchEngine.lastProcessedChangeId');

		while (true) {
			const changes = await ItemChange.modelSelectAll(`
				SELECT id, item_id, type
				FROM item_changes
				WHERE item_type = ?
				AND id > ?
				ORDER BY id ASC
				LIMIT 100
			`, [BaseModel.TYPE_NOTE, lastChangeId]);

			if (!changes.length) break;

			const queries = [];

			for (let i = 0; i < changes.length; i++) {
				const change = changes[i];

				if (change.type === ItemChange.TYPE_CREATE || change.type === ItemChange.TYPE_UPDATE) {
					queries.push({ sql: 'DELETE FROM notes_fts WHERE id = ?', params: [change.item_id] });
					queries.push({ sql: 'INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE id = ?', params: [change.item_id] });
				} else if (change.type === ItemChange.TYPE_DELETE) {
					queries.push({ sql: 'DELETE FROM notes_fts WHERE id = ?', params: [change.item_id] });
				} else {
					throw new Error('Invalid change type: ' + change.type);
				}

				lastChangeId = change.id;
			}

			await this.db().transactionExecBatch(queries);
			Setting.setValue('searchEngine.lastProcessedChangeId', lastChangeId);
			await Setting.saveAll();
		}

		this.logger().info('SearchEngine: Updated FTS table in ' + (Date.now() - startTime) + 'ms');
	}

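The loop above, which this hunk removes or reworks, drains item_changes incrementally rather than rebuilding the whole index. A rough, generic sketch of that batching pattern, using hypothetical helper names rather than Joplin APIs (not code from the commit):

// Hypothetical sketch of incremental change processing (assumed helpers).
async function processChanges(selectChangesAfter, applyChange, lastProcessedId) {
	while (true) {
		// e.g. SELECT ... WHERE id > ? ORDER BY id ASC LIMIT 100
		const changes = await selectChangesAfter(lastProcessedId, 100);
		if (!changes.length) break;

		for (const change of changes) {
			await applyChange(change); // delete and/or re-insert the FTS row
			lastProcessedId = change.id;
		}
	}
	// Persisting this id (as Setting 'searchEngine.lastProcessedChangeId' does
	// above) is what lets the next run resume where this one stopped.
	return lastProcessedId;
}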
	static instance() {
		if (this.instance_) return this.instance_;
		this.instance_ = new SearchEngine();
@@ -114,15 +53,17 @@ class SearchEngine {
		return indexes;
	}

	calculateWeight_(offsets) {
	calculateWeight_(offsets, termCount) {
		// Offset doc: https://www.sqlite.org/fts3.html#offsets

		// TODO: If there's only one term - special case - whatever has the most occurrences wins.
		// TODO: Parse query string.
		// TODO: Support wildcards
		// - If there's only one term in the query string, the content with the most matches goes on top
		// - If there are multiple terms, the result with the most occurrences that are closest to each other goes on top.
		//   eg. if the query is "abcd efgh", "abcd efgh" will go before "abcd XX efgh".

		const occurenceCount = Math.floor(offsets.length / 4);

		if (termCount === 1) return occurenceCount;

		let spread = 0;
		let previousDist = null;
		for (let i = 0; i < occurenceCount; i++) {
@@ -142,11 +83,11 @@ class SearchEngine {
		return occurenceCount / spread;
	}

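To make the weighting comments above concrete, here is a standalone sketch of the same idea. It assumes spread is the summed distance between consecutive match offsets; the exact loop body is cut off by the hunk boundary above, so this is illustrative rather than the commit's code:

// Illustrative only. offsets() returns 4 integers per match:
// column index, term index, byte offset, match length.
function sketchWeight(offsets, termCount) {
	const occurrenceCount = Math.floor(offsets.length / 4);
	if (termCount === 1) return occurrenceCount;

	let spread = 0;
	let previousOffset = null;
	for (let i = 0; i < occurrenceCount; i++) {
		const offset = offsets[i * 4 + 2]; // byte offset of this occurrence
		if (previousOffset !== null) spread += Math.abs(offset - previousOffset);
		previousOffset = offset;
	}
	return occurrenceCount / spread; // more matches, packed closer together => higher weight
}

// "abcd efgh" scores higher than "abcd XX efgh" for the query "abcd efgh":
sketchWeight([0, 0, 0, 4, 0, 1, 5, 4], 2); // 2 / 5 = 0.4
sketchWeight([0, 0, 0, 4, 0, 1, 8, 4], 2); // 2 / 8 = 0.25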
	orderResults_(rows) {
	orderResults_(rows, parsedQuery) {
		for (let i = 0; i < rows.length; i++) {
			const row = rows[i];
			const offsets = row.offsets.split(' ').map(o => Number(o));
			row.weight = this.calculateWeight_(offsets);
			row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);
			// row.colIndexes = this.columnIndexesFromOffsets_(offsets);
			// row.offsets = offsets;
		}
@@ -158,27 +99,104 @@ class SearchEngine {
		});
	}

	// https://stackoverflow.com/a/13818704/561309
	queryTermToRegex(term) {
		const preg_quote = (str, delimiter) => {
			return (str + '').replace(new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&');
		}
		const regexString = preg_quote(term).replace(/\\\*/g, '.*').replace(/\\\?/g, '.');
		return new RegExp(regexString, 'gmi');
	}

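For reference, this is what the conversion produces for the wildcard test cases earlier in this diff (illustrative, not part of the commit):

// queryTermToRegex('do*')  -> /do.*/gmi    matches 'do', 'dog', 'domino'
// queryTermToRegex('*an*') -> /.*an.*/gmi  matches 'piano', 'plan', 'PANIC' (case-insensitive), ...
'domino'.match(new RegExp('do.*', 'gmi')); // [ 'domino' ]
'PANIC'.match(new RegExp('.*an.*', 'gmi')); // [ 'PANIC' ]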
	parseQuery(query) {
		const terms = {_:[]};

		let inQuote = false;
		let currentCol = '_';
		let currentTerm = '';
		for (let i = 0; i < query.length; i++) {
			const c = query[i];

			if (c === '"') {
				if (inQuote) {
					terms[currentCol].push(currentTerm);
					currentTerm = '';
					inQuote = false;
				} else {
					inQuote = true;
				}
				continue;
			}

			if (c === ' ' && !inQuote) {
				if (!currentTerm) continue;
				terms[currentCol].push(currentTerm);
				currentCol = '_';
				currentTerm = '';
				continue;
			}

			if (c === ':' && !inQuote) {
				currentCol = currentTerm;
				terms[currentCol] = [];
				currentTerm = '';
				continue;
			}

			currentTerm += c;
		}

		if (currentTerm) terms[currentCol].push(currentTerm);

		// Filter terms:
		// - Convert wildcards to regex
		// - Remove columns with no results
		// - Add count of terms

		let termCount = 0;
		const keys = [];
		for (let col in terms) {
			if (!terms.hasOwnProperty(col)) continue;

			if (!terms[col].length) {
				delete terms[col];
				continue;
			}

			for (let i = terms[col].length - 1; i >= 0; i--) {
				const term = terms[col][i];

				// SQLite FTS doesn't allow "*" queries and neither shall we
				if (term === '*') {
					terms[col].splice(i, 1);
					continue;
				}

				if (term.indexOf('*') >= 0) {
					terms[col][i] = this.queryTermToRegex(term);
				}
			}

			termCount += terms[col].length;

			keys.push(col);
		}

		return {
			termCount: termCount,
			keys: keys,
			terms: terms,
		};
	}

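Putting the pieces together, the object parseQuery() returns for a mixed query looks roughly like this (shape derived from the parser above and the test cases earlier in this diff; illustrative values, not output captured from the commit):

// const parsed = engine.parseQuery('title:abcd efgh do*');
// parsed.termCount -> 3
// parsed.keys      -> ['_', 'title']
// parsed.terms     -> { _: ['efgh', /do.*/gmi], title: ['abcd'] }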
	async search(query) {
		const parsedQuery = this.parseQuery(query);
		const sql = 'SELECT id, title, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?'
		const rows = await this.db().selectAll(sql, [query]);
		this.orderResults_(rows);
		this.orderResults_(rows, parsedQuery);
		return rows;
	}

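Illustrative only: the rough shape of a row coming back from that MATCH query, carrying the offsets() string that orderResults_() hands to calculateWeight_():

// const rows = await engine.search('abcd efgh');
// rows[0] -> {
//   id: '...',                  // note id
//   title: 'abcd efgh',
//   offsets: '1 0 0 4 1 1 5 4', // column, term, byte offset, length per match
//   weight: 0.4,                // added by orderResults_()
// }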
	static runInBackground() {
		if (this.isRunningInBackground_) return;

		this.isRunningInBackground_ = true;

		setTimeout(() => {
			SearchEngine.instance().updateFtsTables();
		}, 1000 * 30);

		shim.setInterval(() => {
			SearchEngine.instance().updateFtsTables();
		}, 1000 * 60 * 30);
	}

}

module.exports = SearchEngine;