From 460f8266729e25136c9f68de9ce5c27b91a9dd6e Mon Sep 17 00:00:00 2001 From: Laurent Cozic Date: Mon, 10 Dec 2018 18:54:46 +0000 Subject: [PATCH] Nearly finished search engine backend --- CliClient/tests/services_SearchEngine.js | 93 +++++++++- CliClient/tests/test-utils.js | 3 + ReactNativeClient/lib/models/Setting.js | 1 + .../lib/services/SearchEngine.js | 159 +++++++++++++----- 4 files changed, 206 insertions(+), 50 deletions(-) diff --git a/CliClient/tests/services_SearchEngine.js b/CliClient/tests/services_SearchEngine.js index 0760a5cba..00e64b9c4 100644 --- a/CliClient/tests/services_SearchEngine.js +++ b/CliClient/tests/services_SearchEngine.js @@ -6,6 +6,7 @@ const markdownUtils = require('lib/markdownUtils.js'); const SearchEngine = require('lib/services/SearchEngine'); const Folder = require('lib/models/Folder'); const Note = require('lib/models/Note'); +const ItemChange = require('lib/models/ItemChange'); const Tag = require('lib/models/Tag'); const Resource = require('lib/models/Resource'); @@ -22,19 +23,103 @@ describe('services_SearchEngine', function() { beforeEach(async (done) => { await setupDatabaseAndSynchronizer(1); await switchClient(1); + engine = new SearchEngine(); engine.setDb(db()); + await engine.dropFtsTables(); + await engine.createFtsTables(); + done(); }); it('should create the FTS table', async (done) => { + let rows; + await Note.save({ title: "abcd efgh" }); - await Note.save({ title: "abcd aaaaa bbbb eeee efgh" }); - await Note.save({ title: "abcd aaaaa efgh" }); - await Note.save({ title: "blablablabla blabla bla abcd X efgh" }); - await Note.save({ title: "occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh" }); + rows = await engine.search('abcd efgh'); + expect(rows.length).toBe(0); + + rows = await engine.search('abcd efgh'); + expect(await engine.countRows()).toBe(1); + + done(); + }); + + it('should update the FTS table', async (done) => { + let rows; + + expect(await engine.countRows()).toBe(0); + + await Note.save({ title: "abcd efgh" }); + await engine.updateFtsTables(); + expect(await engine.countRows()).toBe(1); + + await Note.save({ title: "abcd efgh" }); + await engine.updateFtsTables(); + expect(await engine.countRows()).toBe(2); await engine.updateFtsTables(); + expect(await engine.countRows()).toBe(2); + + done(); + }); + + it('should order search results by relevance', async (done) => { + // 1 + const n1 = await Note.save({ title: "abcd efgh", body: "XX abcd XX efgh" }); + // 4 + const n2 = await Note.save({ title: "abcd aaaaa bbbb eeee efgh" }); + // 3 + const n3 = await Note.save({ title: "abcd aaaaa efgh" }); + // 2 + const n4 = await Note.save({ title: "blablablabla blabla bla abcd X efgh" }); + // 5 + const n5 = await Note.save({ title: "occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh" }); + + await engine.updateFtsTables(); + + const rows = await engine.search('abcd efgh'); + + expect(rows[0].id).toBe(n1.id); + expect(rows[1].id).toBe(n4.id); + expect(rows[2].id).toBe(n3.id); + expect(rows[3].id).toBe(n2.id); + expect(rows[4].id).toBe(n5.id); + + done(); + }); + + it('should supports various query types', async (done) => { + let rows; + + const n1 = await Note.save({ title: "abcd efgh ijkl", body: "aaaa bbbb" }); + const n2 = await Note.save({ title: "iiii efgh bbbb", body: "aaaa bbbb" }); + + await engine.updateFtsTables(); + + rows = await engine.search('abcd ijkl'); + expect(rows.length).toBe(1); + + rows = await engine.search('"abcd ijkl"'); + expect(rows.length).toBe(0); + + rows = await engine.search('"abcd efgh"'); + expect(rows.length).toBe(1); + + rows = await engine.search('title:abcd'); + expect(rows.length).toBe(1); + + rows = await engine.search('title:efgh'); + expect(rows.length).toBe(2); + + rows = await engine.search('body:abcd'); + expect(rows.length).toBe(0); + + rows = await engine.search('body:bbbb'); + expect(rows.length).toBe(2); + + rows = await engine.search('body:bbbb iiii'); + expect(rows.length).toBe(1); done(); }); diff --git a/CliClient/tests/test-utils.js b/CliClient/tests/test-utils.js index c23569800..3196a9e53 100644 --- a/CliClient/tests/test-utils.js +++ b/CliClient/tests/test-utils.js @@ -4,6 +4,7 @@ const { DatabaseDriverNode } = require('lib/database-driver-node.js'); const BaseModel = require('lib/BaseModel.js'); const Folder = require('lib/models/Folder.js'); const Note = require('lib/models/Note.js'); +const ItemChange = require('lib/models/ItemChange.js'); const Resource = require('lib/models/Resource.js'); const Tag = require('lib/models/Tag.js'); const NoteTag = require('lib/models/NoteTag.js'); @@ -122,6 +123,8 @@ async function switchClient(id) { async function clearDatabase(id = null) { if (id === null) id = currentClient_; + await ItemChange.waitForAllSaved(); + let queries = [ 'DELETE FROM notes', 'DELETE FROM folders', diff --git a/ReactNativeClient/lib/models/Setting.js b/ReactNativeClient/lib/models/Setting.js index baf494710..1dd366bff 100644 --- a/ReactNativeClient/lib/models/Setting.js +++ b/ReactNativeClient/lib/models/Setting.js @@ -159,6 +159,7 @@ class Setting extends BaseModel { 'api.token': { value: null, type: Setting.TYPE_STRING, public: false }, 'resourceService.lastProcessedChangeId': { value: 0, type: Setting.TYPE_INT, public: false }, + 'searchEngine.lastProcessedChangeId': { value: 0, type: Setting.TYPE_INT, public: false }, }; return this.metadata_; diff --git a/ReactNativeClient/lib/services/SearchEngine.js b/ReactNativeClient/lib/services/SearchEngine.js index c97a8a0aa..2cd5e9685 100644 --- a/ReactNativeClient/lib/services/SearchEngine.js +++ b/ReactNativeClient/lib/services/SearchEngine.js @@ -1,4 +1,8 @@ const { Logger } = require('lib/logger.js'); +const ItemChange = require('lib/models/ItemChange.js'); +const Setting = require('lib/models/Setting.js'); +const Note = require('lib/models/Note.js'); +const BaseModel = require('lib/BaseModel.js'); class SearchEngine { @@ -7,65 +11,63 @@ class SearchEngine { this.logger_ = new Logger(); this.db_ = null; } - + + async createFtsTables() { + await this.db().exec('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes", notindexed="id", id, title, body)'); + await this.db().exec('INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0'); + } + + async dropFtsTables() { + await this.db().exec('DROP TABLE IF EXISTS notes_fts'); + } + async updateFtsTables() { - // CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes", title, body); - // INSERT INTO notes_fts(docid, title, body) SELECT rowid, title, body FROM notes; - // SELECT title, offsets(notes_fts) length(offsets(notes_fts)) - length(replace(offsets(notes_fts), ' ', '')) + 1 - // FROM notes_fts - // WHERE notes_fts - // MATCH 'test'; - await this.db().exec('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes", title, body)'); - await this.db().exec('INSERT INTO notes_fts(docid, title, body) SELECT rowid, title, body FROM notes;'); + // await this.db().exec('DELETE FROM notes_fts'); + // await this.db().exec('INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE is_conflict = 0 AND encryption_applied = 0'); + // return; - - const sql = `SELECT docid, title, offsets(notes_fts) as offsets FROM notes_fts WHERE notes_fts MATCH "abcd efgh" `; + await ItemChange.waitForAllSaved(); - const rows = await this.db().selectAll(sql); + const startTime = Date.now(); - const calculateWeight = (offsets) => { - // Offset doc: https://www.sqlite.org/fts3.html#offsets - - const occurenceCount = Math.floor(offsets.length / 4); + let lastChangeId = Setting.value('searchEngine.lastProcessedChangeId'); - let spread = 0; - let previousDist = null; - for (let i = 0; i < occurenceCount; i++) { - const dist = offsets[i * 4 + 2]; + while (true) { + const changes = await ItemChange.modelSelectAll(` + SELECT id, item_id, type + FROM item_changes + WHERE item_type = ? + AND id > ? + ORDER BY id ASC + LIMIT 100 + `, [BaseModel.TYPE_NOTE, lastChangeId]); - if (previousDist !== null) { - const delta = dist - previousDist; - spread += delta; + if (!changes.length) break; + + const queries = []; + + for (let i = 0; i < changes.length; i++) { + const change = changes[i]; + + if (change.type === ItemChange.TYPE_CREATE || change.type === ItemChange.TYPE_UPDATE) { + queries.push({ sql: 'DELETE FROM notes_fts WHERE id = ?', params: [change.item_id] }); + queries.push({ sql: 'INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes WHERE id = ?', params: [change.item_id] }); + } else if (change.type === ItemChange.TYPE_DELETE) { + queries.push({ sql: 'DELETE FROM notes_fts WHERE id = ?', params: [change.item_id] }); + } else { + throw new Error('Invalid change type: ' + change.type); } - previousDist = dist; + lastChangeId = change.id; } - // Divide the number of occureances by the spread so even if a note has many times the searched terms - // but these terms are very spread appart, they'll be given a lower weight than a note that has the - // terms once or twice but just next to each others. - return occurenceCount / spread; + await this.db().transactionExecBatch(queries); + Setting.setValue('searchEngine.lastProcessedChangeId', lastChangeId); + await Setting.saveAll(); } - const orderResults = (rows) => { - for (let i = 0; i < rows.length; i++) { - const row = rows[i]; - row.weight = calculateWeight(row.offsets.split(' ').map(o => Number(o))); - } - - rows.sort((a, b) => { - if (a.weight < b.weight) return +1; - if (a.weight > b.weight) return -1; - return 0; - }); - } - - orderResults(rows); - - console.info(rows); - - // console.info(rows); + this.logger().info('SearchEngine: Updated FTS table in ' + (Date.now() - startTime) + 'ms'); } static instance() { @@ -90,6 +92,71 @@ class SearchEngine { return this.db_; } + async countRows() { + const sql = 'SELECT count(*) as total FROM notes_fts' + const row = await this.db().selectOne(sql); + return row && row['total'] ? row['total'] : 0; + } + + columnIndexesFromOffsets_(offsets) { + const occurenceCount = Math.floor(offsets.length / 4); + const indexes = []; + + for (let i = 0; i < occurenceCount; i++) { + const colIndex = offsets[i * 4] - 1; + if (indexes.indexOf(colIndex) < 0) indexes.push(colIndex); + } + + return indexes; + } + + calculateWeight_(offsets) { + // Offset doc: https://www.sqlite.org/fts3.html#offsets + + const occurenceCount = Math.floor(offsets.length / 4); + + let spread = 0; + let previousDist = null; + for (let i = 0; i < occurenceCount; i++) { + const dist = offsets[i * 4 + 2]; + + if (previousDist !== null) { + const delta = dist - previousDist; + spread += delta; + } + + previousDist = dist; + } + + // Divide the number of occurences by the spread so even if a note has many times the searched terms + // but these terms are very spread appart, they'll be given a lower weight than a note that has the + // terms once or twice but just next to each others. + return occurenceCount / spread; + } + + orderResults_(rows) { + for (let i = 0; i < rows.length; i++) { + const row = rows[i]; + const offsets = row.offsets.split(' ').map(o => Number(o)); + row.weight = this.calculateWeight_(offsets); + // row.colIndexes = this.columnIndexesFromOffsets_(offsets); + // row.offsets = offsets; + } + + rows.sort((a, b) => { + if (a.weight < b.weight) return +1; + if (a.weight > b.weight) return -1; + return 0; + }); + } + + async search(query) { + const sql = 'SELECT id, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?' + const rows = await this.db().selectAll(sql, [query]); + this.orderResults_(rows); + return rows; + } + } module.exports = SearchEngine; \ No newline at end of file