1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-11 18:24:43 +02:00

Started support for FTS search

This commit is contained in:
Laurent Cozic 2018-12-09 21:45:50 +01:00
parent 57225a36b9
commit 3b6131f1ca
2 changed files with 137 additions and 0 deletions

View File

@ -0,0 +1,42 @@
require('app-module-path').addPath(__dirname);
const { time } = require('lib/time-utils.js');
const { fileContentEqual, setupDatabase, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync } = require('test-utils.js');
const markdownUtils = require('lib/markdownUtils.js');
const SearchEngine = require('lib/services/SearchEngine');
const Folder = require('lib/models/Folder');
const Note = require('lib/models/Note');
const Tag = require('lib/models/Tag');
const Resource = require('lib/models/Resource');
// Raise Jasmine's default timeout for asynchronous specs to 10000 ms.
jasmine.DEFAULT_TIMEOUT_INTERVAL = 10000;

// Log promise rejections that no handler picked up, printing both the
// promise and the rejection reason.
const reportUnhandledRejection = (reason, promise) => {
	console.log('Unhandled Rejection at: Promise', promise, 'reason:', reason);
};
process.on('unhandledRejection', reportUnhandledRejection);

// SearchEngine instance under test; assigned by the suite's beforeEach hook.
let engine = null;
describe('services_SearchEngine', () => {

	// Before every spec: set up database/synchronizer 1, switch to client 1,
	// and create a fresh SearchEngine wired to the current database.
	beforeEach(async (done) => {
		await setupDatabaseAndSynchronizer(1);
		await switchClient(1);

		engine = new SearchEngine();
		engine.setDb(db());

		done();
	});

	// Saves notes whose titles contain "abcd" and "efgh" separated by varying
	// amounts of text, then runs the FTS table update. This spec contains no
	// expect() calls.
	it('should create the FTS table', async (done) => {
		const noteTitles = [
			"abcd efgh",
			"abcd aaaaa bbbb eeee efgh",
			"abcd aaaaa efgh",
			"blablablabla blabla bla abcd X efgh",
			"occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh",
		];

		// Save one note at a time, in the listed order.
		for (const title of noteTitles) {
			await Note.save({ title: title });
		}

		await engine.updateFtsTables();

		done();
	});

});

View File

@ -0,0 +1,95 @@
const { Logger } = require('lib/logger.js');
/**
 * Work-in-progress full-text search service backed by an SQLite FTS4 table.
 *
 * The database handle is injected with `setDb()`; it must provide the async
 * methods `exec(sql)` and `selectAll(sql)`, which are the only ones used here.
 */
class SearchEngine {

	/**
	 * Creates an engine with a no-op `dispatch`, a default `Logger` and no
	 * database. Call `setDb()` before `updateFtsTables()`.
	 */
	constructor() {
		this.dispatch = (action) => {};
		this.logger_ = new Logger();
		this.db_ = null;
	}

	/**
	 * Computes a relevance weight from the parsed output of the FTS
	 * `offsets()` function.
	 *
	 * `offsets` is read as groups of four integers per term occurrence; the
	 * third integer of each group is used as the occurrence position
	 * (offset doc: https://www.sqlite.org/fts3.html#offsets).
	 *
	 * The number of occurrences is divided by the accumulated distance between
	 * consecutive occurrences, so a note that contains the searched terms many
	 * times but very spread apart gets a lower weight than a note that has the
	 * terms once or twice right next to each other.
	 *
	 * NOTE(review): positions are compared without looking at the column
	 * number (first integer of each group), so occurrences coming from
	 * different columns may produce a zero or negative distance - confirm
	 * whether that needs dedicated handling.
	 *
	 * @param {number[]} offsets Flat list of integers, four per occurrence.
	 * @returns {number} `occurrences / spread`, or the occurrence count alone
	 *   when the spread is zero (fewer than two occurrences, or all of them at
	 *   the same position).
	 */
	calculateWeight_(offsets) {
		const occurenceCount = Math.floor(offsets.length / 4);

		let spread = 0;
		let previousDist = null;
		for (let i = 0; i < occurenceCount; i++) {
			const dist = offsets[i * 4 + 2];
			if (previousDist !== null) spread += dist - previousDist;
			previousDist = dist;
		}

		// A zero spread would make the division yield Infinity, or NaN for
		// 0 / 0, and NaN weights make the sort comparator inconsistent.
		// Fall back to a divisor of 1 in that case.
		if (spread === 0) return occurenceCount;

		return occurenceCount / spread;
	}

	/**
	 * Adds a numeric `weight` property to every row and sorts the rows in
	 * place, highest weight first.
	 *
	 * @param {Object[]} rows Rows with an `offsets` string of space-separated
	 *   integers, as returned by the FTS `offsets()` function.
	 * @returns {Object[]} The same array instance, now weighted and sorted.
	 */
	orderResults_(rows) {
		for (const row of rows) {
			const parsedOffsets = row.offsets.split(' ').map(o => Number(o));
			row.weight = this.calculateWeight_(parsedOffsets);
		}

		rows.sort((a, b) => {
			if (a.weight < b.weight) return +1;
			if (a.weight > b.weight) return -1;
			return 0;
		});

		return rows;
	}

	/**
	 * Creates the `notes_fts` FTS4 table over the `notes` table, fills it with
	 * the title and body of every note, then runs a hard-coded sample query
	 * ("abcd efgh"), orders the matches by weight and prints them.
	 *
	 * NOTE(review): the CREATE statement has no IF NOT EXISTS clause, so a
	 * second call against the same database presumably fails - confirm.
	 *
	 * @returns {Promise<Object[]>} The matching rows (`docid`, `title`,
	 *   `offsets`, `weight`), sorted by descending weight.
	 */
	async updateFtsTables() {
		await this.db().exec('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes", title, body)');
		await this.db().exec('INSERT INTO notes_fts(docid, title, body) SELECT rowid, title, body FROM notes;');

		// The MATCH argument is written as a single-quoted SQL string literal:
		// SQLite only accepts double-quoted string literals through a legacy
		// fallback that can be disabled.
		const sql = `SELECT docid, title, offsets(notes_fts) as offsets FROM notes_fts WHERE notes_fts MATCH 'abcd efgh' `;
		const rows = await this.db().selectAll(sql);

		this.orderResults_(rows);

		console.info(rows);

		return rows;
	}

	/**
	 * Returns the shared engine, creating it on first use.
	 *
	 * @returns {SearchEngine}
	 */
	static instance() {
		if (this.instance_) return this.instance_;
		this.instance_ = new SearchEngine();
		return this.instance_;
	}

	/**
	 * Replaces the logger held by this engine.
	 *
	 * @param {Object} logger
	 */
	setLogger(logger) {
		this.logger_ = logger;
	}

	/**
	 * @returns {Object} The logger held by this engine.
	 */
	logger() {
		return this.logger_;
	}

	/**
	 * Sets the database handle used by `updateFtsTables()`.
	 *
	 * @param {Object} db
	 */
	setDb(db) {
		this.db_ = db;
	}

	/**
	 * @returns {Object|null} The database handle, or null if none was set.
	 */
	db() {
		return this.db_;
	}

}
module.exports = SearchEngine;