mirror of
https://github.com/laurent22/joplin.git
synced 2024-11-27 08:21:03 +02:00
Search engine: normalize text
This commit is contained in:
parent
4472590133
commit
a1f0bd1e6c
@ -137,7 +137,7 @@ describe('services_SearchEngine', function() {
|
||||
rows = await engine.search('Рейтер');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
rows = await engine.search('pейтер');
|
||||
rows = await engine.search('рейтер');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
rows = await engine.search('Dog');
|
||||
|
@ -29,6 +29,11 @@ class ItemChange extends BaseModel {
|
||||
}
|
||||
}
|
||||
|
||||
static async lastChangeId() {
|
||||
const row = await this.db().selectOne('SELECT max(id) as max_id FROM item_changes');
|
||||
return row && row.max_id ? row.max_id : 0;
|
||||
}
|
||||
|
||||
// Because item changes are recorded in the background, this function
|
||||
// can be used for synchronous code, in particular when unit testing.
|
||||
static async waitForAllSaved() {
|
||||
|
@ -164,6 +164,7 @@ class Setting extends BaseModel {
|
||||
|
||||
'resourceService.lastProcessedChangeId': { value: 0, type: Setting.TYPE_INT, public: false },
|
||||
'searchEngine.lastProcessedChangeId': { value: 0, type: Setting.TYPE_INT, public: false },
|
||||
'searchEngine.initialIndexingDone': { value: false, type: Setting.TYPE_BOOL, public: false },
|
||||
};
|
||||
|
||||
return this.metadata_;
|
||||
|
@ -36,17 +36,61 @@ class SearchEngine {
|
||||
return this.db_;
|
||||
}
|
||||
|
||||
noteById_(notes, noteId) {
|
||||
for (let i = 0; i < notes.length; i++) {
|
||||
if (notes[i].id === noteId) return notes[i];
|
||||
}
|
||||
// The note may have been deleted since the change was recorded. For example in this case:
|
||||
// - Note created (Some Change object is recorded)
|
||||
// - Note is deleted
|
||||
// - ResourceService indexer runs.
|
||||
// In that case, there will be a change for the note, but the note will be gone.
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
async initialIndexing() {
|
||||
let noteIds = await this.db().selectAll('SELECT id FROM notes WHERE is_conflict = 0 AND encryption_applied = 0');
|
||||
noteIds = noteIds.map(n => n.id);
|
||||
|
||||
|
||||
// TODO: get last change id HERE
|
||||
|
||||
// First delete content of note_normalized, in case the previous initial indexing failed
|
||||
await this.db().exec('DELETE FROM note_normalized');
|
||||
|
||||
while (noteIds.length) {
|
||||
const currentIds = noteIds.splice(0, 100);
|
||||
const notes = await Note.modelSelectAll('SELECT id, title, body FROM notes WHERE id IN ("' + currentIds.join('","') + '") AND is_conflict = 0 AND encryption_applied = 0');
|
||||
const queries = [];
|
||||
|
||||
for (let i = 0; i < notes.length; i++) {
|
||||
const note = notes[i];
|
||||
const n = this.normalizeNote_(note);
|
||||
queries.push({ sql: 'INSERT INTO notes_normalized(id, title, body) VALUES (?, ?, ?)', params: [n.id, n.title, n.body] });
|
||||
}
|
||||
|
||||
await this.db().transactionExecBatch(queries);
|
||||
}
|
||||
|
||||
// TODO: SET last chnage ID here
|
||||
|
||||
}
|
||||
|
||||
async syncTables() {
|
||||
this.logger().info('SearchEngine: Updating FTS table...');
|
||||
|
||||
await ItemChange.waitForAllSaved();
|
||||
|
||||
if (!Setting.value('searchEngine.initialIndexingDone')) {
|
||||
await this.initialIndexing();
|
||||
return;
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
let lastChangeId = Setting.value('searchEngine.lastProcessedChangeId');
|
||||
|
||||
// TODO: if lastChangedid is undefined - index the whole notes table
|
||||
|
||||
while (true) {
|
||||
const changes = await ItemChange.modelSelectAll(`
|
||||
SELECT id, item_id, type
|
||||
@ -59,6 +103,8 @@ class SearchEngine {
|
||||
|
||||
if (!changes.length) break;
|
||||
|
||||
const noteIds = changes.map(a => a.item_id);
|
||||
const notes = await Note.modelSelectAll('SELECT id, title, body FROM notes WHERE id IN ("' + noteIds.join('","') + '") AND is_conflict = 0 AND encryption_applied = 0');
|
||||
const queries = [];
|
||||
|
||||
for (let i = 0; i < changes.length; i++) {
|
||||
@ -66,7 +112,11 @@ class SearchEngine {
|
||||
|
||||
if (change.type === ItemChange.TYPE_CREATE || change.type === ItemChange.TYPE_UPDATE) {
|
||||
queries.push({ sql: 'DELETE FROM notes_normalized WHERE id = ?', params: [change.item_id] });
|
||||
queries.push({ sql: 'INSERT INTO notes_normalized(id, title, body) SELECT id, title, body FROM notes WHERE id = ? AND is_conflict = 0 AND encryption_applied = 0', params: [change.item_id] });
|
||||
const note = this.noteById_(notes, change.item_id);
|
||||
if (note) {
|
||||
const n = this.normalizeNote_(note);
|
||||
queries.push({ sql: 'INSERT INTO notes_normalized(id, title, body) VALUES (?, ?, ?)', params: [change.item_id, n.title, n.body] });
|
||||
}
|
||||
} else if (change.type === ItemChange.TYPE_DELETE) {
|
||||
queries.push({ sql: 'DELETE FROM notes_normalized WHERE id = ?', params: [change.item_id] });
|
||||
} else {
|
||||
@ -255,7 +305,19 @@ class SearchEngine {
|
||||
return output;
|
||||
}
|
||||
|
||||
normalizeText_(text) {
|
||||
return text.normalize().toLowerCase();
|
||||
}
|
||||
|
||||
normalizeNote_(note) {
|
||||
const n = Object.assign({}, note);
|
||||
n.title = this.normalizeText_(n.title);
|
||||
n.body = this.normalizeText_(n.body);
|
||||
return n;
|
||||
}
|
||||
|
||||
async search(query) {
|
||||
query = this.normalizeText_(query);
|
||||
const parsedQuery = this.parseQuery(query);
|
||||
const sql = 'SELECT id, title, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?'
|
||||
const rows = await this.db().selectAll(sql, [query]);
|
||||
|
Loading…
Reference in New Issue
Block a user