mirror of
https://github.com/laurent22/joplin.git
synced 2024-12-30 10:36:35 +02:00
Started rewriting search engine to better support unicode
This commit is contained in:
parent
f308fe71f9
commit
41155f5ef4
@ -31,6 +31,7 @@ npm test tests-build/models_Note.js
|
||||
npm test tests-build/models_Tag.js
|
||||
npm test tests-build/models_Setting.js
|
||||
npm test tests-build/pathUtils.js
|
||||
npm test tests-build/StringUtils.js
|
||||
npm test tests-build/services_InteropService.js
|
||||
npm test tests-build/services_ResourceService.js
|
||||
npm test tests-build/urlUtils.js
|
||||
|
@ -16,6 +16,7 @@ describe('StringUtils', function() {
|
||||
|
||||
it('should surround keywords with strings', async (done) => {
|
||||
const testCases = [
|
||||
[[], 'test', 'a', 'b', 'test'],
|
||||
[['test'], 'test', 'a', 'b', 'atestb'],
|
||||
[['test'], 'Test', 'a', 'b', 'aTestb'],
|
||||
[['te[]st'], 'Te[]st', 'a', 'b', 'aTe[]stb'],
|
||||
|
@ -28,27 +28,32 @@ describe('services_SearchEngine', function() {
|
||||
|
||||
n1 = await Note.save({ title: "a" });
|
||||
n2 = await Note.save({ title: "b" });
|
||||
await engine.syncTables();
|
||||
rows = await engine.search('a');
|
||||
expect(rows.length).toBe(1);
|
||||
expect(rows[0].title).toBe('a');
|
||||
|
||||
await Note.delete(n1.id);
|
||||
await engine.syncTables();
|
||||
rows = await engine.search('a');
|
||||
expect(rows.length).toBe(0);
|
||||
rows = await engine.search('b');
|
||||
expect(rows[0].title).toBe('b');
|
||||
|
||||
await Note.save({ id: n2.id, title: 'c' });
|
||||
await engine.syncTables();
|
||||
rows = await engine.search('b');
|
||||
expect(rows.length).toBe(0);
|
||||
rows = await engine.search('c');
|
||||
expect(rows[0].title).toBe('c');
|
||||
|
||||
await Note.save({ id: n2.id, encryption_applied: 1 });
|
||||
await engine.syncTables();
|
||||
rows = await engine.search('c');
|
||||
expect(rows.length).toBe(0);
|
||||
|
||||
await Note.save({ id: n2.id, encryption_applied: 0 });
|
||||
await engine.syncTables();
|
||||
rows = await engine.search('c');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
@ -60,6 +65,7 @@ describe('services_SearchEngine', function() {
|
||||
const n2 = await Note.save({ title: "abcd aaaaa abcd abcd" }); // 1
|
||||
const n3 = await Note.save({ title: "abcd aaaaa bbbb eeee abcd" }); // 2
|
||||
|
||||
await engine.syncTables();
|
||||
const rows = await engine.search('abcd');
|
||||
|
||||
expect(rows[0].id).toBe(n2.id);
|
||||
@ -81,6 +87,7 @@ describe('services_SearchEngine', function() {
|
||||
// 5
|
||||
const n5 = await Note.save({ title: "occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh" });
|
||||
|
||||
await engine.syncTables();
|
||||
const rows = await engine.search('abcd efgh');
|
||||
|
||||
expect(rows[0].id).toBe(n1.id);
|
||||
@ -97,6 +104,11 @@ describe('services_SearchEngine', function() {
|
||||
|
||||
const n1 = await Note.save({ title: "abcd efgh ijkl", body: "aaaa bbbb" });
|
||||
const n2 = await Note.save({ title: "iiii efgh bbbb", body: "aaaa bbbb" });
|
||||
const n3 = await Note.save({ title: "Агентство Рейтер" });
|
||||
const n4 = await Note.save({ title: "Dog" });
|
||||
const n5 = await Note.save({ title: "СООБЩИЛО" });
|
||||
|
||||
await engine.syncTables();
|
||||
|
||||
rows = await engine.search('abcd ijkl');
|
||||
expect(rows.length).toBe(1);
|
||||
@ -122,6 +134,21 @@ describe('services_SearchEngine', function() {
|
||||
rows = await engine.search('body:bbbb iiii');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
rows = await engine.search('Рейтер');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
rows = await engine.search('pейтер');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
rows = await engine.search('Dog');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
rows = await engine.search('dog');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
rows = await engine.search('сообщило');
|
||||
expect(rows.length).toBe(1);
|
||||
|
||||
done();
|
||||
});
|
||||
|
||||
@ -172,12 +199,6 @@ describe('services_SearchEngine', function() {
|
||||
const r = shouldMatch[j].match(regex);
|
||||
expect(!!r).toBe(true, '"' + input + '" should match "' + shouldMatch[j] + '"');
|
||||
}
|
||||
|
||||
// for (let j = 0; j < shouldNotMatch.length; j++) {
|
||||
// const r = shouldNotMatch[j].match(regex);
|
||||
// // console.info(input, shouldNotMatch)
|
||||
// expect(!!r).toBe(false, '"' + input + '" should not match "' + shouldNotMatch[j] + '"');
|
||||
// }
|
||||
}
|
||||
|
||||
expect(engine.parseQuery('*').termCount).toBe(0);
|
||||
|
@ -137,6 +137,7 @@ async function clearDatabase(id = null) {
|
||||
'DELETE FROM settings',
|
||||
'DELETE FROM deleted_items',
|
||||
'DELETE FROM sync_items',
|
||||
'DELETE FROM notes_normalized',
|
||||
];
|
||||
|
||||
await databases_[id].transactionExecBatch(queries);
|
||||
|
@ -474,13 +474,54 @@ class JoplinDatabase extends Database {
|
||||
END;`);
|
||||
}
|
||||
|
||||
if (targetVersion == 16) {
|
||||
const notesNormalized = `
|
||||
CREATE TABLE notes_normalized (
|
||||
id TEXT NOT NULL,
|
||||
title TEXT NOT NULL DEFAULT "",
|
||||
body TEXT NOT NULL DEFAULT ""
|
||||
);
|
||||
`;
|
||||
|
||||
queries.push(this.sqlStringToLines(notesNormalized)[0]);
|
||||
|
||||
queries.push('CREATE INDEX notes_normalized_id ON notes_normalized (id)');
|
||||
|
||||
queries.push('DROP TRIGGER IF EXISTS notes_fts_before_update');
|
||||
queries.push('DROP TRIGGER IF EXISTS notes_fts_before_delete');
|
||||
queries.push('DROP TRIGGER IF EXISTS notes_after_update');
|
||||
queries.push('DROP TRIGGER IF EXISTS notes_after_insert');
|
||||
queries.push('DROP TABLE IF EXISTS notes_fts');
|
||||
|
||||
queries.push('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes_normalized", notindexed="id", id, title, body)');
|
||||
|
||||
// Keep the content tables (notes) and the FTS table (notes_fts) in sync.
|
||||
// More info at https://www.sqlite.org/fts3.html#_external_content_fts4_tables_
|
||||
queries.push(`
|
||||
CREATE TRIGGER notes_fts_before_update BEFORE UPDATE ON notes_normalized BEGIN
|
||||
DELETE FROM notes_fts WHERE docid=old.rowid;
|
||||
END;`);
|
||||
queries.push(`
|
||||
CREATE TRIGGER notes_fts_before_delete BEFORE DELETE ON notes_normalized BEGIN
|
||||
DELETE FROM notes_fts WHERE docid=old.rowid;
|
||||
END;`);
|
||||
queries.push(`
|
||||
CREATE TRIGGER notes_after_update AFTER UPDATE ON notes_normalized BEGIN
|
||||
INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes_normalized WHERE new.rowid = notes_normalized.rowid;
|
||||
END;`);
|
||||
queries.push(`
|
||||
CREATE TRIGGER notes_after_insert AFTER INSERT ON notes_normalized BEGIN
|
||||
INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes_normalized WHERE new.rowid = notes_normalized.rowid;
|
||||
END;`);
|
||||
}
|
||||
|
||||
queries.push({ sql: 'UPDATE version SET version = ?', params: [targetVersion] });
|
||||
|
||||
try {
|
||||
await this.transactionExecBatch(queries);
|
||||
} catch (error) {
|
||||
if (targetVersion === 15) {
|
||||
this.logger().warn('Could not upgrade to database v15 - FTS feature will not be used', error);
|
||||
if (targetVersion === 15 || targetVersion === 16) {
|
||||
this.logger().warn('Could not upgrade to database v15 or v16 - FTS feature will not be used', error);
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
|
@ -36,6 +36,54 @@ class SearchEngine {
|
||||
return this.db_;
|
||||
}
|
||||
|
||||
async syncTables() {
|
||||
this.logger().info('SearchEngine: Updating FTS table...');
|
||||
|
||||
await ItemChange.waitForAllSaved();
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
let lastChangeId = Setting.value('searchEngine.lastProcessedChangeId');
|
||||
|
||||
// TODO: if lastChangedid is undefined - index the whole notes table
|
||||
|
||||
while (true) {
|
||||
const changes = await ItemChange.modelSelectAll(`
|
||||
SELECT id, item_id, type
|
||||
FROM item_changes
|
||||
WHERE item_type = ?
|
||||
AND id > ?
|
||||
ORDER BY id ASC
|
||||
LIMIT 100
|
||||
`, [BaseModel.TYPE_NOTE, lastChangeId]);
|
||||
|
||||
if (!changes.length) break;
|
||||
|
||||
const queries = [];
|
||||
|
||||
for (let i = 0; i < changes.length; i++) {
|
||||
const change = changes[i];
|
||||
|
||||
if (change.type === ItemChange.TYPE_CREATE || change.type === ItemChange.TYPE_UPDATE) {
|
||||
queries.push({ sql: 'DELETE FROM notes_normalized WHERE id = ?', params: [change.item_id] });
|
||||
queries.push({ sql: 'INSERT INTO notes_normalized(id, title, body) SELECT id, title, body FROM notes WHERE id = ? AND is_conflict = 0 AND encryption_applied = 0', params: [change.item_id] });
|
||||
} else if (change.type === ItemChange.TYPE_DELETE) {
|
||||
queries.push({ sql: 'DELETE FROM notes_normalized WHERE id = ?', params: [change.item_id] });
|
||||
} else {
|
||||
throw new Error('Invalid change type: ' + change.type);
|
||||
}
|
||||
|
||||
lastChangeId = change.id;
|
||||
}
|
||||
|
||||
await this.db().transactionExecBatch(queries);
|
||||
Setting.setValue('searchEngine.lastProcessedChangeId', lastChangeId);
|
||||
await Setting.saveAll();
|
||||
}
|
||||
|
||||
this.logger().info('SearchEngine: Updated FTS table in ' + (Date.now() - startTime) + 'ms');
|
||||
}
|
||||
|
||||
async countRows() {
|
||||
const sql = 'SELECT count(*) as total FROM notes_fts'
|
||||
const row = await this.db().selectOne(sql);
|
||||
|
@ -229,6 +229,8 @@ function pregQuote(str, delimiter = '') {
|
||||
}
|
||||
|
||||
function surroundKeywords(keywords, text, prefix, suffix) {
|
||||
if (!keywords.length) return text;
|
||||
|
||||
let regexString = keywords.map((k) => {
|
||||
if (k.type === 'regex') {
|
||||
return k.value;
|
||||
|
Loading…
Reference in New Issue
Block a user