2024-01-05 14:06:33 +00:00
|
|
|
import { setupDatabaseAndSynchronizer, db, sleep, switchClient, msleep } from '../../testing/test-utils';
|
2024-01-05 14:15:47 +00:00
|
|
|
import SearchEngine from './SearchEngine';
|
2024-01-05 14:06:33 +00:00
|
|
|
import Note from '../../models/Note';
|
|
|
|
import ItemChange from '../../models/ItemChange';
|
|
|
|
import Setting from '../../models/Setting';
|
|
|
|
|
|
|
|
// Search engine instance shared by the tests below; a new one is assigned in `beforeEach`.
let engine: SearchEngine = null;
|
|
|
|
|
|
|
|
// const IDF = (N:number, n:number) => Math.max(Math.log((N - n + 0.5) / (n + 0.5)), 0);
|
|
|
|
|
|
|
|
// const frequency = (word:string, string:string) => {
|
|
|
|
// const re = new RegExp(`\\b(${word})\\b`, 'g');
|
|
|
|
// return (string.match(re) || []).length;
|
|
|
|
// };
|
|
|
|
|
|
|
|
// const calculateScore = (searchString, notes) => {
|
|
|
|
// const K1 = 1.2;
|
|
|
|
// const B = 0.75;
|
|
|
|
|
|
|
|
// const freqTitle = notes.map(note => frequency(searchString, note.title));
|
|
|
|
// const notesWithWord = freqTitle.filter(count => count !== 0).length;
|
|
|
|
// const numTokens = notes.map(note => note.title.split(' ').length);
|
|
|
|
// const avgTokens = Math.round(numTokens.reduce((a, b) => a + b, 0) / notes.length);
|
|
|
|
|
|
|
|
// const msSinceEpoch = Math.round(new Date().getTime());
|
|
|
|
// const msPerDay = 86400000;
|
|
|
|
// const weightForDaysSinceLastUpdate = (row) => {
|
|
|
|
// // BM25 weights typically range 0-10, and last updated date should weight similarly, though prioritizing recency logarithmically.
|
|
|
|
// // An alpha of 200 ensures matches in the last week will show up front (11.59) and often so for matches within 2 weeks (5.99),
|
|
|
|
// // but is much less of a factor at 30 days (2.84) or very little after 90 days (0.95), focusing mostly on content at that point.
|
|
|
|
// if (!row.user_updated_time) {
|
|
|
|
// return 0;
|
|
|
|
// }
|
|
|
|
|
|
|
|
// const alpha = 200;
|
|
|
|
// const daysSinceLastUpdate = (msSinceEpoch - row.user_updated_time) / msPerDay;
|
|
|
|
// return alpha * Math.log(1 + 1 / Math.max(daysSinceLastUpdate, 0.5));
|
|
|
|
// };
|
|
|
|
|
|
|
|
// let titleBM25WeightedByLastUpdate = new Array(notes.length).fill(-1);
|
|
|
|
// if (avgTokens !== 0) {
|
|
|
|
// for (let i = 0; i < notes.length; i++) {
|
|
|
|
// titleBM25WeightedByLastUpdate[i] = IDF(notes.length, notesWithWord) * ((freqTitle[i] * (K1 + 1)) / (freqTitle[i] + K1 * (1 - B + B * (numTokens[i] / avgTokens))));
|
|
|
|
// titleBM25WeightedByLastUpdate[i] += weightForDaysSinceLastUpdate(notes[i]);
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
|
|
|
|
// const scores = [];
|
|
|
|
// for (let i = 0; i < notes.length; i++) {
|
|
|
|
// if (freqTitle[i]) scores.push(titleBM25WeightedByLastUpdate[i]);
|
|
|
|
// }
|
|
|
|
|
|
|
|
// scores.sort().reverse();
|
|
|
|
// return scores;
|
|
|
|
// };
|
|
|
|
|
|
|
|
describe('services/SearchEngine', () => {
|
2018-12-09 21:45:50 +01:00
|
|
|
|
2022-11-15 10:23:50 +00:00
|
|
|
beforeEach(async () => {
	// Prepare the test environment for client 1 using the test-utils helpers.
	await setupDatabaseAndSynchronizer(1);
	await switchClient(1);

	// Every test gets its own engine, wired to whatever `db()` returns.
	const freshEngine = new SearchEngine();
	freshEngine.setDb(db());
	engine = freshEngine;
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should keep the content and FTS table in sync', async () => {
	// Verifies that search results follow note creation, deletion, renaming
	// and changes to the `encryption_applied` flag once the tables are synced.
	let hits;

	const noteA = await Note.save({ title: 'a' });
	const noteB = await Note.save({ title: 'b' });

	// Newly created notes are found after a sync
	await engine.syncTables();
	hits = await engine.search('a');
	expect(hits.length).toBe(1);
	expect(hits[0].title).toBe('a');

	// A deleted note is no longer found, while the other one still is
	await Note.delete(noteA.id);
	await engine.syncTables();
	hits = await engine.search('a');
	expect(hits.length).toBe(0);
	hits = await engine.search('b');
	expect(hits[0].title).toBe('b');

	// After a rename, only the new title matches
	await Note.save({ id: noteB.id, title: 'c' });
	await engine.syncTables();
	hits = await engine.search('b');
	expect(hits.length).toBe(0);
	hits = await engine.search('c');
	expect(hits[0].title).toBe('c');

	// With `encryption_applied` set, the note is expected to be absent from the results
	await Note.save({ id: noteB.id, encryption_applied: 1 });
	await engine.syncTables();
	hits = await engine.search('c');
	expect(hits.length).toBe(0);

	// Clearing the flag makes the note searchable again
	await Note.save({ id: noteB.id, encryption_applied: 0 });
	await engine.syncTables();
	hits = await engine.search('c');
	expect(hits.length).toBe(1);
});
|
2018-12-10 18:54:46 +00:00
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should, after initial indexing, save the last change ID', async () => {
	// Checks the two settings updated by the first `syncTables()` call.
	await Note.save({ title: 'abcd efgh' });
	await Note.save({ title: 'abcd aaaaa abcd abcd' });

	// Before the first sync, the indexing flag is still false
	expect(Setting.value('searchEngine.initialIndexingDone')).toBe(false);

	// Wait for pending item changes so that the last change ID is final
	await ItemChange.waitForAllSaved();
	const expectedChangeId = await ItemChange.lastChangeId();

	await engine.syncTables();

	expect(Setting.value('searchEngine.lastProcessedChangeId')).toBe(expectedChangeId);
	expect(Setting.value('searchEngine.initialIndexingDone')).toBe(true);
});
|
|
|
|
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should order search results by relevance BM25', async () => {
	// BM25 is based on term frequency - inverse document frequency (tf-idf).
	// The tf-idf value grows with the number of times a word appears in a
	// document, and is offset by the number of documents in the corpus that
	// contain the word, which compensates for words that are frequent in
	// general.
	//
	// BM25 gives a weight of zero to a search term that occurs in more than
	// half the notes, so terms that are abundant in all notes have zero
	// relevance w.r.t. BM25. The "xyz" notes below are filler that keeps
	// "abcd" present in fewer than half of the notes.

	// Trailing numbers are the expected rank for the query 'abcd'
	const shortNote = await Note.save({ title: 'abcd efgh' }); // 3
	const denseNote = await Note.save({ title: 'abcd efgh abcd abcd' }); // 1
	const longNote = await Note.save({ title: 'abcd aaaaa bbbb eeee abcd' }); // 2

	const fillerTitles = [
		'xyz xyz',
		'xyz xyz xyz xyz',
		'xyz xyz xyz xyz xyz xyz',
		'xyz xyz xyz xyz xyz xyz',
		'xyz xyz xyz xyz xyz xyz xyz xyz',
	];
	for (const title of fillerTitles) {
		await Note.save({ title });
	}

	await engine.syncTables();

	const singleTermRows = await engine.search('abcd');
	expect(singleTermRows[0].id).toBe(denseNote.id);
	expect(singleTermRows[1].id).toBe(longNote.id);
	expect(singleTermRows[2].id).toBe(shortNote.id);

	const twoTermRows = await engine.search('abcd efgh');
	// The shorter note wins; also 'efgh' is more rare than 'abcd'.
	expect(twoTermRows[0].id).toBe(shortNote.id);
	expect(twoTermRows[1].id).toBe(denseNote.id);
});
|
|
|
|
|
2023-10-02 10:41:30 +01:00
|
|
|
it('should order search results by relevance BM25 - 2', async () => {
	// Regression test: this simple case used to fail because of a bug in
	// the IDF calculation, and results were just ordered by timestamp.
	const twoOccurrences = await Note.save({ title: 'abcd abcd' }); // expected rank 1
	// Pause briefly before the second save (presumably so the timestamps differ)
	await msleep(1);
	const oneOccurrence = await Note.save({ title: 'abcd' }); // expected rank 2

	await engine.syncTables();

	const ranked = await engine.search('abcd');

	expect(ranked[0].id).toBe(twoOccurrences.id);
	expect(ranked[1].id).toBe(oneOccurrence.id);
});
|
|
|
|
|
2020-11-10 15:59:30 +00:00
|
|
|
// TODO: Need to update and replace jasmine.mockDate() calls with Jest
|
|
|
|
// equivalent
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
// it('should correctly weigh notes using BM25 and user_updated_time', (async () => {
|
2020-11-10 15:59:30 +00:00
|
|
|
// await mockDate(2020, 9, 30, 50);
|
|
|
|
// const noteData = [
|
|
|
|
// {
|
|
|
|
// title: 'abc test2 test2',
|
|
|
|
// updated_time: 1601425064756,
|
|
|
|
// user_updated_time: 1601425064756,
|
|
|
|
// created_time: 1601425064756,
|
|
|
|
// user_created_time: 1601425064756,
|
|
|
|
// },
|
|
|
|
// {
|
|
|
|
// title: 'foo foo',
|
|
|
|
// updated_time: 1601425064758,
|
|
|
|
// user_updated_time: 1601425064758,
|
|
|
|
// created_time: 1601425064758,
|
|
|
|
// user_created_time: 1601425064758,
|
|
|
|
// },
|
|
|
|
// {
|
|
|
|
// title: 'dead beef',
|
|
|
|
// updated_time: 1601425064760,
|
|
|
|
// user_updated_time: 1601425064760,
|
|
|
|
// created_time: 1601425064760,
|
|
|
|
// user_created_time: 1601425064760,
|
|
|
|
// },
|
|
|
|
// {
|
|
|
|
// title: 'test2 bar',
|
|
|
|
// updated_time: 1601425064761,
|
|
|
|
// user_updated_time: 1601425064761,
|
|
|
|
// created_time: 1601425064761,
|
|
|
|
// user_created_time: 1601425064761,
|
|
|
|
// },
|
|
|
|
// {
|
|
|
|
// title: 'blah blah abc',
|
|
|
|
// updated_time: 1601425064763,
|
|
|
|
// user_updated_time: 1601425064763,
|
|
|
|
// created_time: 1601425064763,
|
|
|
|
// user_created_time: 1601425064763,
|
|
|
|
// },
|
|
|
|
// ];
|
|
|
|
|
|
|
|
// const n0 = await Note.save(noteData[0], { autoTimestamp: false });
|
|
|
|
// const n1 = await Note.save(noteData[1], { autoTimestamp: false });
|
|
|
|
// const n2 = await Note.save(noteData[2], { autoTimestamp: false });
|
|
|
|
// const n3 = await Note.save(noteData[3], { autoTimestamp: false });
|
|
|
|
// const n4 = await Note.save(noteData[4], { autoTimestamp: false });
|
|
|
|
// restoreDate();
|
|
|
|
// await engine.syncTables();
|
|
|
|
// await mockDate(2020, 9, 30, 50);
|
|
|
|
|
|
|
|
// let searchString = 'abc';
|
|
|
|
// let scores = calculateScore(searchString, noteData);
|
|
|
|
// let rows = await engine.search(searchString);
|
|
|
|
|
|
|
|
// expect(rows[0].weight).toEqual(scores[0]);
|
|
|
|
// expect(rows[1].weight).toEqual(scores[1]);
|
|
|
|
|
|
|
|
// // console.log(rows);
|
|
|
|
// // console.log(scores);
|
|
|
|
|
|
|
|
// searchString = 'test2';
|
|
|
|
// scores = calculateScore(searchString, noteData);
|
|
|
|
// rows = await engine.search(searchString);
|
|
|
|
|
|
|
|
// // console.log(rows);
|
|
|
|
// // console.log(scores);
|
|
|
|
|
|
|
|
// expect(rows[0].weight).toEqual(scores[0]);
|
|
|
|
// expect(rows[1].weight).toEqual(scores[1]);
|
|
|
|
|
|
|
|
// searchString = 'foo';
|
|
|
|
// scores = calculateScore(searchString, noteData);
|
|
|
|
// rows = await engine.search(searchString);
|
|
|
|
|
|
|
|
// // console.log(rows);
|
|
|
|
// // console.log(scores);
|
|
|
|
|
|
|
|
// expect(rows[0].weight).toEqual(scores[0]);
|
|
|
|
// await restoreDate();
|
|
|
|
// }));
|
2018-12-10 18:54:46 +00:00
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should tell where the results are found', async () => {
	// Checks the `fields` property of each result row, i.e. which note
	// fields ("title", "body") the search terms were found in.
	const notes = [
		await Note.save({ title: 'abcd efgh', body: 'abcd' }),
		await Note.save({ title: 'abcd' }),
		await Note.save({ title: 'efgh', body: 'abcd' }),
	];

	await engine.syncTables();

	// For each query, `expectedFields[i]` lists the fields expected for
	// `notes[i]`. An empty list means that no fields are expected, which is
	// also what is recorded when the note is absent from the results.
	const testCases: { query: string; expectedFields: string[][] }[] = [
		{ query: 'abcd', expectedFields: [['title', 'body'], ['title'], ['body']] },
		{ query: 'efgh', expectedFields: [['title'], [], ['title']] },
	];

	for (const { query, expectedFields } of testCases) {
		const rows = await engine.search(query);

		for (let i = 0; i < notes.length; i++) {
			const row = rows.find(r => r.id === notes[i].id);

			// Sort copies so that neither the result rows nor the fixtures
			// are mutated by the comparison.
			const actual = row ? row.fields.slice().sort().join(',') : '';
			const expected = expectedFields[i].slice().sort().join(',');

			// Jest convention: expect(received).toBe(expected), so that
			// failure messages label the two values correctly.
			expect(actual).toBe(expected);
		}
	}
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should order search results by relevance (last updated first)', async () => {
	// All notes have the same title, so only the update time can
	// differentiate them. Each save is followed by a short pause.
	const saveThenPause = async () => {
		const saved = await Note.save({ title: 'abcd' });
		await sleep(0.1);
		return saved;
	};

	const oldest = await saveThenPause();
	const middle = await saveThenPause();
	const newest = await saveThenPause();

	await engine.syncTables();
	const initialOrder = await engine.search('abcd');

	expect(initialOrder[0].id).toBe(newest.id);
	expect(initialOrder[1].id).toBe(middle.id);
	expect(initialOrder[2].id).toBe(oldest.id);

	// Saving the oldest note again is expected to move it to the front
	await Note.save({ id: oldest.id, title: 'abcd' });

	await engine.syncTables();
	const orderAfterResave = await engine.search('abcd');
	expect(orderAfterResave[0].id).toBe(oldest.id);
	expect(orderAfterResave[1].id).toBe(newest.id);
	expect(orderAfterResave[2].id).toBe(middle.id);
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should order search results by relevance (completed to-dos last)', async () => {
	// Three identical to-dos, each saved with a short pause afterwards.
	const saveTodoThenPause = async () => {
		const saved = await Note.save({ title: 'abcd', is_todo: 1 });
		await sleep(0.1);
		return saved;
	};

	const firstTodo = await saveTodoThenPause();
	const secondTodo = await saveTodoThenPause();
	const thirdTodo = await saveTodoThenPause();

	await engine.syncTables();
	const beforeCompletion = await engine.search('abcd');

	// While nothing is completed, the most recently saved to-do comes first
	expect(beforeCompletion[0].id).toBe(thirdTodo.id);
	expect(beforeCompletion[1].id).toBe(secondTodo.id);
	expect(beforeCompletion[2].id).toBe(firstTodo.id);

	// Mark the top result as completed: it is expected to drop to the end
	await Note.save({ id: thirdTodo.id, todo_completed: Date.now() });

	await engine.syncTables();
	const afterCompletion = await engine.search('abcd');
	expect(afterCompletion[0].id).toBe(secondTodo.id);
	expect(afterCompletion[1].id).toBe(firstTodo.id);
	expect(afterCompletion[2].id).toBe(thirdTodo.id);
});
|
|
|
|
|
2024-01-25 09:03:53 -08:00
|
|
|
it('should support searching through documents that contain null characters', async () => {
	// The body embeds NUL (\x00) escapes; words located both before and
	// after them must still be searchable.
	const noteWithNulCharacters = {
		title: 'Test',
		body: `
NUL characters, "\x00", have been known to break FTS search.
Previously, all characters after a NUL (\x00) character in a note
would not show up in search results. NUL characters may have also
broken search for other notes.

In this note, "testing" only appears after the NUL characters.
`,
	};
	await Note.save(noteWithNulCharacters);

	await engine.syncTables();

	const previouslyHits = await engine.search('previously');
	expect(previouslyHits.length).toBe(1);

	const testingHits = await engine.search('testing');
	expect(testingHits.length).toBe(1);
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should supports various query types', async () => {
	await Note.save({ title: 'abcd efgh ijkl', body: 'aaaa bbbb' });
	await Note.save({ title: 'iiii efgh bbbb', body: 'aaaa bbbb' });
	await Note.save({ title: 'Агентство Рейтер' });
	await Note.save({ title: 'Dog' });
	await Note.save({ title: 'СООБЩИЛО' });

	await engine.syncTables();

	// Each entry is [query, expected number of matching notes]
	const expectations: [string, number][] = [
		// Several terms, all of which must be present
		['abcd ijkl', 1],
		// Quoted phrases: the words must be adjacent
		['"abcd ijkl"', 0],
		['"abcd efgh"', 1],
		// Field-restricted terms
		['title:abcd', 1],
		['title:efgh', 2],
		['body:abcd', 0],
		['body:bbbb', 2],
		['body:bbbb iiii', 1],
		// Non-latin text and letter case
		['Рейтер', 1],
		['рейтер', 1],
		['Dog', 1],
		['dog', 1],
		['сообщило', 1],
	];

	for (const [query, expectedCount] of expectations) {
		const rows = await engine.search(query);
		expect(rows.length).toBe(expectedCount);
	}
});
|
2018-12-29 20:19:18 +01:00
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should support queries with or without accents', async () => {
	await Note.save({ title: 'père noël' });

	await engine.syncTables();

	// Accented and unaccented spellings, with and without a trailing
	// wildcard, must all find the single note.
	const equivalentQueries = ['père', 'pere', 'noe*', 'noë*'];
	for (const query of equivalentQueries) {
		const rows = await engine.search(query);
		expect(rows.length).toBe(1);
	}
});
|
2018-12-09 21:45:50 +01:00
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should support queries with Chinese characters', async () => {
	await Note.save({ title: '我是法国人', body: '中文测试' });

	await engine.syncTables();

	// Small helpers so that each expectation reads as a single statement
	const hitCount = async (query: string) => (await engine.search(query)).length;
	const firstHitFields = async (query: string) => (await engine.search(query))[0].fields;

	expect(await hitCount('我')).toBe(1);
	expect(await hitCount('法国人')).toBe(1);
	// With a wildcard, both fields are reported (usually assume that keyword was matched in body)
	expect((await firstHitFields('法国人*')).sort()).toEqual(['body', 'title']);
	expect(await hitCount('测试')).toBe(1);
	expect(await firstHitFields('测试')).toEqual(['body']);
	expect(await firstHitFields('测试*')).toEqual(['body']);
	expect(await hitCount('any:1 type:todo 测试')).toBe(1);
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should support queries with Japanese characters', async () => {
	await Note.save({ title: '私は日本語を話すことができません', body: 'テスト' });

	await engine.syncTables();

	// Small helpers so that each expectation reads as a single statement
	const hitCount = async (query: string) => (await engine.search(query)).length;
	const firstHitSortedFields = async (query: string) => (await engine.search(query))[0].fields.sort();

	expect(await hitCount('日本')).toBe(1);
	expect(await hitCount('できません')).toBe(1);
	// With a wildcard, both fields are reported (usually assume that keyword was matched in body)
	expect(await firstHitSortedFields('できません*')).toEqual(['body', 'title']);
	expect(await firstHitSortedFields('テスト')).toEqual(['body']);
	expect(await hitCount('any:1 type:todo テスト')).toBe(1);
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should support queries with Korean characters', async () => {
	await Note.save({ title: '이것은 한국말이다' });

	await engine.syncTables();

	// A whole word, a single syllable inside a word, and the same syllable
	// combined with filters must each find the note.
	const queries = ['이것은', '말', 'any:1 type:todo 말'];
	for (const query of queries) {
		const rows = await engine.search(query);
		expect(rows.length).toBe(1);
	}
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should support queries with Thai characters', async () => {
	await Note.save({ title: 'นี่คือคนไทย' });

	await engine.syncTables();

	// The start of the title, its end, and the end combined with filters
	// must each find the note.
	const queries = ['นี่คือค', 'ไทย', 'any:1 type:todo ไทย'];
	for (const query of queries) {
		const rows = await engine.search(query);
		expect(rows.length).toBe(1);
	}
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should parse normal query strings', async () => {
	// Each entry is [query string, expected term values grouped by field].
	// The "_" key holds the terms that are not attached to a field.
	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
	const testCases: [string, any][] = [
		['abcd efgh', { _: ['abcd', 'efgh'] }],
		// NOTE(review): identical to the previous case - possibly the query
		// originally contained different whitespace; confirm against history.
		['abcd efgh', { _: ['abcd', 'efgh'] }],
		['title:abcd efgh', { _: ['efgh'], title: ['abcd'] }],
		['title:abcd', { title: ['abcd'] }],
		['"abcd efgh"', { _: ['abcd efgh'] }],
		['"abcd efgh" ijkl', { _: ['abcd efgh', 'ijkl'] }],
		['title:abcd title:efgh', { title: ['abcd', 'efgh'] }],
	];

	// Extracts the `value` of every parsed term, or `undefined` when the
	// query has no terms for that field.
	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
	const termValues = (terms: any) => {
		if (!terms) return undefined;
		// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
		return terms.map((term: any) => term.value);
	};

	for (const [input, expected] of testCases) {
		const parsed = await engine.parseQuery(input);

		const unscopedValues = termValues(parsed.terms._);
		const titleValues = termValues(parsed.terms.title);
		const bodyValues = termValues(parsed.terms.body);

		// Compared via JSON so that a missing field (undefined) matches an
		// expectation that does not mention the field.
		expect(JSON.stringify(unscopedValues)).toBe(JSON.stringify(expected._));
		expect(JSON.stringify(titleValues)).toBe(JSON.stringify(expected.title));
		expect(JSON.stringify(bodyValues)).toBe(JSON.stringify(expected.body));
	}
});
|
2018-12-12 22:40:05 +01:00
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should handle queries with special characters', async () => {
	// Each entry is [note title, query that must find that note]
	const testCases = [
		// "-" is considered a word delimiter so it is stripped off
		// when indexing the notes. "did-not-match" is translated to
		// three words: "did", "not", "match"
		['did-not-match', 'did not match'],
		['did-not-match', '"did-not-match"'],
		['does match', 'does match'],
	];

	for (const [content, query] of testCases) {
		const note = await Note.save({ title: content });
		await engine.syncTables();

		const rows = await engine.search(query);
		expect(rows.length).toBe(1);

		// Remove the note before moving on to the next case
		await Note.delete(note.id);
	}
});
|
|
|
|
|
2020-12-01 18:05:24 +00:00
|
|
|
it('should allow using basic search', async () => {
	await Note.save({ title: '- [ ] abcd' });
	await Note.save({ title: '[ ] abcd' });

	await engine.syncTables();

	// The FTS search type finds nothing for the punctuation-only phrase
	const ftsRows = await engine.search('"- [ ]"', { searchType: SearchEngine.SEARCH_TYPE_FTS });
	expect(ftsRows.length).toBe(0);

	// The basic search type finds it
	const basicDashRows = await engine.search('"- [ ]"', { searchType: SearchEngine.SEARCH_TYPE_BASIC });
	expect(basicDashRows.length).toBe(1);

	const basicBracketRows = await engine.search('"[ ]"', { searchType: SearchEngine.SEARCH_TYPE_BASIC });
	expect(basicBracketRows.length).toBe(2);
});
|
2020-12-09 13:02:54 +00:00
|
|
|
|
|
|
|
it('should not mistake cyrillic "l" for latin "n"', async () => {
	const latinNote = await Note.save({ title: 'latin n', body: 'n' });
	const cyrillicNote = await Note.save({ title: 'cyrillic l', body: 'л' });

	await engine.syncTables();

	// Each letter must match only the note that actually contains it
	const latinCount = (await engine.search('n')).length;
	expect(latinCount).toBe(1);
	const latinFirstId = (await engine.search('n'))[0].id;
	expect(latinFirstId).toBe(latinNote.id);

	const cyrillicCount = (await engine.search('л')).length;
	expect(cyrillicCount).toBe(1);
	const cyrillicFirstId = (await engine.search('л'))[0].id;
	expect(cyrillicFirstId).toBe(cyrillicNote.id);
});
|
2023-09-24 23:22:36 +01:00
|
|
|
|
|
|
|
it('should automatically add wildcards', async () => {
	await Note.save({ title: 'hello1' });
	await Note.save({ title: 'hello2' });

	await engine.syncTables();

	// Without the option, "hello" matches neither "hello1" nor "hello2"
	const exactRows = await engine.search('hello');
	expect(exactRows.length).toBe(0);

	// With `appendWildCards`, both notes are found
	const wildcardRows = await engine.search('hello', { appendWildCards: true });
	expect(wildcardRows.length).toBe(2);
});
|
2023-10-25 14:41:05 +01:00
|
|
|
|
2024-02-08 17:58:07 +00:00
|
|
|
it('should search HTML-entity encoded text', async () => {
	// The title is saved with HTML entities; it decodes to "éçà". Saving the
	// already-decoded text would make the stored title identical to the
	// query, and the test would no longer exercise entity handling.
	await Note.save({ title: '&eacute;&ccedil;&agrave;' });

	await engine.syncTables();

	// Searching for the decoded characters must find the encoded note
	const rows = await engine.search('éçà');
	expect(rows.length).toBe(1);
});
|
|
|
|
|
2024-01-26 20:15:00 +00:00
|
|
|
// Disabled for now:
|
|
|
|
// https://github.com/laurent22/joplin/issues/9769#issuecomment-1912459744
|
2023-10-25 14:41:05 +01:00
|
|
|
|
2024-01-26 20:15:00 +00:00
|
|
|
// it('should search by item ID if no other result was found', (async () => {
|
|
|
|
// const f1 = await Folder.save({});
|
|
|
|
// const n1 = await Note.save({ title: 'hello1', parent_id: f1.id });
|
|
|
|
// const n2 = await Note.save({ title: 'hello2' });
|
2023-10-25 14:41:05 +01:00
|
|
|
|
2024-01-26 20:15:00 +00:00
|
|
|
// await engine.syncTables();
|
2023-10-25 14:41:05 +01:00
|
|
|
|
2024-01-26 20:15:00 +00:00
|
|
|
// const results = await engine.search(n1.id);
|
|
|
|
// expect(results.length).toBe(1);
|
|
|
|
// expect(results[0].id).toBe(n1.id);
|
|
|
|
// expect(results[0].title).toBe(n1.title);
|
|
|
|
// expect(results[0].parent_id).toBe(n1.parent_id);
|
|
|
|
|
|
|
|
// expect((await engine.search(n2.id))[0].id).toBe(n2.id);
|
|
|
|
// expect(await engine.search(f1.id)).toEqual([]);
|
|
|
|
// }));
|
2024-01-05 14:43:24 +00:00
|
|
|
|
2019-07-30 09:35:42 +02:00
|
|
|
});
|