1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-08-13 22:12:50 +02:00

More search engine improvements

This commit is contained in:
Laurent Cozic
2019-01-15 18:10:22 +00:00
parent 2e12b2655b
commit a7896b43d7
5 changed files with 76 additions and 5 deletions

View File

@@ -200,6 +200,7 @@ class Application extends BaseApplication {
if (["NOTE_UPDATE_ONE", "NOTE_DELETE", "FOLDER_UPDATE_ONE", "FOLDER_DELETE"].indexOf(action.type) >= 0) {
if (!await reg.syncTarget().syncStarted()) reg.scheduleSync(30 * 1000, { syncSteps: ["update_remote", "delete_remote"] });
SearchEngine.instance().scheduleSyncTables();
}
if (['EVENT_NOTE_ALARM_FIELD_CHANGE', 'NOTE_DELETE'].indexOf(action.type) >= 0) {

View File

@@ -14,6 +14,7 @@ const InteropServiceHelper = require('../InteropServiceHelper.js');
const Search = require('lib/models/Search');
const Mark = require('mark.js/dist/mark.min.js');
const SearchEngine = require('lib/services/SearchEngine');
const { replaceRegexDiacritics } = require('lib/string-utils');
class NoteListComponent extends React.Component {
@@ -279,7 +280,7 @@ class NoteListComponent extends React.Component {
const w = highlightedWords[i];
if (w.type === 'regex') {
mark.markRegExp(new RegExp('\\b' + w.value + '\\b', 'gmi'), {
mark.markRegExp(new RegExp('\\b' + replaceRegexDiacritics(w.value) + '\\b', 'gmi'), {
acrossElements: true,
});
} else {

View File

@@ -227,6 +227,10 @@
if (!options) options = {};
// TODO: It should highlight queries without accents - eg "penche*" should highlight "penchés"
// TODO: It should highlight Chinese, Japanese characters, etc.
// TODO: It should highlight Russian
// TODO: not working - "oue*" doesn't highlight "ouéé"
// TODO: Add search engine support to the mobile app (sync tables)
if (!mark_) {
mark_ = new Mark(document.getElementById('content'), {

View File

@@ -14,6 +14,7 @@ class SearchEngine {
this.dispatch = (action) => {};
this.logger_ = new Logger();
this.db_ = null;
this.isIndexing_ = false;
}
static instance() {
@@ -51,7 +52,7 @@ class SearchEngine {
}
async rebuildIndex() {
async rebuildIndex_() {
let noteIds = await this.db().selectAll('SELECT id FROM notes WHERE is_conflict = 0 AND encryption_applied = 0');
noteIds = noteIds.map(n => n.id);
@@ -77,14 +78,28 @@ class SearchEngine {
Setting.setValue('searchEngine.lastProcessedChangeId', lastChangeId);
}
scheduleSyncTables() {
if (this.scheduleSyncTablesIID_) return;
this.scheduleSyncTablesIID_ = setTimeout(async () => {
await this.syncTables();
this.scheduleSyncTablesIID_ = null;
}, 10000);
}
async syncTables() {
if (this.isIndexing_) return;
this.isIndexing_ = true;
this.logger().info('SearchEngine: Updating FTS table...');
await ItemChange.waitForAllSaved();
if (!Setting.value('searchEngine.initialIndexingDone')) {
await this.rebuildIndex();
Setting.setValue('searchEngine.initialIndexingDone', true)
await this.rebuildIndex_();
Setting.setValue('searchEngine.initialIndexingDone', true);
this.isIndexing_ = false;
return;
}
@@ -135,6 +150,8 @@ class SearchEngine {
await ItemChangeUtils.deleteProcessedChanges();
this.logger().info('SearchEngine: Updated FTS table in ' + (Date.now() - startTime) + 'ms');
this.isIndexing_ = false;
}
async countRows() {

View File

@@ -242,6 +242,54 @@ function surroundKeywords(keywords, text, prefix, suffix) {
return text.replace(re, prefix + '$1' + suffix);
}
/**
 * Rewrites a regular-expression source string so that each plain Latin
 * letter also matches its accented variants, e.g. "e" becomes "[eèéêëěēę]".
 *
 * Only literal letters are expanded. Backslash escapes (such as \d, \b, \n,
 * \xHH, \uHHHH, \u{...}, \cX) and existing character classes ("[...]") are
 * copied through unchanged, because expanding a letter inside them would
 * change the meaning of the pattern or make it invalid.
 *
 * @param {string} regexString - Source of a regular expression.
 * @returns {string} The rewritten source, or '' when the input is falsy.
 */
function replaceRegexDiacritics(regexString) {
	if (!regexString) return '';

	const diacriticReplacements = {
		'a': '[aàáâãäåāą]',
		'A': '[AÀÁÂÃÄÅĀĄ]',
		'c': '[cçćč]',
		'C': '[CÇĆČ]',
		'd': '[dđď]',
		'D': '[DĐĎ]',
		'e': '[eèéêëěēę]',
		'E': '[EÈÉÊËĚĒĘ]',
		'i': '[iìíîïī]',
		'I': '[IÌÍÎÏĪ]',
		'l': '[lł]',
		'L': '[LŁ]',
		'n': '[nñňń]',
		'N': '[NÑŇŃ]',
		'o': '[oòóôõöøō]',
		'O': '[OÒÓÔÕÖØŌ]',
		'r': '[rř]',
		'R': '[RŘ]',
		's': '[sšś]',
		'S': '[SŠŚ]',
		't': '[tť]',
		'T': '[TŤ]',
		'u': '[uùúûüůū]',
		'U': '[UÙÚÛÜŮŪ]',
		'y': '[yÿý]',
		'Y': '[YŸÝ]',
		'z': '[zžżź]',
		'Z': '[ZŽŻŹ]',
	};

	// Splits the pattern into tokens, tried in this order:
	//   1. a backslash escape (multi-character forms first, then "\" + any char,
	//      then a lone trailing "\"),
	//   2. a character class, including escaped characters inside it; an
	//      unterminated class runs to the end of the string,
	//   3. any other single character.
	const tokenRegex = /\\(?:u\{[0-9a-fA-F]+\}|u[0-9a-fA-F]{4}|x[0-9a-fA-F]{2}|c[A-Za-z]|[\s\S])?|\[(?:\\[\s\S]|[^\]\\])*\]?|[\s\S]/g;

	return regexString.replace(tokenRegex, (token) => {
		// Escapes and character classes are longer than one character and
		// are emitted verbatim; only single literal characters are looked up.
		if (token.length !== 1) return token;
		return diacriticReplacements[token] || token;
	});
}
// Matches one character from the ranges used to detect Japanese text: CJK
// symbols and punctuation, hiragana, katakana, full-width/half-width forms,
// and CJK ideographs. The ideograph ranges overlap with REGEX_CHINESE.
const REGEX_JAPANESE = /[\u3000-\u303f]|[\u3040-\u309f]|[\u30a0-\u30ff]|[\uff00-\uff9f]|[\u4e00-\u9faf]|[\u3400-\u4dbf]/;
// Matches one character from the ranges used to detect Chinese text: CJK
// unified ideographs and their extensions, plus the CJK compatibility ranges.
// The `u` flag is required for the \u{...} code points above U+FFFF.
const REGEX_CHINESE = /[\u4e00-\u9fff]|[\u3400-\u4dbf]|[\u{20000}-\u{2a6df}]|[\u{2a700}-\u{2b73f}]|[\u{2b740}-\u{2b81f}]|[\u{2b820}-\u{2ceaf}]|[\uf900-\ufaff]|[\u3300-\u33ff]|[\ufe30-\ufe4f]|[\u{2f800}-\u{2fa1f}]/u;
// Matches one character from the ranges used to detect Korean text: Hangul
// syllables and the Hangul jamo ranges.
const REGEX_KOREAN = /[\uac00-\ud7af]|[\u1100-\u11ff]|[\u3130-\u318f]|[\ua960-\ua97f]|[\ud7b0-\ud7ff]/;
@@ -253,4 +301,4 @@ function scriptType(s) {
return 'en';
}
module.exports = { removeDiacritics, escapeFilename, wrap, splitCommandString, padLeft, toTitleCase, urlDecode, escapeHtml, pregQuote, surroundKeywords, scriptType };
module.exports = { removeDiacritics, escapeFilename, wrap, splitCommandString, padLeft, toTitleCase, urlDecode, escapeHtml, pregQuote, surroundKeywords, scriptType, replaceRegexDiacritics };