1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-24 10:27:10 +02:00

All: Improved support for Japanese, Chinese, Korean search queries (also applies to Goto Anything)

This commit is contained in:
Laurent Cozic 2019-04-03 07:46:41 +01:00
parent 252d937405
commit 72b36522e8
4 changed files with 44 additions and 10 deletions

View File

@ -256,6 +256,23 @@ describe('services_SearchEngine', function() {
expect((await engine.search('말')).length).toBe(1);
}));
// Checks that the `title:` / `body:` prefixes restrict matching to that field
// when the search terms are written in Chinese characters.
it('should support field restricted queries with Chinese characters', asyncTest(async () => {
	await Note.save({ title: "你好", body: "我是法国人" });
	await engine.syncTables();

	// Title-restricted term, with a trailing wildcard
	expect((await engine.search('title:你好*')).length).toBe(1);

	// The title text must not match when the query is restricted to the body
	expect((await engine.search('body:你好')).length).toBe(0);

	// When several fields are given, all of them have to match
	expect((await engine.search('title:你好 body:法国人')).length).toBe(1);
	expect((await engine.search('title:你好 body:bla')).length).toBe(0);

	// A field-restricted term can be combined with an unrestricted one
	expect((await engine.search('title:你好 我是')).length).toBe(1);
	expect((await engine.search('title:bla 我是')).length).toBe(0);

	// For queries with non-alphabetic characters, only the first term given
	// for a field is used; further terms for the same field (here
	// "title:hello") are ignored.
	expect((await engine.search('title:你好 title:hello')).length).toBe(1);
}));
it('should parse normal query strings', asyncTest(async () => {
let rows;

View File

@ -358,6 +358,20 @@ class Note extends BaseItem {
return this.modelSelectOne('SELECT ' + this.previewFieldsSql(options.fields) + ' FROM notes WHERE is_conflict = 0 AND id = ?', [noteId]);
}
static async search(options = null) {
if (!options) options = {};
if (!options.conditions) options.conditions = [];
if (!options.conditionsParams) options.conditionsParams = [];
if (options.bodyPattern) {
const pattern = options.bodyPattern.replace(/\*/g, '%');
options.conditions.push('body LIKE ?');
options.conditionsParams.push(pattern);
}
return super.search(options);
}
static conflictedNotes() {
return this.modelSelectAll('SELECT * FROM notes WHERE is_conflict = 1');
}

View File

@ -344,17 +344,18 @@ class SearchEngine {
}
async basicSearch(query) {
let p = query.split(' ');
let temp = [];
for (let i = 0; i < p.length; i++) {
let t = p[i].trim();
if (!t) continue;
temp.push(t);
query = query.replace(/\*/, '');
const parsedQuery = this.parseQuery(query);
const searchOptions = {};
for (const key of parsedQuery.keys) {
const term = parsedQuery.terms[key][0].value;
if (key === '_') searchOptions.anywherePattern = '*' + term + '*';
if (key === 'title') searchOptions.titlePattern = '*' + term + '*';
if (key === 'body') searchOptions.bodyPattern = '*' + term + '*';
}
return await Note.previews(null, {
anywherePattern: '*' + temp.join('*') + '*',
});
return Note.previews(null, searchOptions);
}
async search(query) {

View File

@ -267,8 +267,10 @@ const REGEX_CHINESE = /[\u4e00-\u9fff]|[\u3400-\u4dbf]|[\u{20000}-\u{2a6df}]|[\u
// Matches a single Hangul character. The alternatives correspond to these
// Unicode blocks: Hangul Syllables (U+AC00-U+D7AF), Hangul Jamo
// (U+1100-U+11FF), Hangul Compatibility Jamo (U+3130-U+318F), Hangul Jamo
// Extended-A (U+A960-U+A97F) and Hangul Jamo Extended-B (U+D7B0-U+D7FF).
const REGEX_KOREAN = /[\uac00-\ud7af]|[\u1100-\u11ff]|[\u3130-\u318f]|[\ua960-\ua97f]|[\ud7b0-\ud7ff]/;
/**
 * Guesses which script a string is written in by testing it against the
 * Chinese, Japanese and Korean patterns, in that order.
 *
 * @param {string} s String to inspect.
 * @returns {string} 'zh', 'ja' or 'ko' for the first pattern that matches,
 *   or 'en' when none of them does.
 */
function scriptType(s) {
	// A string made up entirely of Chinese characters is matched by the
	// Japanese pattern too, so the Chinese check has to come first.
	if (REGEX_CHINESE.test(s)) return 'zh';
	if (REGEX_JAPANESE.test(s)) return 'ja';
	if (REGEX_KOREAN.test(s)) return 'ko';
	return 'en';
}