1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-24 10:27:10 +02:00

All: Resolves #2279: Handle Thai language in search (#2387)

* Update SearchEngine.js

Use basicSearch if the query is a Thai string

* Update string-utils.js

Added Thai regex support

* Update services_SearchEngine.js

Added Thai language test

* Update services_SearchEngine.js

Removed trailing spaces
This commit is contained in:
Kirtan Purohit 2020-01-29 04:50:52 -08:00 committed by GitHub
parent dcbd8aed30
commit fcda843778
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 1 deletions

View File

@ -258,6 +258,16 @@ describe('services_SearchEngine', function() {
expect((await engine.search('말')).length).toBe(1);
}));
it('should support queries with Thai characters', asyncTest(async () => {
	// Save a single note whose title is written entirely in Thai script.
	await Note.save({ title: 'นี่คือคนไทย' });
	// Sync the engine's tables before querying.
	await engine.syncTables();

	// Both queries are substrings of the saved title: a leading fragment
	// and the trailing word. Each one must match exactly that one note.
	expect((await engine.search('นี่คือค')).length).toBe(1);
	expect((await engine.search('ไทย')).length).toBe(1);
}));
it('should support field restricted queries with Chinese characters', asyncTest(async () => {
let rows;
const n1 = await Note.save({ title: '你好', body: '我是法国人' });

View File

@ -386,7 +386,7 @@ class SearchEngine {
const st = scriptType(query);
if (!Setting.value('db.ftsEnabled') || ['ja', 'zh', 'ko'].indexOf(st) >= 0) {
if (!Setting.value('db.ftsEnabled') || ['ja', 'zh', 'ko', 'th'].indexOf(st) >= 0) {
// Non-alphabetical languages aren't supported by SQLite FTS (except with extensions which are not available on all platforms)
return this.basicSearch(query);
} else {

View File

@ -267,6 +267,7 @@ function substrWithEllipsis(s, start, length) {
// Script-detection patterns. Each one matches a string that contains at least
// one code point from the listed Unicode ranges.

// Japanese: CJK Symbols and Punctuation (U+3000-303F), Hiragana (U+3040-309F),
// Katakana (U+30A0-30FF), fullwidth/halfwidth forms (U+FF00-FF9F),
// CJK Unified Ideographs (U+4E00-9FAF) and Extension A (U+3400-4DBF).
const REGEX_JAPANESE = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf]/;
// Chinese: CJK Unified Ideographs (U+4E00-9FFF), Extensions A-E
// (U+3400-4DBF, U+20000-2A6DF, U+2A700-2B73F, U+2B740-2B81F, U+2B820-2CEAF),
// CJK Compatibility Ideographs (U+F900-FAFF), CJK Compatibility (U+3300-33FF),
// CJK Compatibility Forms (U+FE30-FE4F) and the Compatibility Ideographs
// Supplement (U+2F800-2FA1F). The `u` flag is required for the astral ranges.
const REGEX_CHINESE = /[\u4e00-\u9fff\u3400-\u4dbf\u{20000}-\u{2a6df}\u{2a700}-\u{2b73f}\u{2b740}-\u{2b81f}\u{2b820}-\u{2ceaf}\uf900-\ufaff\u3300-\u33ff\ufe30-\ufe4f\u{2f800}-\u{2fa1f}]/u;
// Korean: Hangul Syllables (U+AC00-D7AF), Hangul Jamo (U+1100-11FF),
// Hangul Compatibility Jamo (U+3130-318F), Hangul Jamo Extended-A
// (U+A960-A97F) and Extended-B (U+D7B0-D7FF).
const REGEX_KOREAN = /[\uac00-\ud7af\u1100-\u11ff\u3130-\u318f\ua960-\ua97f\ud7b0-\ud7ff]/;
// Thai: the Thai Unicode block (U+0E00-0E7F).
const REGEX_THAI = /[\u0e00-\u0e7f]/;
function scriptType(s) {
// A string made up entirely of Chinese characters will also be detected as Japanese
@ -274,6 +275,7 @@ function scriptType(s) {
if (REGEX_CHINESE.test(s)) return 'zh';
if (REGEX_JAPANESE.test(s)) return 'ja';
if (REGEX_KOREAN.test(s)) return 'ko';
if (REGEX_THAI.test(s)) return 'th';
return 'en';
}