1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-30 10:36:35 +02:00

Desktop: Fixes #3153: Make GotoAnyting work with East Asian charactors (#3180)

This commit is contained in:
叡山电车 2020-06-04 00:06:14 +08:00 committed by GitHub
parent 720494d870
commit 5082181c49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 8 deletions

View File

@ -257,22 +257,29 @@ describe('services_SearchEngine', function() {
it('should support queries with Chinese characters', asyncTest(async () => {
let rows;
const n1 = await Note.save({ title: '我是法国人' });
const n1 = await Note.save({ title: '我是法国人', body: '中文测试' });
await engine.syncTables();
expect((await engine.search('我')).length).toBe(1);
expect((await engine.search('法国人')).length).toBe(1);
expect((await engine.search('法国人*'))[0].fields.sort()).toEqual(['body', 'title']); // usually assume that keyword was matched in body
expect((await engine.search('测试')).length).toBe(1);
expect((await engine.search('测试'))[0].fields).toEqual(['body']);
expect((await engine.search('测试*'))[0].fields).toEqual(['body']);
}));
it('should support queries with Japanese characters', asyncTest(async () => {
let rows;
const n1 = await Note.save({ title: '私は日本語を話すことができません' });
const n1 = await Note.save({ title: '私は日本語を話すことができません', body: 'テスト' });
await engine.syncTables();
expect((await engine.search('日本')).length).toBe(1);
expect((await engine.search('できません')).length).toBe(1);
expect((await engine.search('できません*'))[0].fields.sort()).toEqual(['body', 'title']); // usually assume that keyword was matched in body
expect((await engine.search('テスト'))[0].fields.sort()).toEqual(['body']);
}));
it('should support queries with Korean characters', asyncTest(async () => {
@ -302,10 +309,15 @@ describe('services_SearchEngine', function() {
await engine.syncTables();
expect((await engine.search('title:你好*')).length).toBe(1);
expect((await engine.search('title:你好*'))[0].fields).toEqual(['title']);
expect((await engine.search('body:法国人')).length).toBe(1);
expect((await engine.search('body:法国人'))[0].fields).toEqual(['body']);
expect((await engine.search('body:你好')).length).toBe(0);
expect((await engine.search('title:你好 body:法国人')).length).toBe(1);
expect((await engine.search('title:你好 body:法国人'))[0].fields.sort()).toEqual(['body', 'title']);
expect((await engine.search('title:你好 body:bla')).length).toBe(0);
expect((await engine.search('title:你好 我是')).length).toBe(1);
expect((await engine.search('title:你好 我是'))[0].fields.sort()).toEqual(['body', 'title']);
expect((await engine.search('title:bla 我是')).length).toBe(0);
// For non-alpha char, only the first field is looked at, the following ones are ignored

View File

@ -235,13 +235,36 @@ class SearchEngine {
return occurenceCount / spread;
}
processResults_(rows, parsedQuery) {
processBasicSearchResults_(rows, parsedQuery) {
const valueRegexs = parsedQuery.keys.includes('_') ? parsedQuery.terms['_'].map(term => term.valueRegex || term.value) : [];
const isTitleSearch = parsedQuery.keys.includes('title');
const isOnlyTitle = parsedQuery.keys.length === 1 && isTitleSearch;
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const offsets = row.offsets.split(' ').map(o => Number(o));
row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);
row.fields = this.fieldNamesFromOffsets_(offsets);
const testTitle = regex => new RegExp(regex, 'ig').test(row.title);
const matchedFields = {
title: isTitleSearch || valueRegexs.some(testTitle),
body: !isOnlyTitle,
};
row.fields = Object.keys(matchedFields).filter(key => matchedFields[key]);
row.weight = 0;
}
}
processResults_(rows, parsedQuery, isBasicSearchResults = false) {
if (isBasicSearchResults) {
this.processBasicSearchResults_(rows, parsedQuery);
} else {
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const offsets = row.offsets.split(' ').map(o => Number(o));
row.weight = this.calculateWeight_(offsets, parsedQuery.termCount);
row.fields = this.fieldNamesFromOffsets_(offsets);
}
}
rows.sort((a, b) => {
if (a.fields.includes('title') && !b.fields.includes('title')) return -1;
@ -383,6 +406,8 @@ class SearchEngine {
const searchOptions = {};
for (const key of parsedQuery.keys) {
if (parsedQuery.terms[key].length === 0) continue;
const term = parsedQuery.terms[key][0].value;
if (key === '_') searchOptions.anywherePattern = `*${term}*`;
if (key === 'title') searchOptions.titlePattern = `*${term}*`;
@ -415,10 +440,13 @@ class SearchEngine {
query = this.normalizeText_(query);
const searchType = this.determineSearchType_(query, options.searchType);
const parsedQuery = this.parseQuery(query);
if (searchType === SearchEngine.SEARCH_TYPE_BASIC) {
// Non-alphabetical languages aren't support by SQLite FTS (except with extensions which are not available in all platforms)
return this.basicSearch(query);
const rows = await this.basicSearch(query);
this.processResults_(rows, parsedQuery, true);
return rows;
} else { // SEARCH_TYPE_FTS
// FTS will ignore all special characters, like "-" in the index. So if
// we search for "this-phrase" it won't find it because it will only
@ -426,7 +454,6 @@ class SearchEngine {
// when searching.
// https://github.com/laurent22/joplin/issues/1075#issuecomment-459258856
query = query.replace(/-/g, ' ');
const parsedQuery = this.parseQuery(query);
const sql = `
SELECT
notes_fts.id,