From 5d2baa872e7325c8aa5d69d5fdde5e12196fc7ff Mon Sep 17 00:00:00 2001 From: Naveen M V <30305957+naviji@users.noreply.github.com> Date: Fri, 11 Sep 2020 21:52:32 +0000 Subject: [PATCH] Desktop: Fix wildcard search (#3713) --- CliClient/tests/filterParser.js | 6 ++- CliClient/tests/services_SearchFuzzy.js | 18 ++++++++ ElectronClient/plugins/GotoAnything.jsx | 9 ++-- .../lib/services/searchengine/SearchEngine.js | 44 +++++++++++++------ .../lib/services/searchengine/filterParser.ts | 19 ++++++-- 5 files changed, 74 insertions(+), 22 deletions(-) diff --git a/CliClient/tests/filterParser.js b/CliClient/tests/filterParser.js index 2ca86b771..bbc95fa76 100644 --- a/CliClient/tests/filterParser.js +++ b/CliClient/tests/filterParser.js @@ -4,8 +4,10 @@ require('app-module-path').addPath(__dirname); const filterParser = require('lib/services/searchengine/filterParser.js').default; // import filterParser from 'lib/services/searchengine/filterParser.js'; -const makeTerm = (name, value, negated, quoted = false) => { - if (name !== 'text') { return { name, value, negated }; } else { return { name, value, negated, quoted }; } +const makeTerm = (name, value, negated, quoted = false, wildcard = false) => { + if (name === 'text') { return { name, value, negated, quoted, wildcard }; } + if (name === 'title' || name === 'body') { return { name, value, negated, wildcard }; } + return { name, value, negated }; }; describe('filterParser should be correct filter for keyword', () => { diff --git a/CliClient/tests/services_SearchFuzzy.js b/CliClient/tests/services_SearchFuzzy.js index 060a72a78..48c78a35d 100644 --- a/CliClient/tests/services_SearchFuzzy.js +++ b/CliClient/tests/services_SearchFuzzy.js @@ -141,5 +141,23 @@ describe('services_SearchFuzzy', function() { expect(rows.map(r=>r.id)).toContain(n5.id); })); + it('should leave wild card searches alone', asyncTest(async () => { + let rows; + const n1 = await Note.save({ title: 'abc def' }); + const n2 = await Note.save({ title: 
'abcc ghi' }); + const n3 = await Note.save({ title: 'abccc ghi' }); + const n4 = await Note.save({ title: 'abcccc ghi' }); + const n5 = await Note.save({ title: 'wxy zzz' }); + + await engine.syncTables(); + + rows = await engine.search('abc*', { fuzzy: true }); + + expect(rows.length).toBe(4); + expect(rows.map(r=>r.id)).toContain(n1.id); + expect(rows.map(r=>r.id)).toContain(n2.id); + expect(rows.map(r=>r.id)).toContain(n3.id); + expect(rows.map(r=>r.id)).toContain(n4.id); + })); }); diff --git a/ElectronClient/plugins/GotoAnything.jsx b/ElectronClient/plugins/GotoAnything.jsx index c26d34914..a2945593a 100644 --- a/ElectronClient/plugins/GotoAnything.jsx +++ b/ElectronClient/plugins/GotoAnything.jsx @@ -177,8 +177,9 @@ class Dialog extends React.PureComponent { return output.join(' '); } - keywords() { - return this.props.highlightedWords; + async keywords(searchQuery) { + const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery, false); + return SearchEngine.instance().allParsedQueryTerms(parsedQuery); } markupToHtml() { @@ -226,7 +227,7 @@ class Dialog extends React.PureComponent { } } else { const limit = 20; - const searchKeywords = this.keywords(); + const searchKeywords = await this.keywords(searchQuery); const notes = await Note.byIds(results.map(result => result.id).slice(0, limit), { fields: ['id', 'body', 'markup_language', 'is_todo', 'todo_completed'] }); const notesById = notes.reduce((obj, { id, body, markup_language }) => ((obj[[id]] = { id, body, markup_language }), obj), {}); @@ -282,7 +283,7 @@ class Dialog extends React.PureComponent { this.setState({ listType: listType, results: results, - keywords: this.keywords(), + keywords: await this.keywords(searchQuery), selectedItemId: results.length === 0 ? 
null : results[0].id, resultsInBody: resultsInBody, }); diff --git a/ReactNativeClient/lib/services/searchengine/SearchEngine.js b/ReactNativeClient/lib/services/searchengine/SearchEngine.js index 43a93aa62..32ae8b80f 100644 --- a/ReactNativeClient/lib/services/searchengine/SearchEngine.js +++ b/ReactNativeClient/lib/services/searchengine/SearchEngine.js @@ -434,7 +434,6 @@ class SearchEngine { } async parseQuery(query, fuzzy = false) { - // fuzzy = false; const trimQuotes = (str) => str.startsWith('"') ? str.substr(1, str.length - 2) : str; let allTerms = []; @@ -453,18 +452,22 @@ class SearchEngine { const fuzzyScore = []; let numFuzzyMatches = []; let terms = null; - if (fuzzy) { - const fuzzyText = await this.fuzzifier(textTerms.filter(x => !x.quoted).map(x => trimQuotes(x.value))); - const fuzzyTitle = await this.fuzzifier(titleTerms.map(x => trimQuotes(x.value))); - const fuzzyBody = await this.fuzzifier(bodyTerms.map(x => trimQuotes(x.value))); - const phraseSearches = textTerms.filter(x => x.quoted).map(x => x.value); - // Save number of matches we got for each word - // fuzzifier() is currently set to return at most 3 matches) + if (fuzzy) { + const fuzzyText = await this.fuzzifier(textTerms.filter(x => !(x.quoted || x.wildcard)).map(x => trimQuotes(x.value))); + const fuzzyTitle = await this.fuzzifier(titleTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value))); + const fuzzyBody = await this.fuzzifier(bodyTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value))); + + const phraseTextSearch = textTerms.filter(x => x.quoted); + const wildCardSearch = textTerms.concat(titleTerms).concat(bodyTerms).filter(x => x.wildcard); + + // Save number of fuzzy matches we got for each word + // fuzzifier() is currently set to return at most 3 matches // We need to know which fuzzy words go together so that we can filter out notes that don't contain a required word. 
numFuzzyMatches = fuzzyText.concat(fuzzyTitle).concat(fuzzyBody).map(x => x.length); - for (let i = 0; i < phraseSearches.length; i++) { - numFuzzyMatches.push(1); // Phrase searches are preserved without fuzzification + for (let i = 0; i < phraseTextSearch.length + wildCardSearch.length; i++) { + // Phrase searches and wildcard searches are preserved without fuzzification (A single match) + numFuzzyMatches.push(1); } const mergedFuzzyText = [].concat.apply([], fuzzyText); @@ -474,18 +477,33 @@ class SearchEngine { const fuzzyTextTerms = mergedFuzzyText.map(x => { return { name: 'text', value: x.word, negated: false, score: x.score }; }); const fuzzyTitleTerms = mergedFuzzyTitle.map(x => { return { name: 'title', value: x.word, negated: false, score: x.score }; }); const fuzzyBodyTerms = mergedFuzzyBody.map(x => { return { name: 'body', value: x.word, negated: false, score: x.score }; }); - const phraseTextTerms = phraseSearches.map(x => { return { name: 'text', value: x, negated: false, score: 0 }; }); + // Remove previous text, title and body and replace with fuzzy versions allTerms = allTerms.filter(x => (x.name !== 'text' && x.name !== 'title' && x.name !== 'body')); - allFuzzyTerms = allTerms.concat(fuzzyTextTerms).concat(fuzzyTitleTerms).concat(fuzzyBodyTerms).concat(phraseTextTerms); + // The order matters here! + // The text goes first, then title, then body, then phrase and finally wildcard + // This is because it needs to match with numFuzzyMatches. + allFuzzyTerms = allTerms.concat(fuzzyTextTerms).concat(fuzzyTitleTerms).concat(fuzzyBodyTerms).concat(phraseTextSearch).concat(wildCardSearch); const allTextTerms = allFuzzyTerms.filter(x => x.name === 'title' || x.name === 'body' || x.name === 'text'); for (let i = 0; i < allTextTerms.length; i++) { + // Phrase searches and wildcard searches will get a fuzziness score of zero. 
+ // This means that they will go first in the sort order (Even if there are other words with matches in the title) + // Undesirable? - fuzzyScore.push(allFuzzyTerms[i].score ? allFuzzyTerms[i].score : 0); + fuzzyScore.push(allTextTerms[i].score ? allTextTerms[i].score : 0); } - terms = { _: fuzzyTextTerms.concat(phraseTextTerms).map(x =>trimQuotes(x.value)), 'title': fuzzyTitleTerms.map(x =>trimQuotes(x.value)), 'body': fuzzyBodyTerms.map(x =>trimQuotes(x.value)) }; + const wildCardTextTerms = wildCardSearch.filter(x => x.name === 'text').map(x =>trimQuotes(x.value)); + const wildCardTitleTerms = wildCardSearch.filter(x => x.name === 'title').map(x =>trimQuotes(x.value)); + const wildCardBodyTerms = wildCardSearch.filter(x => x.name === 'body').map(x =>trimQuotes(x.value)); + const phraseTextTerms = phraseTextSearch.map(x => trimQuotes(x.value)); + + terms = { + _: fuzzyTextTerms.map(x => trimQuotes(x.value)).concat(phraseTextTerms).concat(wildCardTextTerms), + title: fuzzyTitleTerms.map(x => trimQuotes(x.value)).concat(wildCardTitleTerms), + body: fuzzyBodyTerms.map(x => trimQuotes(x.value)).concat(wildCardBodyTerms), + }; } else { const nonNegatedTextTerms = textTerms.length + titleTerms.length + bodyTerms.length; for (let i = 0; i < nonNegatedTextTerms; i++) { diff --git a/ReactNativeClient/lib/services/searchengine/filterParser.ts b/ReactNativeClient/lib/services/searchengine/filterParser.ts index eb04735f0..960c603e3 100644 --- a/ReactNativeClient/lib/services/searchengine/filterParser.ts +++ b/ReactNativeClient/lib/services/searchengine/filterParser.ts @@ -4,6 +4,7 @@ interface Term { value: string negated: boolean quoted?: boolean + wildcard?: boolean } const makeTerm = (name: string, value: string): Term => { @@ -88,7 +89,7 @@ const parseQuery = (query: string): Term[] => { // eg. 
Split title:"hello world" to title:hello title:world const values = trimQuotes(value).split(/[\s-_]+/); values.forEach(value => { - result.push({ name, value, negated }); + result.push({ name, value, negated, wildcard: value.indexOf('*') >= 0 }); }); } else { result.push({ name, value, negated }); @@ -97,9 +98,21 @@ const parseQuery = (query: string): Term[] => { // Every word is quoted if not already. // By quoting the word, FTS match query will take care of removing dashes and other word seperators. if (value.startsWith('-')) { - result.push({ name: 'text', value: quote(value.slice(1)) , negated: true, quoted: quoted(value) }); + result.push({ + name: 'text', + value: quote(value.slice(1)), + negated: true, + quoted: quoted(value), + wildcard: value.indexOf('*') >= 0, + }); } else { - result.push({ name: 'text', value: quote(value), negated: false, quoted: quoted(value) }); + result.push({ + name: 'text', + value: quote(value), + negated: false, + quoted: quoted(value), + wildcard: value.indexOf('*') >= 0, + }); } } }