All: Search: More multi-language support

2024-12-24 10:27:10 +02:00 · 2019-01-18 17:56:56 +00:00 · 2019-01-18 17:56:56 +00:00 · 8fdc0bf17c
commit 8fdc0bf17c
parent 96cd56548e
12 changed files with 123 additions and 57 deletions
--- a/CliClient/app/command-sync.js
+++ b/CliClient/app/command-sync.js
@ -117,7 +117,6 @@ class Command extends BaseCommand {
 		this.releaseLockFn_ = null;

 		// Lock is unique per profile/database
-		// TODO: use SQLite database to do lock?
 		const lockFilePath = require('os').tmpdir() + '/synclock_' + md5(escape(Setting.value('profileDir'))); // https://github.com/pvorb/node-md5/issues/41
 		if (!await fs.pathExists(lockFilePath)) await fs.writeFile(lockFilePath, 'synclock');

--- a/ElectronClient/app/compile.js
+++ b/ElectronClient/app/compile.js
@ -40,4 +40,9 @@ fs.readdirSync(guiPath).forEach((filename) => {
 	}
 });

-fs.copySync(basePath + '/ReactNativeClient/lib/string-utils-common.js', __dirname + '/gui/note-viewer/lib.js');
+const libContent = [
+	fs.readFileSync(basePath + '/ReactNativeClient/lib/string-utils-common.js', 'utf8'),
+	fs.readFileSync(basePath + '/ReactNativeClient/lib/markJsUtils.js', 'utf8'),
+];
+
+fs.writeFileSync(__dirname + '/gui/note-viewer/lib.js', libContent.join('\n'), 'utf8');
--- a/ElectronClient/app/gui/NoteList.jsx
+++ b/ElectronClient/app/gui/NoteList.jsx
@ -4,6 +4,7 @@ const { connect } = require('react-redux');
 const { time } = require('lib/time-utils.js');
 const { themeStyle } = require('../theme.js');
 const BaseModel = require('lib/BaseModel');
+const markJsUtils = require('lib/markJsUtils');
 const { _ } = require('lib/locale.js');
 const { bridge } = require('electron').remote.require('./bridge');
 const Menu = bridge().Menu;
@ -14,7 +15,7 @@ const InteropServiceHelper = require('../InteropServiceHelper.js');
 const Search = require('lib/models/Search');
 const Mark = require('mark.js/dist/mark.min.js');
 const SearchEngine = require('lib/services/SearchEngine');
-const { replaceRegexDiacritics } = require('lib/string-utils');
+const { replaceRegexDiacritics, pregQuote } = require('lib/string-utils');

 class NoteListComponent extends React.Component {

@ -279,15 +280,10 @@ class NoteListComponent extends React.Component {
 			for (let i = 0; i < highlightedWords.length; i++) {
 				const w = highlightedWords[i];

-				if (w.type === 'regex') {
-					mark.markRegExp(new RegExp('\\b' + replaceRegexDiacritics(w.value) + '\\b', 'gmi'), {
-						acrossElements: true,
-					});
-				} else {
-					mark.mark([w.value], {
-						accuracy: 'exactly',
-					});
-				}
+				markJsUtils.markKeyword(mark, w, {
+					pregQuote: pregQuote,
+					replaceRegexDiacritics: replaceRegexDiacritics,
+				});
 			}

 			// Note: in this case it is safe to use dangerouslySetInnerHTML because titleElement
--- a/ElectronClient/app/gui/note-viewer/index.html
+++ b/ElectronClient/app/gui/note-viewer/index.html
@ -262,39 +262,46 @@
 		for (let i = 0; i < keywords.length; i++) {
 			let keyword = keywords[i];

-			if (typeof keyword === 'string') {
-				keyword = {
-					type: 'text',
-					value: keyword,
-				};
-			}
+			markJsUtils.markKeyword(mark_, keyword, {
+				pregQuote: pregQuote,
+				replaceRegexDiacritics: replaceRegexDiacritics,
+			}, {
+				each: onEachElement,				
+			});

-			const isBasicSearch = ['ja', 'zh', 'ko'].indexOf(keyword.scriptType) >= 0;
+			// if (typeof keyword === 'string') {
+			// 	keyword = {
+			// 		type: 'text',
+			// 		value: keyword,
+			// 	};
+			// }

-			if (keyword.type === 'regex') {
-				const b = '[' + pregQuote(' \t\n\r,.,+-*?!={}<>|:"\'()[]') + ']+';
+			// const isBasicSearch = ['ja', 'zh', 'ko'].indexOf(keyword.scriptType) >= 0;

-				// The capturing groups are a hack to go around the strange behaviour of the ignoreGroups property. What we want is to
-				// exclude the first and last matches (the boundaries). What ignoreGroups does is ignore the first X groups. So
-				// we put the first boundary and the keyword inside a group, that way the first groups is ignored (ignoreGroups = 1)
-				// the second is included. And the last boundary is dropped because it's not in any group (it's important NOT to
-				// put this one in a group because otherwise it cannot be excluded).
-				let regexString = '(' + b + ')' + '(' + replaceRegexDiacritics(keyword.value) + ')' + b;
-				if (isBasicSearch) regexString = keyword.value;
+			// if (keyword.type === 'regex') {
+			// 	const b = '[' + pregQuote(' \t\n\r,.,+-*?!={}<>|:"\'()[]') + ']+';

-				mark_.markRegExp(new RegExp(regexString, 'gmi'), {
-					each: onEachElement,
-					acrossElements: true,
-					ignoreGroups: 1,
-				});
-			} else {
-				let accuracy = keyword.accuracy ? keyword.accuracy : 'exactly';
-				if (isBasicSearch) accuracy = 'partially';
-				mark_.mark([keyword.value], {
-					each: onEachElement,
-					accuracy: accuracy, 
-				});
-			}
+			// 	// The capturing groups are a hack to go around the strange behaviour of the ignoreGroups property. What we want is to
+			// 	// exclude the first and last matches (the boundaries). What ignoreGroups does is ignore the first X groups. So
+			// 	// we put the first boundary and the keyword inside a group, that way the first groups is ignored (ignoreGroups = 1)
+			// 	// the second is included. And the last boundary is dropped because it's not in any group (it's important NOT to
+			// 	// put this one in a group because otherwise it cannot be excluded).
+			// 	let regexString = '(' + b + ')' + '(' + replaceRegexDiacritics(keyword.value) + ')' + b;
+			// 	if (isBasicSearch) regexString = keyword.value;
+
+			// 	mark_.markRegExp(new RegExp(regexString, 'gmi'), {
+			// 		each: onEachElement,
+			// 		acrossElements: true,
+			// 		ignoreGroups: 1,
+			// 	});
+			// } else {
+			// 	let accuracy = keyword.accuracy ? keyword.accuracy : 'exactly';
+			// 	if (isBasicSearch) accuracy = 'partially';
+			// 	mark_.mark([keyword.value], {
+			// 		each: onEachElement,
+			// 		accuracy: accuracy, 
+			// 	});
+			// }
 		}

 		ipcProxySendToHost('setMarkerCount', elementIndex);
--- a/ElectronClient/app/theme.js
+++ b/ElectronClient/app/theme.js
@ -227,7 +227,7 @@ function themeStyle(theme) {
 		output = Object.assign({}, globalStyle, fontSizes, darkStyle);
 	}

-	// TODO: All the theme specific things should go in addExtraStyles
+	// Note: All the theme specific things should go in addExtraStyles
 	// so that their definition is not split between here and the
 	// beginning of the file. At least new styles should go in
 	// addExtraStyles.
--- a/ReactNativeClient/lib/components/screens/notes.js
+++ b/ReactNativeClient/lib/components/screens/notes.js
@ -114,7 +114,7 @@ class NotesScreenComponent extends BaseScreenComponent {
 		if (props.notesParentType == 'Folder') {
 			notes = await Note.previews(props.selectedFolderId, options);
 		} else {
-			notes = await Tag.notes(props.selectedTagId, options); // TODO: should also return previews
+			notes = await Tag.notes(props.selectedTagId, options);
 		}

 		this.props.dispatch({
--- a/ReactNativeClient/lib/import-enex-md-gen.js
+++ b/ReactNativeClient/lib/import-enex-md-gen.js
@ -310,7 +310,7 @@ function isImageMimeType(m) {
 }

 function addResourceTag(lines, resource, alt = "") {
-	// TODO: refactor to use Resource.markdownTag
+	// Note: refactor to use Resource.markdownTag

 	let tagAlt = alt == "" ? resource.alt : alt;
 	if (!tagAlt) tagAlt = '';
@ -512,7 +512,6 @@ function enexXmlToMdArray(stream, resources) {
 			} else if (n == 'q') {
 				section.lines.push('"');
 			} else if (n == 'img') {
-				// TODO: TEST IMAGE
 				if (nodeAttributes.src) { // Many (most?) img tags don't have no source associated, especially when they were imported from HTML
 					let s = '![';
 					if (nodeAttributes.alt) s += nodeAttributes.alt;
--- a/ReactNativeClient/lib/joplin-database.js
+++ b/ReactNativeClient/lib/joplin-database.js
@ -263,7 +263,7 @@ class JoplinDatabase extends Database {
 		// must be set in the synchronizer too.

 		// Note: v16 and v17 don't do anything. They were used to debug an issue.
-		const existingDatabaseVersions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+		const existingDatabaseVersions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18];

 		let currentVersionIndex = existingDatabaseVersions.indexOf(fromVersion);

@ -474,7 +474,7 @@ class JoplinDatabase extends Database {
 					END;`);
 			}

-			if (targetVersion == 16) {
+			if (targetVersion == 18) {
 				const notesNormalized = `
 					CREATE TABLE notes_normalized (
 						id TEXT NOT NULL,
@ -520,8 +520,8 @@ class JoplinDatabase extends Database {
 			try {
 				await this.transactionExecBatch(queries);
 			} catch (error) {
-				if (targetVersion === 15 || targetVersion === 16) {
-					this.logger().warn('Could not upgrade to database v15 or v16 - FTS feature will not be used', error);
+				if (targetVersion === 15 || targetVersion === 18) {
+					this.logger().warn('Could not upgrade to database v15 or v18 - FTS feature will not be used', error);
 				} else {
 					throw error;
 				}
--- a/ReactNativeClient/lib/markJsUtils.js
+++ b/ReactNativeClient/lib/markJsUtils.js
@ -0,0 +1,57 @@
+const markJsUtils = {}
+
+markJsUtils.markKeyword = (mark, keyword, stringUtils, extraOptions = null) => {
+	if (typeof keyword === 'string') {
+		keyword = {
+			type: 'text',
+			value: keyword,
+		};
+	}
+
+	const isBasicSearch = ['ja', 'zh', 'ko'].indexOf(keyword.scriptType) >= 0;
+
+
+
+
+	let value = keyword.value;
+	let accuracy = keyword.accuracy ? keyword.accuracy : 'exactly';
+	if (isBasicSearch) accuracy = 'partially';
+	if (keyword.type === 'regex') {
+		accuracy = 'complementary';
+		value = keyword.originalValue.substr(0, keyword.originalValue.length - 1);
+	}
+
+	mark.mark([value], Object.assign({}, {
+		accuracy: accuracy,
+	}, extraOptions));
+
+
+
+
+	// if (keyword.type === 'regex') {
+	// 	const b = '[' + stringUtils.pregQuote(' \t\n\r,.,+-*?!={}<>|:"\'()[]') + ']+';
+
+	// 	// The capturing groups are a hack to go around the strange behaviour of the ignoreGroups property. What we want is to
+	// 	// exclude the first and last matches (the boundaries). What ignoreGroups does is ignore the first X groups. So
+	// 	// we put the first boundary and the keyword inside a group, that way the first groups is ignored (ignoreGroups = 1)
+	// 	// the second is included. And the last boundary is dropped because it's not in any group (it's important NOT to
+	// 	// put this one in a group because otherwise it cannot be excluded).
+	// 	let regexString = '(' + b + ')' + '(' + stringUtils.replaceRegexDiacritics(keyword.value) + ')' + b;
+	// 	if (isBasicSearch) regexString = keyword.value;
+
+	// 	mark.markRegExp(new RegExp(regexString, 'gmi'), Object.assign({
+	// 		acrossElements: true,
+	// 		ignoreGroups: 1,
+	// 	}, extraOptions));
+	// } else {
+	// 	let accuracy = keyword.accuracy ? keyword.accuracy : 'exactly';
+	// 	if (isBasicSearch) accuracy = 'partially';
+	// 	mark.mark([keyword.value], Object.assign({}, {
+	// 		accuracy: accuracy, 
+	// 	}, extraOptions));
+	// }
+}
+
+if (typeof module !== 'undefined') {
+	module.exports = markJsUtils;
+}
--- a/ReactNativeClient/lib/models/Note.js
+++ b/ReactNativeClient/lib/models/Note.js
@ -238,8 +238,6 @@ class Note extends BaseItem {
 			fields: '*',
 		}

-		// TODO: add support for limits on .search()
-
 		let results = await this.previews(folderId, options);
 		return results.length ? results[0] : null;
 	}
--- a/ReactNativeClient/lib/services/SearchEngine.js
+++ b/ReactNativeClient/lib/services/SearchEngine.js
@ -298,9 +298,9 @@ class SearchEngine {
 				}

 				if (term.indexOf('*') >= 0) {
-					terms[col][i] = { type: 'regex', value: this.queryTermToRegex(term), scriptType: scriptType(term) };
+					terms[col][i] = { type: 'regex', value: this.queryTermToRegex(term), scriptType: scriptType(term), originalValue: term };
 				} else {
-					terms[col][i] = { type: 'text', value: term, scriptType: scriptType(term) };
+					terms[col][i] = { type: 'text', value: term, scriptType: scriptType(term), originalValue: term };
 				}
 			}

@ -363,9 +363,14 @@ class SearchEngine {
 		} else {
 			const parsedQuery = this.parseQuery(query);
 			const sql = 'SELECT id, title, offsets(notes_fts) AS offsets FROM notes_fts WHERE notes_fts MATCH ?'
-			const rows = await this.db().selectAll(sql, [query]);
-			this.orderResults_(rows, parsedQuery);
-			return rows;
+			try {
+				const rows = await this.db().selectAll(sql, [query]);
+				this.orderResults_(rows, parsedQuery);
+				return rows;
+			} catch (error) {
+				this.logger().warn('Cannot execute MATCH query: ' + query + ': ' + error.message);
+				return [];
+			}
 		}
 	}

--- a/ReactNativeClient/lib/synchronizer.js
+++ b/ReactNativeClient/lib/synchronizer.js
@ -267,7 +267,7 @@ class Synchronizer {
 							// This is a bit inefficient because if the resulting action is "updateRemote" we don't need the whole
 							// content, but for now that will do since being reliable is the priority.
 							//
-							// TODO: assuming a particular sync target is guaranteed to have accurate timestamps, the driver maybe
+							// Note: assuming a particular sync target is guaranteed to have accurate timestamps, the driver maybe
 							// could expose this with a accurateTimestamps() method that returns "true". In that case, the test
 							// could be done using the file timestamp and the potentially unnecessary content loading could be skipped.
 							// OneDrive does not appear to have accurate timestamps as lastModifiedDateTime would occasionally be