1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-11-27 08:21:03 +02:00

Desktop, Mobile, Cli: Fixes #9694: Correctly search HTML-entity encoded text

This commit is contained in:
Laurent Cozic 2024-02-08 17:58:07 +00:00
parent c54603025b
commit 0b3a5a431d
3 changed files with 18 additions and 1 deletions

View File

@ -536,6 +536,15 @@ describe('services/SearchEngine', () => {
expect((await engine.search('hello', { appendWildCards: true })).length).toBe(2);
}));
it('should search HTML-entity encoded text', (async () => {
	// Regression test for https://github.com/laurent22/joplin/issues/9694
	//
	// The title must be stored with named HTML entities, not the literal
	// characters: otherwise the query below matches trivially and the
	// entity-decoding step in SearchEngine is never exercised.
	await Note.save({ title: '&eacute;&ccedil;&agrave;' }); // éçà
	await engine.syncTables();

	// Searching for the decoded text must find the entity-encoded note.
	const rows = await engine.search('éçà');
	expect(rows.length).toBe(1);
}));
// Disabled for now:
// https://github.com/laurent22/joplin/issues/9769#issuecomment-1912459744

View File

@ -14,6 +14,7 @@ import NoteResource from '../../models/NoteResource';
import BaseItem from '../../models/BaseItem';
import { isCallbackUrl, parseCallbackUrl } from '../../callbackUrlUtils';
import replaceUnsupportedCharacters from '../../utils/replaceUnsupportedCharacters';
import { htmlentitiesDecode } from '@joplin/utils/html';
const { sprintf } = require('sprintf-js');
const { pregQuote, scriptType, removeDiacritics } = require('../../string-utils.js');
@ -607,6 +608,10 @@ export default class SearchEngine {
// NULL characters can break FTS. Remove them.
normalizedText = replaceUnsupportedCharacters(normalizedText);
// We need to decode HTML entities too
// https://github.com/laurent22/joplin/issues/9694
normalizedText = htmlentitiesDecode(normalizedText);
return removeDiacritics(normalizedText.toLowerCase());
}

View File

@ -25,7 +25,10 @@ const selfClosingElements = [
'wbr',
];
export const htmlentities = new Entities().encode;
// A single Entities instance backs both the encoder and the decoder exported below.
// NOTE(review): the methods are exported as detached references, which assumes
// `encode`/`decode` do not rely on `this` - confirm against the Entities implementation.
const entitiesInstance = new Entities();
// Encodes text using the shared Entities instance.
export const htmlentities = entitiesInstance.encode;
// Decoding counterpart of `htmlentities`, taken from the same Entities instance.
export const htmlentitiesDecode = entitiesInstance.decode;
export const attributesHtml = (attr: Record<string, any>) => {
const output = [];