1
0
mirror of https://github.com/vcmi/vcmi.git synced 2025-08-08 22:26:51 +02:00

Merge pull request #5317 from Laserlicht/search_inp

[1.6.4] improved text search
This commit is contained in:
Ivan Savenko
2025-01-25 13:54:27 +02:00
committed by GitHub
5 changed files with 84 additions and 54 deletions

View File

@@ -41,6 +41,7 @@
#include "../../lib/spells/CSpellHandler.h"
#include "../../lib/spells/ISpellMechanics.h"
#include "../../lib/spells/Problem.h"
#include "../../lib/texts/TextOperations.h"
#include "../../lib/GameConstants.h"
#include "../../lib/mapObjects/CGHeroInstance.h"
@@ -242,7 +243,7 @@ void CSpellWindow::processSpells()
mySpells.reserve(CGI->spellh->objects.size());
for(auto const & spell : CGI->spellh->objects)
{
bool searchTextFound = !searchBox || boost::algorithm::contains(boost::algorithm::to_lower_copy(spell->getNameTranslated()), boost::algorithm::to_lower_copy(searchBox->getText()));
bool searchTextFound = !searchBox || TextOperations::textSearchSimilar(searchBox->getText(), spell->getNameTranslated());
if(onSpellSelect)
{

View File

@@ -55,6 +55,7 @@
#include "../lib/gameState/TavernHeroesPool.h"
#include "../lib/gameState/UpgradeInfo.h"
#include "../lib/texts/CGeneralTextHandler.h"
#include "../lib/texts/TextOperations.h"
#include "../lib/IGameSettings.h"
#include "ConditionalWait.h"
#include "../lib/CConfigHandler.h"
@@ -1594,7 +1595,7 @@ void CObjectListWindow::init(std::shared_ptr<CIntObject> titleWidget_, std::stri
itemsVisible.clear();
for(auto & item : items)
if(boost::algorithm::contains(boost::algorithm::to_lower_copy(item.second), boost::algorithm::to_lower_copy(text)))
if(TextOperations::textSearchSimilar(text, item.second))
itemsVisible.push_back(item);
selected = 0;

View File

@@ -17,61 +17,11 @@
#include "../filesystem/Filesystem.h"
#include "../modding/ModScope.h"
#include "../modding/CModHandler.h"
#include "../texts/TextOperations.h"
#include "../ScopeGuard.h"
VCMI_LIB_NAMESPACE_BEGIN
// Algorithm for detection of typos in words
// Determines how 'different' two strings are - the minimal number of single-element
// insertions, deletions and substitutions needed to turn one string into the other.
// Elements are compared as raw `char` values (no case folding, no UTF-8 decoding).
// https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
static int getLevenshteinDistance(const std::string & s, const std::string & t)
{
	const int n = static_cast<int>(t.size());
	const int m = static_cast<int>(s.size());

	// two work rows of the distance matrix, n + 1 entries each
	std::vector<int> v0(n + 1, 0);
	std::vector<int> v1(n + 1, 0);

	// initialize v0 (the previous row of distances)
	// this row is A[0][i]: edit distance from an empty s to the first i elements of t,
	// i.e. the number of elements that have to be appended.
	// The bound is inclusive: v0[n] feeds the deletion cost of the last column
	// and is the returned value itself when s is empty.
	for (int i = 0; i <= n; ++i)
		v0[i] = i;

	for (int i = 0; i < m; ++i)
	{
		// first element of v1 is A[i + 1][0]:
		// delete (i + 1) elements from s to match an empty t
		v1[0] = i + 1;

		// fill in the rest of the row: costs for A[i + 1][j + 1]
		for (int j = 0; j < n; ++j)
		{
			const int deletionCost = v0[j + 1] + 1;
			const int insertionCost = v1[j] + 1;
			const int substitutionCost = (s[i] == t[j]) ? v0[j] : v0[j] + 1;

			v1[j + 1] = std::min({deletionCost, insertionCost, substitutionCost});
		}

		// the current row becomes the previous row of the next iteration;
		// v1 is fully overwritten there, so a swap is enough and avoids a copy
		std::swap(v0, v1);
	}

	// after the last swap the final row lives in v0
	return v0[n];
}
/// Searches for keys similar to 'target' in 'candidates' map
/// Returns closest match or empty string if no suitable candidates are found
static std::string findClosestMatch(const JsonMap & candidates, const std::string & target)
@@ -84,7 +34,7 @@ static std::string findClosestMatch(const JsonMap & candidates, const std::strin
for (auto const & candidate : candidates)
{
int newDistance = getLevenshteinDistance(candidate.first, target);
int newDistance = TextOperations::getLevenshteinDistance(candidate.first, target);
if (newDistance < bestDistance)
{

View File

@@ -236,5 +236,75 @@ std::string TextOperations::getCurrentFormattedDateTimeLocal(std::chrono::second
return TextOperations::getFormattedDateTimeLocal(std::chrono::system_clock::to_time_t(timepoint));
}
// Levenshtein distance between s and t, computed with two rolling rows of the distance matrix.
// Elements are compared as raw `char` values (no case folding, no UTF-8 decoding).
int TextOperations::getLevenshteinDistance(const std::string & s, const std::string & t)
{
	const int n = static_cast<int>(t.size());
	const int m = static_cast<int>(s.size());

	// two work rows of the distance matrix, n + 1 entries each
	std::vector<int> v0(n + 1, 0);
	std::vector<int> v1(n + 1, 0);

	// initialize v0 (the previous row of distances)
	// this row is A[0][i]: edit distance from an empty s to the first i elements of t,
	// i.e. the number of elements that have to be appended.
	// The bound is inclusive: v0[n] feeds the deletion cost of the last column
	// and is the returned value itself when s is empty.
	for (int i = 0; i <= n; ++i)
		v0[i] = i;

	for (int i = 0; i < m; ++i)
	{
		// first element of v1 is A[i + 1][0]:
		// delete (i + 1) elements from s to match an empty t
		v1[0] = i + 1;

		// fill in the rest of the row: costs for A[i + 1][j + 1]
		for (int j = 0; j < n; ++j)
		{
			const int deletionCost = v0[j + 1] + 1;
			const int insertionCost = v1[j] + 1;
			const int substitutionCost = (s[i] == t[j]) ? v0[j] : v0[j] + 1;

			v1[j + 1] = std::min({deletionCost, insertionCost, substitutionCost});
		}

		// the current row becomes the previous row of the next iteration;
		// v1 is fully overwritten there, so a swap is enough and avoids a copy
		std::swap(v0, v1);
	}

	// after the last swap the final row lives in v0
	return v0[n];
}
// Case-insensitive, typo-tolerant check used by search boxes.
// s - text that is searched for (the needle)
// t - text that is searched in (the haystack)
// Returns true if lowercased t contains lowercased s, or if some window of t that is
// as long as s lies within a small edit distance of s:
//   needle longer than 2 bytes -> distance of at most 1
//   needle longer than 4 bytes -> distance of at most 2
bool TextOperations::textSearchSimilar(const std::string & s, const std::string & t)
{
	// NOTE(review): the locale is generated on every call and this runs once per list item;
	// consider caching it if profiling shows it to be hot
	boost::locale::generator gen;
	std::locale loc = gen("en_US.UTF-8"); // support for UTF8 lowercase

	const auto haystack = boost::locale::to_lower(t, loc);
	const auto needle = boost::locale::to_lower(s, loc);

	if(boost::algorithm::contains(haystack, needle))
		return true;

	// Needles of up to 2 bytes are only ever matched exactly, which was handled above.
	// A needle longer than the haystack has no window to be compared with; returning here
	// also keeps the unsigned subtraction below from wrapping around, which used to drive
	// the loop past the end of the haystack until substr() threw std::out_of_range.
	if(needle.size() <= 2 || needle.size() > haystack.size())
		return false;

	const int maxDistance = needle.size() > 4 ? 2 : 1;
	const size_t lastStart = haystack.size() - needle.size();

	// inclusive bound, so that the window ending at the last byte of the haystack is examined too
	for(size_t i = 0; i <= lastStart; ++i)
	{
		if(getLevenshteinDistance(haystack.substr(i, needle.size()), needle) <= maxDistance)
			return true;
	}

	return false;
}
VCMI_LIB_NAMESPACE_END

View File

@@ -70,6 +70,14 @@ namespace TextOperations
/// get formatted time (without date)
/// timeOffset - optional parameter to modify current time by specified time in seconds
DLL_LINKAGE std::string getCurrentFormattedTimeLocal(std::chrono::seconds timeOffset = {});
/// Algorithm for detection of typos in words
/// Determines how 'different' two strings are - how many changes must be done to turn one string into another one
/// Strings are compared element by element as `char` values, without any case folding
/// https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
DLL_LINKAGE int getLevenshteinDistance(const std::string & s, const std::string & t);
/// Check if texts have similarity when typing into search boxes
/// s - text that is searched for (e.g. content of the search box)
/// t - text that is searched in (e.g. name of a list item)
/// Both texts are converted to lowercase first; returns true if t contains s
/// or if a part of t differs from s only by a small Levenshtein distance
DLL_LINKAGE bool textSearchSimilar(const std::string & s, const std::string & t);
};
template<typename Arithmetic>