mirror of
				https://github.com/vcmi/vcmi.git
				synced 2025-10-31 00:07:39 +02:00 
			
		
		
		
	Implement detection of typos in json using Levenshtein Distance
This commit is contained in:
		| @@ -21,6 +21,80 @@ | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_BEGIN | ||||
|  | ||||
| // Algorithm for detection of typos in words | ||||
| // Determines how 'different' two strings are - how many changes must be done to turn one string into another one | ||||
| // https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows | ||||
| static int getLevenshteinDistance(const std::string & s, const std::string & t) | ||||
| { | ||||
| 	int n = t.size(); | ||||
| 	int m = s.size(); | ||||
|  | ||||
| 	// create two work vectors of integer distances | ||||
| 	std::vector<int> v0(n+1, 0); | ||||
| 	std::vector<int> v1(n+1, 0); | ||||
|  | ||||
| 	// initialize v0 (the previous row of distances) | ||||
| 	// this row is A[0][i]: edit distance from an empty s to t; | ||||
| 	// that distance is the number of characters to append to  s to make t. | ||||
| 	for (int i = 0; i < n; ++i) | ||||
| 		v0[i] = i; | ||||
|  | ||||
| 	for (int i = 0; i < m; ++i) | ||||
| 	{ | ||||
| 		// calculate v1 (current row distances) from the previous row v0 | ||||
|  | ||||
| 		// first element of v1 is A[i + 1][0] | ||||
| 		// edit distance is delete (i + 1) chars from s to match empty t | ||||
| 		v1[0] = i + 1; | ||||
|  | ||||
| 		// use formula to fill in the rest of the row | ||||
| 		for (int j = 0; j < n; ++j) | ||||
| 		{ | ||||
| 			// calculating costs for A[i + 1][j + 1] | ||||
| 			int deletionCost = v0[j + 1] + 1; | ||||
| 			int insertionCost = v1[j] + 1; | ||||
| 			int substitutionCost; | ||||
|  | ||||
| 			if (s[i] == t[j]) | ||||
| 				substitutionCost = v0[j]; | ||||
| 			else | ||||
| 				substitutionCost = v0[j] + 1; | ||||
|  | ||||
| 			v1[j + 1] = std::min({deletionCost, insertionCost, substitutionCost}); | ||||
| 		} | ||||
|  | ||||
| 		// copy v1 (current row) to v0 (previous row) for next iteration | ||||
| 		// since data in v1 is always invalidated, a swap without copy could be more efficient | ||||
| 		std::swap(v0, v1); | ||||
| 	} | ||||
|  | ||||
| 	// after the last swap, the results of v1 are now in v0 | ||||
| 	return v0[n]; | ||||
| } | ||||
|  | ||||
| /// Searches for keys similar to 'target' in 'candidates' map | ||||
| /// Returns closest match or empty string if no suitable candidates are found | ||||
| static std::string findClosestMatch(JsonMap candidates, std::string target) | ||||
| { | ||||
| 	// Maximum distance at which we can consider strings to be similar | ||||
| 	// If strings have more different symbols than this number then it is not a typo, but a completely different word | ||||
| 	static constexpr int maxDistance = 5; | ||||
| 	int bestDistance = maxDistance; | ||||
| 	std::string bestMatch; | ||||
|  | ||||
| 	for (auto const & candidate : candidates) | ||||
| 	{ | ||||
| 		int newDistance = getLevenshteinDistance(candidate.first, target); | ||||
|  | ||||
| 		if (newDistance < bestDistance) | ||||
| 		{ | ||||
| 			bestDistance = newDistance; | ||||
| 			bestMatch = candidate.first; | ||||
| 		} | ||||
| 	} | ||||
| 	return bestMatch; | ||||
| } | ||||
|  | ||||
| static std::string emptyCheck(JsonValidator & validator, const JsonNode & baseSchema, const JsonNode & schema, const JsonNode & data) | ||||
| { | ||||
| 	// check is not needed - e.g. incorporated into another check | ||||
| @@ -417,7 +491,13 @@ static std::string additionalPropertiesCheck(JsonValidator & validator, const Js | ||||
|  | ||||
| 			// or, additionalItems field can be bool which indicates if such items are allowed | ||||
| 			else if(!schema.isNull() && !schema.Bool()) // present and set to false - error | ||||
| 				errors += validator.makeErrorMessage("Unknown entry found: " + entry.first); | ||||
| 			{ | ||||
| 				std::string bestCandidate = findClosestMatch(baseSchema["properties"].Struct(), entry.first); | ||||
| 				if (!bestCandidate.empty()) | ||||
| 					errors += validator.makeErrorMessage("Unknown entry found: '" + entry.first + "'. Perhaps you meant '" + bestCandidate + "'?"); | ||||
| 				else | ||||
| 					errors += validator.makeErrorMessage("Unknown entry found: " + entry.first); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return errors; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user