Moved Unicode namespace out of GeneralTextHandler file

2025-10-31 00:07:39 +02:00 · 2023-02-12 23:00:56 +02:00
parent 974dd3a3db
commit 65c020ef34
14 changed files with 209 additions and 177 deletions
--- a/client/adventureMap/CInGameConsole.cpp
+++ b/client/adventureMap/CInGameConsole.cpp
@@ -20,7 +20,7 @@

 #include "../../CCallback.h"
 #include "../../lib/CConfigHandler.h"
-#include "../../lib/CGeneralTextHandler.h"
+#include "../../lib/TextOperations.h"
 #include "../../lib/mapObjects/CArmedInstance.h"

 #include <SDL_timer.h>
--- a/client/render/IFont.cpp
+++ b/client/render/IFont.cpp
@@ -12,7 +12,7 @@
 #include "IFont.h"

 #include "../../lib/Point.h"
-#include "../../lib/CGeneralTextHandler.h"
+#include "../../lib/TextOperations.h"
 //

 size_t IFont::getStringWidth(const std::string & data) const
--- a/client/renderSDL/CBitmapFont.cpp
+++ b/client/renderSDL/CBitmapFont.cpp
@@ -14,7 +14,7 @@

 #include "../../lib/vcmi_endian.h"
 #include "../../lib/filesystem/Filesystem.h"
-#include "../../lib/CGeneralTextHandler.h"
+#include "../../lib/TextOperations.h"
 #include "../../lib/Rect.h"

 #include <SDL_surface.h>
--- a/client/renderSDL/CBitmapHanFont.cpp
+++ b/client/renderSDL/CBitmapHanFont.cpp
@@ -15,7 +15,7 @@

 #include "../../lib/JsonNode.h"
 #include "../../lib/filesystem/Filesystem.h"
-#include "../../lib/CGeneralTextHandler.h"
+#include "../../lib/TextOperations.h"
 #include "../../lib/Rect.h"

 #include <SDL_surface.h>
--- a/client/renderSDL/CTrueTypeFont.cpp
+++ b/client/renderSDL/CTrueTypeFont.cpp
@@ -14,7 +14,7 @@
 #include "../renderSDL/SDL_Extensions.h"

 #include "../../lib/JsonNode.h"
-#include "../../lib/CGeneralTextHandler.h"
+#include "../../lib/TextOperations.h"
 #include "../../lib/filesystem/Filesystem.h"

 #include <SDL_ttf.h>
--- a/client/widgets/TextControls.cpp
+++ b/client/widgets/TextControls.cpp
@@ -19,7 +19,7 @@
 #include "../adventureMap/CInGameConsole.h"
 #include "../renderSDL/SDL_Extensions.h"

-#include "../../lib/CGeneralTextHandler.h"
+#include "../../lib/TextOperations.h"

 #ifdef VCMI_ANDROID
 #include "lib/CAndroidVMHelper.h"
--- a/client/windows/CMessage.cpp
+++ b/client/windows/CMessage.cpp
@@ -13,6 +13,7 @@

 #include "../CGameInfo.h"
 #include "../../lib/CGeneralTextHandler.h"
+#include "../../lib/TextOperations.h"

 #include "../windows/InfoWindows.h"
 #include "../widgets/Buttons.h"
--- a/cmake_modules/VCMI_lib.cmake
+++ b/cmake_modules/VCMI_lib.cmake
@@ -197,6 +197,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
 		${MAIN_LIB_DIR}/RoadHandler.cpp
 		${MAIN_LIB_DIR}/ScriptHandler.cpp
 		${MAIN_LIB_DIR}/TerrainHandler.cpp
+		${MAIN_LIB_DIR}/TextOperations.cpp
 		${MAIN_LIB_DIR}/VCMIDirs.cpp
 		${MAIN_LIB_DIR}/VCMI_Lib.cpp
 	)
@@ -454,6 +455,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
 		${MAIN_LIB_DIR}/StartInfo.h
 		${MAIN_LIB_DIR}/StringConstants.h
 		${MAIN_LIB_DIR}/TerrainHandler.h
+		${MAIN_LIB_DIR}/TextOperations.h
 		${MAIN_LIB_DIR}/UnlockGuard.h
 		${MAIN_LIB_DIR}/VCMIDirs.h
 		${MAIN_LIB_DIR}/vcmi_endian.h
--- a/lib/CGeneralTextHandler.cpp
+++ b/lib/CGeneralTextHandler.cpp
@@ -10,100 +10,16 @@
 #include "StdInc.h"
 #include "CGeneralTextHandler.h"

-#include <boost/locale.hpp>
-
 #include "filesystem/Filesystem.h"
 #include "CConfigHandler.h"
 #include "CModHandler.h"
-#include "GameConstants.h"
 #include "mapObjects/CQuest.h"
 #include "VCMI_Lib.h"
 #include "Languages.h"
+#include "TextOperations.h"

 VCMI_LIB_NAMESPACE_BEGIN

-size_t Unicode::getCharacterSize(char firstByte)
-{
-	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
-	// 0xxxxxxx -> 1 -  ASCII chars
-	// 110xxxxx -> 2
-	// 11110xxx -> 4 - last allowed in current standard
-	// 1111110x -> 6 - last allowed in original standard
-
-	if ((ui8)firstByte < 0x80)
-		return 1; // ASCII
-
-	size_t ret = 0;
-
-	for (size_t i=0; i<8; i++)
-	{
-		if (((ui8)firstByte & (0x80 >> i)) != 0)
-			ret++;
-		else
-			break;
-	}
-	return ret;
-}
-
-bool Unicode::isValidCharacter(const char * character, size_t maxSize)
-{
-	// can't be first byte in UTF8
-	if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
-		return false;
-	// first character must follow rules checked in getCharacterSize
-	size_t size = getCharacterSize((ui8)character[0]);
-
-	if ((ui8)character[0] > 0xF4)
-		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
-
-	if (size > maxSize)
-		return false;
-
-	// remaining characters must have highest bit set to 1
-	for (size_t i = 1; i < size; i++)
-	{
-		if (((ui8)character[i] & 0x80) == 0)
-			return false;
-	}
-	return true;
-}
-
-bool Unicode::isValidASCII(const std::string & text)
-{
-	for (const char & ch : text)
-		if (ui8(ch) >= 0x80 )
-			return false;
-	return true;
-}
-
-bool Unicode::isValidASCII(const char * data, size_t size)
-{
-	for (size_t i=0; i<size; i++)
-		if (ui8(data[i]) >= 0x80 )
-			return false;
-	return true;
-}
-
-bool Unicode::isValidString(const std::string & text)
-{
-	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
-	{
-		if (!isValidCharacter(text.data() + i, text.size() - i))
-			return false;
-	}
-	return true;
-}
-
-bool Unicode::isValidString(const char * data, size_t size)
-{
-	for (size_t i=0; i<size; i += getCharacterSize(data[i]))
-	{
-		if (!isValidCharacter(data + i, size - i))
-			return false;
-	}
-	return true;
-}
-
 /// Detects language and encoding of H3 text files based on matching against pregenerated footprints of H3 file
 void CGeneralTextHandler::detectInstallParameters()
 {
@@ -173,55 +89,6 @@ void CGeneralTextHandler::detectInstallParameters()
 	encoding->String() =  Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding;
 }

-std::string Unicode::toUnicode(const std::string &text)
-{
-	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());
-}
-
-std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
-{
-	return boost::locale::conv::to_utf<char>(text, encoding);
-}
-
-std::string Unicode::fromUnicode(const std::string & text)
-{
-	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());
-}
-
-std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
-{
-	return boost::locale::conv::from_utf<char>(text, encoding);
-}
-
-void Unicode::trimRight(std::string & text, const size_t amount)
-{
-	if(text.empty())
-		return;
-	//todo: more efficient algorithm
-	for(int i = 0; i< amount; i++){
-		auto b = text.begin();
-		auto e = text.end();
-		size_t lastLen = 0;
-		size_t len = 0;
-		while (b != e) {
-			lastLen = len;
-			size_t n = getCharacterSize(*b);
-
-			if(!isValidCharacter(&(*b),e-b))
-			{
-				logGlobal->error("Invalid UTF8 sequence");
-				break;//invalid sequence will be trimmed
-			}
-
-			len += n;
-			b += n;
-		}
-
-		text.resize(lastLen);
-	}
-}
-
-
 //Helper for string -> float conversion
 class LocaleWithComma: public std::numpunct<char>
 {
@@ -710,9 +577,9 @@ void CGeneralTextHandler::dumpAllTexts()
 	for ( auto const & entry : stringsLocalizations)
 	{
 		if (!entry.second.overrideValue.empty())
-			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.overrideValue), entry.second.modContext, entry.second.overrideLanguage);
+			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue));
 		else
-			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.baseValue), entry.second.modContext, entry.second.baseLanguage);
+			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue));
 	}

 	logGlobal->info("END TEXT EXPORT");
--- a/lib/CGeneralTextHandler.h
+++ b/lib/CGeneralTextHandler.h
@@ -9,42 +9,10 @@
 */
 #pragma once

-#include "JsonNode.h"
-
 VCMI_LIB_NAMESPACE_BEGIN

-/// Namespace that provides utilites for unicode support (UTF-8)
-namespace Unicode
-{
-	/// evaluates size of UTF-8 character
-	size_t DLL_LINKAGE getCharacterSize(char firstByte);
-
-	/// test if character is a valid UTF-8 symbol
-	/// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string)
-	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);
-
-	/// test if text contains ASCII-string (no need for unicode conversion)
-	bool DLL_LINKAGE isValidASCII(const std::string & text);
-	bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
-
-	/// test if text contains valid UTF-8 sequence
-	bool DLL_LINKAGE isValidString(const std::string & text);
-	bool DLL_LINKAGE isValidString(const char * data, size_t size);
-
-	/// converts text to unicode from specified encoding or from one specified in settings
-	std::string DLL_LINKAGE toUnicode(const std::string & text);
-	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
-
-	/// converts text from unicode to specified encoding or to one specified in settings
-	/// NOTE: usage of these functions should be avoided if possible
-	std::string DLL_LINKAGE fromUnicode(const std::string & text);
-	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
-
-	///delete (amount) UTF characters from right
-	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);
-};
-
 class CInputStream;
+class JsonNode;

 /// Parser for any text files from H3
 class DLL_LINKAGE CLegacyConfigParser
--- a/lib/JsonDetail.cpp
+++ b/lib/JsonDetail.cpp
@@ -12,7 +12,7 @@
 #include "JsonDetail.h"

 #include "VCMI_Lib.h"
-#include "CGeneralTextHandler.h"
+#include "TextOperations.h"
 #include "CModHandler.h"

 #include "filesystem/Filesystem.h"
--- a/lib/TextOperations.cpp
+++ b/lib/TextOperations.cpp
@@ -0,0 +1,149 @@
+/*
+ * TextOperations.cpp, part of VCMI engine
+ *
+ * Authors: listed in file AUTHORS in main folder
+ *
+ * License: GNU General Public License v2.0 or later
+ * Full text of license available in license.txt file, in main folder
+ *
+ */
+#include "StdInc.h"
+#include "TextOperations.h"
+
+#include "CGeneralTextHandler.h"
+
+#include <boost/locale.hpp>
+
+VCMI_LIB_NAMESPACE_BEGIN
+
+size_t Unicode::getCharacterSize(char firstByte)
+{
+	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
+	// 0xxxxxxx -> 1 -  ASCII chars
+	// 110xxxxx -> 2, 1110xxxx -> 3
+	// 11110xxx -> 4 - last allowed in current standard
+	// 1111110x -> 6 - last allowed in original standard
+	// NOTE: no validation happens here - a continuation byte (10xxxxxx) yields 1 and 0xFF yields 8
+	if ((ui8)firstByte < 0x80)
+		return 1; // ASCII
+
+	size_t ret = 0;
+
+	for (size_t i=0; i<8; i++) // scan bits from the most significant one down
+	{
+		if (((ui8)firstByte & (0x80 >> i)) != 0)
+			ret++;
+		else
+			break; // first zero bit ends the length prefix
+	}
+	return ret;
+}
+
+bool Unicode::isValidCharacter(const char * character, size_t maxSize)
+{
+	// Returns true if `character` points at one well-formed UTF-8 sequence that fits into maxSize bytes.
+	// The first byte is read unconditionally, so the caller must pass maxSize >= 1.
+	const ui8 firstByte = static_cast<ui8>(character[0]);
+	// continuation byte (10xxxxxx) can't be first byte in UTF8
+	if (firstByte >= 0x80 && firstByte < 0xC0)
+		return false;
+	if (firstByte > 0xF4)
+		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
+
+	// sequence length comes from the first byte and must not run past the end of the buffer
+	const size_t size = getCharacterSize(character[0]);
+	if (size > maxSize)
+		return false;
+
+	// remaining bytes must be continuation bytes (10xxxxxx); testing only the top bit would accept a lead byte here
+	for (size_t i = 1; i < size; i++)
+		if ((static_cast<ui8>(character[i]) & 0xC0) != 0x80)
+			return false;
+	return true;
+}
+
+bool Unicode::isValidASCII(const std::string & text)
+{
+	for (const char & ch : text)
+		if (ui8(ch) >= 0x80 )
+			return false;
+	return true;
+}
+
+bool Unicode::isValidASCII(const char * data, size_t size)
+{
+	for (size_t i=0; i<size; i++)
+		if (ui8(data[i]) >= 0x80 )
+			return false;
+	return true;
+}
+
+bool Unicode::isValidString(const std::string & text)
+{
+	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i])) // step over one whole character per iteration
+	{
+		if (!isValidCharacter(text.data() + i, text.size() - i)) // second argument = bytes left in the string
+			return false;
+	}
+	return true; // also reached for an empty string
+}
+
+bool Unicode::isValidString(const char * data, size_t size)
+{
+	for (size_t i=0; i<size; i += getCharacterSize(data[i])) // step over one whole character per iteration
+	{
+		if (!isValidCharacter(data + i, size - i)) // second argument = bytes left in the buffer
+			return false;
+	}
+	return true; // also reached when size is 0
+}
+
+std::string Unicode::toUnicode(const std::string &text)
+{
+	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());
+}
+
+std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
+{
+	return boost::locale::conv::to_utf<char>(text, encoding);
+}
+
+std::string Unicode::fromUnicode(const std::string & text)
+{
+	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());
+}
+
+std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
+{
+	return boost::locale::conv::from_utf<char>(text, encoding);
+}
+
+void Unicode::trimRight(std::string & text, const size_t amount)
+{
+	// Removes up to `amount` UTF-8 characters from the end of `text`, in place.
+	// Counter is size_t to match `amount`; the loop stops early once nothing is left to trim.
+	//todo: more efficient algorithm
+	for(size_t i = 0; i < amount && !text.empty(); i++){
+		auto b = text.begin();
+		auto e = text.end();
+		size_t lastLen = 0;
+		size_t len = 0;
+		while (b != e) {
+			lastLen = len;
+			size_t n = getCharacterSize(*b);
+
+			if(!isValidCharacter(&(*b),e-b))
+			{
+				logGlobal->error("Invalid UTF8 sequence");
+				break;//invalid sequence will be trimmed
+			}
+
+			len += n;
+			b += n;
+		}
+		// lastLen is the byte offset where the last scanned character starts, so cutting there drops it
+		text.resize(lastLen);
+	}
+}
+
+VCMI_LIB_NAMESPACE_END
--- a/lib/TextOperations.h
+++ b/lib/TextOperations.h
@@ -0,0 +1,45 @@
+/*
+ * TextOperations.h, part of VCMI engine
+ *
+ * Authors: listed in file AUTHORS in main folder
+ *
+ * License: GNU General Public License v2.0 or later
+ * Full text of license available in license.txt file, in main folder
+ *
+ */
+#pragma once
+
+VCMI_LIB_NAMESPACE_BEGIN
+
+/// Namespace that provides utilities for unicode support (UTF-8)
+namespace Unicode
+{
+	/// evaluates size of UTF-8 character
+	size_t DLL_LINKAGE getCharacterSize(char firstByte);
+
+	/// test if character is a valid UTF-8 symbol
+	/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string)
+	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);
+
+	/// test if text contains ASCII-string (no need for unicode conversion)
+	bool DLL_LINKAGE isValidASCII(const std::string & text);
+	bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
+
+	/// test if text contains valid UTF-8 sequence
+	bool DLL_LINKAGE isValidString(const std::string & text);
+	bool DLL_LINKAGE isValidString(const char * data, size_t size);
+
+	/// converts text to unicode from specified encoding or from one specified in settings
+	std::string DLL_LINKAGE toUnicode(const std::string & text);
+	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
+
+	/// converts text from unicode to specified encoding or to one specified in settings
+	/// NOTE: usage of these functions should be avoided if possible
+	std::string DLL_LINKAGE fromUnicode(const std::string & text);
+	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
+
+	/// delete (amount) UTF-8 characters from right
+	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);
+};
+
+VCMI_LIB_NAMESPACE_END
--- a/lib/filesystem/CBinaryReader.cpp
+++ b/lib/filesystem/CBinaryReader.cpp
@@ -11,7 +11,7 @@
 #include "CBinaryReader.h"

 #include "CInputStream.h"
-#include "../CGeneralTextHandler.h"
+#include "../TextOperations.h"

 VCMI_LIB_NAMESPACE_BEGIN