mirror of
				https://github.com/vcmi/vcmi.git
				synced 2025-10-31 00:07:39 +02:00 
			
		
		
		
	Moved Unicode namespace out of GeneralTextHandler file
This commit is contained in:
		| @@ -20,7 +20,7 @@ | ||||
|  | ||||
| #include "../../CCallback.h" | ||||
| #include "../../lib/CConfigHandler.h" | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
| #include "../../lib/TextOperations.h" | ||||
| #include "../../lib/mapObjects/CArmedInstance.h" | ||||
|  | ||||
| #include <SDL_timer.h> | ||||
|   | ||||
| @@ -12,7 +12,7 @@ | ||||
| #include "IFont.h" | ||||
|  | ||||
| #include "../../lib/Point.h" | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
| #include "../../lib/TextOperations.h" | ||||
| // | ||||
|  | ||||
| size_t IFont::getStringWidth(const std::string & data) const | ||||
|   | ||||
| @@ -14,7 +14,7 @@ | ||||
|  | ||||
| #include "../../lib/vcmi_endian.h" | ||||
| #include "../../lib/filesystem/Filesystem.h" | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
| #include "../../lib/TextOperations.h" | ||||
| #include "../../lib/Rect.h" | ||||
|  | ||||
| #include <SDL_surface.h> | ||||
|   | ||||
| @@ -15,7 +15,7 @@ | ||||
|  | ||||
| #include "../../lib/JsonNode.h" | ||||
| #include "../../lib/filesystem/Filesystem.h" | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
| #include "../../lib/TextOperations.h" | ||||
| #include "../../lib/Rect.h" | ||||
|  | ||||
| #include <SDL_surface.h> | ||||
|   | ||||
| @@ -14,7 +14,7 @@ | ||||
| #include "../renderSDL/SDL_Extensions.h" | ||||
|  | ||||
| #include "../../lib/JsonNode.h" | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
| #include "../../lib/TextOperations.h" | ||||
| #include "../../lib/filesystem/Filesystem.h" | ||||
|  | ||||
| #include <SDL_ttf.h> | ||||
|   | ||||
| @@ -19,7 +19,7 @@ | ||||
| #include "../adventureMap/CInGameConsole.h" | ||||
| #include "../renderSDL/SDL_Extensions.h" | ||||
|  | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
| #include "../../lib/TextOperations.h" | ||||
|  | ||||
| #ifdef VCMI_ANDROID | ||||
| #include "lib/CAndroidVMHelper.h" | ||||
|   | ||||
| @@ -13,6 +13,7 @@ | ||||
|  | ||||
| #include "../CGameInfo.h" | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
| #include "../../lib/TextOperations.h" | ||||
|  | ||||
| #include "../windows/InfoWindows.h" | ||||
| #include "../widgets/Buttons.h" | ||||
|   | ||||
| @@ -197,6 +197,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE) | ||||
| 		${MAIN_LIB_DIR}/RoadHandler.cpp | ||||
| 		${MAIN_LIB_DIR}/ScriptHandler.cpp | ||||
| 		${MAIN_LIB_DIR}/TerrainHandler.cpp | ||||
| 		${MAIN_LIB_DIR}/TextOperations.cpp | ||||
| 		${MAIN_LIB_DIR}/VCMIDirs.cpp | ||||
| 		${MAIN_LIB_DIR}/VCMI_Lib.cpp | ||||
| 	) | ||||
| @@ -454,6 +455,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE) | ||||
| 		${MAIN_LIB_DIR}/StartInfo.h | ||||
| 		${MAIN_LIB_DIR}/StringConstants.h | ||||
| 		${MAIN_LIB_DIR}/TerrainHandler.h | ||||
| 		${MAIN_LIB_DIR}/TextOperations.h | ||||
| 		${MAIN_LIB_DIR}/UnlockGuard.h | ||||
| 		${MAIN_LIB_DIR}/VCMIDirs.h | ||||
| 		${MAIN_LIB_DIR}/vcmi_endian.h | ||||
|   | ||||
| @@ -10,100 +10,16 @@ | ||||
| #include "StdInc.h" | ||||
| #include "CGeneralTextHandler.h" | ||||
|  | ||||
| #include <boost/locale.hpp> | ||||
|  | ||||
| #include "filesystem/Filesystem.h" | ||||
| #include "CConfigHandler.h" | ||||
| #include "CModHandler.h" | ||||
| #include "GameConstants.h" | ||||
| #include "mapObjects/CQuest.h" | ||||
| #include "VCMI_Lib.h" | ||||
| #include "Languages.h" | ||||
| #include "TextOperations.h" | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_BEGIN | ||||
|  | ||||
| size_t Unicode::getCharacterSize(char firstByte) | ||||
| { | ||||
| 	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1: | ||||
| 	// 0xxxxxxx -> 1 -  ASCII chars | ||||
| 	// 110xxxxx -> 2 | ||||
| 	// 11110xxx -> 4 - last allowed in current standard | ||||
| 	// 1111110x -> 6 - last allowed in original standard | ||||
|  | ||||
| 	if ((ui8)firstByte < 0x80) | ||||
| 		return 1; // ASCII | ||||
|  | ||||
| 	size_t ret = 0; | ||||
|  | ||||
| 	for (size_t i=0; i<8; i++) | ||||
| 	{ | ||||
| 		if (((ui8)firstByte & (0x80 >> i)) != 0) | ||||
| 			ret++; | ||||
| 		else | ||||
| 			break; | ||||
| 	} | ||||
| 	return ret; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidCharacter(const char * character, size_t maxSize) | ||||
| { | ||||
| 	// can't be first byte in UTF8 | ||||
| 	if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0) | ||||
| 		return false; | ||||
| 	// first character must follow rules checked in getCharacterSize | ||||
| 	size_t size = getCharacterSize((ui8)character[0]); | ||||
|  | ||||
| 	if ((ui8)character[0] > 0xF4) | ||||
| 		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) | ||||
|  | ||||
| 	if (size > maxSize) | ||||
| 		return false; | ||||
|  | ||||
| 	// remaining characters must have highest bit set to 1 | ||||
| 	for (size_t i = 1; i < size; i++) | ||||
| 	{ | ||||
| 		if (((ui8)character[i] & 0x80) == 0) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidASCII(const std::string & text) | ||||
| { | ||||
| 	for (const char & ch : text) | ||||
| 		if (ui8(ch) >= 0x80 ) | ||||
| 			return false; | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidASCII(const char * data, size_t size) | ||||
| { | ||||
| 	for (size_t i=0; i<size; i++) | ||||
| 		if (ui8(data[i]) >= 0x80 ) | ||||
| 			return false; | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidString(const std::string & text) | ||||
| { | ||||
| 	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i])) | ||||
| 	{ | ||||
| 		if (!isValidCharacter(text.data() + i, text.size() - i)) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidString(const char * data, size_t size) | ||||
| { | ||||
| 	for (size_t i=0; i<size; i += getCharacterSize(data[i])) | ||||
| 	{ | ||||
| 		if (!isValidCharacter(data + i, size - i)) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| /// Detects language and encoding of H3 text files based on matching against pregenerated footprints of H3 file | ||||
| void CGeneralTextHandler::detectInstallParameters() | ||||
| { | ||||
| @@ -173,55 +89,6 @@ void CGeneralTextHandler::detectInstallParameters() | ||||
| 	encoding->String() =  Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding; | ||||
| } | ||||
|  | ||||
| std::string Unicode::toUnicode(const std::string &text) | ||||
| { | ||||
| 	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding()); | ||||
| } | ||||
|  | ||||
| std::string Unicode::toUnicode(const std::string &text, const std::string &encoding) | ||||
| { | ||||
| 	return boost::locale::conv::to_utf<char>(text, encoding); | ||||
| } | ||||
|  | ||||
| std::string Unicode::fromUnicode(const std::string & text) | ||||
| { | ||||
| 	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding()); | ||||
| } | ||||
|  | ||||
| std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) | ||||
| { | ||||
| 	return boost::locale::conv::from_utf<char>(text, encoding); | ||||
| } | ||||
|  | ||||
| void Unicode::trimRight(std::string & text, const size_t amount) | ||||
| { | ||||
| 	if(text.empty()) | ||||
| 		return; | ||||
| 	//todo: more efficient algorithm | ||||
| 	for(int i = 0; i< amount; i++){ | ||||
| 		auto b = text.begin(); | ||||
| 		auto e = text.end(); | ||||
| 		size_t lastLen = 0; | ||||
| 		size_t len = 0; | ||||
| 		while (b != e) { | ||||
| 			lastLen = len; | ||||
| 			size_t n = getCharacterSize(*b); | ||||
|  | ||||
| 			if(!isValidCharacter(&(*b),e-b)) | ||||
| 			{ | ||||
| 				logGlobal->error("Invalid UTF8 sequence"); | ||||
| 				break;//invalid sequence will be trimmed | ||||
| 			} | ||||
|  | ||||
| 			len += n; | ||||
| 			b += n; | ||||
| 		} | ||||
|  | ||||
| 		text.resize(lastLen); | ||||
| 	} | ||||
| } | ||||
|  | ||||
|  | ||||
| //Helper for string -> float conversion | ||||
| class LocaleWithComma: public std::numpunct<char> | ||||
| { | ||||
| @@ -710,9 +577,9 @@ void CGeneralTextHandler::dumpAllTexts() | ||||
| 	for ( auto const & entry : stringsLocalizations) | ||||
| 	{ | ||||
| 		if (!entry.second.overrideValue.empty()) | ||||
| 			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.overrideValue), entry.second.modContext, entry.second.overrideLanguage); | ||||
| 			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue)); | ||||
| 		else | ||||
| 			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.baseValue), entry.second.modContext, entry.second.baseLanguage); | ||||
| 			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue)); | ||||
| 	} | ||||
|  | ||||
| 	logGlobal->info("END TEXT EXPORT"); | ||||
|   | ||||
| @@ -9,42 +9,10 @@ | ||||
|  */ | ||||
| #pragma once | ||||
|  | ||||
| #include "JsonNode.h" | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_BEGIN | ||||
|  | ||||
| /// Namespace that provides utilites for unicode support (UTF-8) | ||||
| namespace Unicode | ||||
| { | ||||
| 	/// evaluates size of UTF-8 character | ||||
| 	size_t DLL_LINKAGE getCharacterSize(char firstByte); | ||||
|  | ||||
| 	/// test if character is a valid UTF-8 symbol | ||||
| 	/// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string) | ||||
| 	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize); | ||||
|  | ||||
| 	/// test if text contains ASCII-string (no need for unicode conversion) | ||||
| 	bool DLL_LINKAGE isValidASCII(const std::string & text); | ||||
| 	bool DLL_LINKAGE isValidASCII(const char * data, size_t size); | ||||
|  | ||||
| 	/// test if text contains valid UTF-8 sequence | ||||
| 	bool DLL_LINKAGE isValidString(const std::string & text); | ||||
| 	bool DLL_LINKAGE isValidString(const char * data, size_t size); | ||||
|  | ||||
| 	/// converts text to unicode from specified encoding or from one specified in settings | ||||
| 	std::string DLL_LINKAGE toUnicode(const std::string & text); | ||||
| 	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding); | ||||
|  | ||||
| 	/// converts text from unicode to specified encoding or to one specified in settings | ||||
| 	/// NOTE: usage of these functions should be avoided if possible | ||||
| 	std::string DLL_LINKAGE fromUnicode(const std::string & text); | ||||
| 	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding); | ||||
|  | ||||
| 	///delete (amount) UTF characters from right | ||||
| 	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1); | ||||
| }; | ||||
|  | ||||
| class CInputStream; | ||||
| class JsonNode; | ||||
|  | ||||
| /// Parser for any text files from H3 | ||||
| class DLL_LINKAGE CLegacyConfigParser | ||||
|   | ||||
| @@ -12,7 +12,7 @@ | ||||
| #include "JsonDetail.h" | ||||
|  | ||||
| #include "VCMI_Lib.h" | ||||
| #include "CGeneralTextHandler.h" | ||||
| #include "TextOperations.h" | ||||
| #include "CModHandler.h" | ||||
|  | ||||
| #include "filesystem/Filesystem.h" | ||||
|   | ||||
							
								
								
									
										149
									
								
								lib/TextOperations.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								lib/TextOperations.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,149 @@ | ||||
| /* | ||||
|  * TextOperations.cpp, part of VCMI engine | ||||
|  * | ||||
|  * Authors: listed in file AUTHORS in main folder | ||||
|  * | ||||
|  * License: GNU General Public License v2.0 or later | ||||
|  * Full text of license available in license.txt file, in main folder | ||||
|  * | ||||
|  */ | ||||
| #include "StdInc.h" | ||||
| #include "TextOperations.h" | ||||
|  | ||||
| #include "CGeneralTextHandler.h" | ||||
|  | ||||
| #include <boost/locale.hpp> | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_BEGIN | ||||
|  | ||||
| size_t Unicode::getCharacterSize(char firstByte) | ||||
| { | ||||
| 	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1: | ||||
| 	// 0xxxxxxx -> 1 -  ASCII chars | ||||
| 	// 110xxxxx -> 2 | ||||
| 	// 11110xxx -> 4 - last allowed in current standard | ||||
| 	// 1111110x -> 6 - last allowed in original standard | ||||
|  | ||||
| 	if ((ui8)firstByte < 0x80) | ||||
| 		return 1; // ASCII | ||||
|  | ||||
| 	size_t ret = 0; | ||||
|  | ||||
| 	for (size_t i=0; i<8; i++) | ||||
| 	{ | ||||
| 		if (((ui8)firstByte & (0x80 >> i)) != 0) | ||||
| 			ret++; | ||||
| 		else | ||||
| 			break; | ||||
| 	} | ||||
| 	return ret; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidCharacter(const char * character, size_t maxSize) | ||||
| { | ||||
| 	// can't be first byte in UTF8 | ||||
| 	if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0) | ||||
| 		return false; | ||||
| 	// first character must follow rules checked in getCharacterSize | ||||
| 	size_t size = getCharacterSize((ui8)character[0]); | ||||
|  | ||||
| 	if ((ui8)character[0] > 0xF4) | ||||
| 		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) | ||||
|  | ||||
| 	if (size > maxSize) | ||||
| 		return false; | ||||
|  | ||||
| 	// remaining characters must have highest bit set to 1 | ||||
| 	for (size_t i = 1; i < size; i++) | ||||
| 	{ | ||||
| 		if (((ui8)character[i] & 0x80) == 0) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidASCII(const std::string & text) | ||||
| { | ||||
| 	for (const char & ch : text) | ||||
| 		if (ui8(ch) >= 0x80 ) | ||||
| 			return false; | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidASCII(const char * data, size_t size) | ||||
| { | ||||
| 	for (size_t i=0; i<size; i++) | ||||
| 		if (ui8(data[i]) >= 0x80 ) | ||||
| 			return false; | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidString(const std::string & text) | ||||
| { | ||||
| 	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i])) | ||||
| 	{ | ||||
| 		if (!isValidCharacter(text.data() + i, text.size() - i)) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| bool Unicode::isValidString(const char * data, size_t size) | ||||
| { | ||||
| 	for (size_t i=0; i<size; i += getCharacterSize(data[i])) | ||||
| 	{ | ||||
| 		if (!isValidCharacter(data + i, size - i)) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| std::string Unicode::toUnicode(const std::string &text) | ||||
| { | ||||
| 	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding()); | ||||
| } | ||||
|  | ||||
| std::string Unicode::toUnicode(const std::string &text, const std::string &encoding) | ||||
| { | ||||
| 	return boost::locale::conv::to_utf<char>(text, encoding); | ||||
| } | ||||
|  | ||||
| std::string Unicode::fromUnicode(const std::string & text) | ||||
| { | ||||
| 	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding()); | ||||
| } | ||||
|  | ||||
| std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) | ||||
| { | ||||
| 	return boost::locale::conv::from_utf<char>(text, encoding); | ||||
| } | ||||
|  | ||||
| void Unicode::trimRight(std::string & text, const size_t amount) | ||||
| { | ||||
| 	if(text.empty()) | ||||
| 		return; | ||||
| 	//todo: more efficient algorithm | ||||
| 	for(int i = 0; i< amount; i++){ | ||||
| 		auto b = text.begin(); | ||||
| 		auto e = text.end(); | ||||
| 		size_t lastLen = 0; | ||||
| 		size_t len = 0; | ||||
| 		while (b != e) { | ||||
| 			lastLen = len; | ||||
| 			size_t n = getCharacterSize(*b); | ||||
|  | ||||
| 			if(!isValidCharacter(&(*b),e-b)) | ||||
| 			{ | ||||
| 				logGlobal->error("Invalid UTF8 sequence"); | ||||
| 				break;//invalid sequence will be trimmed | ||||
| 			} | ||||
|  | ||||
| 			len += n; | ||||
| 			b += n; | ||||
| 		} | ||||
|  | ||||
| 		text.resize(lastLen); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_END | ||||
							
								
								
									
										45
									
								
								lib/TextOperations.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								lib/TextOperations.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| /* | ||||
|  * TextOperations.h, part of VCMI engine | ||||
|  * | ||||
|  * Authors: listed in file AUTHORS in main folder | ||||
|  * | ||||
|  * License: GNU General Public License v2.0 or later | ||||
|  * Full text of license available in license.txt file, in main folder | ||||
|  * | ||||
|  */ | ||||
| #pragma once | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_BEGIN | ||||
|  | ||||
| /// Namespace that provides utilites for unicode support (UTF-8) | ||||
| namespace Unicode | ||||
| { | ||||
| 	/// evaluates size of UTF-8 character | ||||
| 	size_t DLL_LINKAGE getCharacterSize(char firstByte); | ||||
|  | ||||
| 	/// test if character is a valid UTF-8 symbol | ||||
| 	/// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string) | ||||
| 	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize); | ||||
|  | ||||
| 	/// test if text contains ASCII-string (no need for unicode conversion) | ||||
| 	bool DLL_LINKAGE isValidASCII(const std::string & text); | ||||
| 	bool DLL_LINKAGE isValidASCII(const char * data, size_t size); | ||||
|  | ||||
| 	/// test if text contains valid UTF-8 sequence | ||||
| 	bool DLL_LINKAGE isValidString(const std::string & text); | ||||
| 	bool DLL_LINKAGE isValidString(const char * data, size_t size); | ||||
|  | ||||
| 	/// converts text to unicode from specified encoding or from one specified in settings | ||||
| 	std::string DLL_LINKAGE toUnicode(const std::string & text); | ||||
| 	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding); | ||||
|  | ||||
| 	/// converts text from unicode to specified encoding or to one specified in settings | ||||
| 	/// NOTE: usage of these functions should be avoided if possible | ||||
| 	std::string DLL_LINKAGE fromUnicode(const std::string & text); | ||||
| 	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding); | ||||
|  | ||||
| 	///delete (amount) UTF characters from right | ||||
| 	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1); | ||||
| }; | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_END | ||||
| @@ -11,7 +11,7 @@ | ||||
| #include "CBinaryReader.h" | ||||
|  | ||||
| #include "CInputStream.h" | ||||
| #include "../CGeneralTextHandler.h" | ||||
| #include "../TextOperations.h" | ||||
|  | ||||
| VCMI_LIB_NAMESPACE_BEGIN | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user