mirror of
				https://github.com/vcmi/vcmi.git
				synced 2025-10-31 00:07:39 +02:00 
			
		
		
		
	Moved Unicode namespace out of GeneralTextHandler file
This commit is contained in:
		| @@ -20,7 +20,7 @@ | |||||||
|  |  | ||||||
| #include "../../CCallback.h" | #include "../../CCallback.h" | ||||||
| #include "../../lib/CConfigHandler.h" | #include "../../lib/CConfigHandler.h" | ||||||
| #include "../../lib/CGeneralTextHandler.h" | #include "../../lib/TextOperations.h" | ||||||
| #include "../../lib/mapObjects/CArmedInstance.h" | #include "../../lib/mapObjects/CArmedInstance.h" | ||||||
|  |  | ||||||
| #include <SDL_timer.h> | #include <SDL_timer.h> | ||||||
|   | |||||||
| @@ -12,7 +12,7 @@ | |||||||
| #include "IFont.h" | #include "IFont.h" | ||||||
|  |  | ||||||
| #include "../../lib/Point.h" | #include "../../lib/Point.h" | ||||||
| #include "../../lib/CGeneralTextHandler.h" | #include "../../lib/TextOperations.h" | ||||||
| // | // | ||||||
|  |  | ||||||
| size_t IFont::getStringWidth(const std::string & data) const | size_t IFont::getStringWidth(const std::string & data) const | ||||||
|   | |||||||
| @@ -14,7 +14,7 @@ | |||||||
|  |  | ||||||
| #include "../../lib/vcmi_endian.h" | #include "../../lib/vcmi_endian.h" | ||||||
| #include "../../lib/filesystem/Filesystem.h" | #include "../../lib/filesystem/Filesystem.h" | ||||||
| #include "../../lib/CGeneralTextHandler.h" | #include "../../lib/TextOperations.h" | ||||||
| #include "../../lib/Rect.h" | #include "../../lib/Rect.h" | ||||||
|  |  | ||||||
| #include <SDL_surface.h> | #include <SDL_surface.h> | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ | |||||||
|  |  | ||||||
| #include "../../lib/JsonNode.h" | #include "../../lib/JsonNode.h" | ||||||
| #include "../../lib/filesystem/Filesystem.h" | #include "../../lib/filesystem/Filesystem.h" | ||||||
| #include "../../lib/CGeneralTextHandler.h" | #include "../../lib/TextOperations.h" | ||||||
| #include "../../lib/Rect.h" | #include "../../lib/Rect.h" | ||||||
|  |  | ||||||
| #include <SDL_surface.h> | #include <SDL_surface.h> | ||||||
|   | |||||||
| @@ -14,7 +14,7 @@ | |||||||
| #include "../renderSDL/SDL_Extensions.h" | #include "../renderSDL/SDL_Extensions.h" | ||||||
|  |  | ||||||
| #include "../../lib/JsonNode.h" | #include "../../lib/JsonNode.h" | ||||||
| #include "../../lib/CGeneralTextHandler.h" | #include "../../lib/TextOperations.h" | ||||||
| #include "../../lib/filesystem/Filesystem.h" | #include "../../lib/filesystem/Filesystem.h" | ||||||
|  |  | ||||||
| #include <SDL_ttf.h> | #include <SDL_ttf.h> | ||||||
|   | |||||||
| @@ -19,7 +19,7 @@ | |||||||
| #include "../adventureMap/CInGameConsole.h" | #include "../adventureMap/CInGameConsole.h" | ||||||
| #include "../renderSDL/SDL_Extensions.h" | #include "../renderSDL/SDL_Extensions.h" | ||||||
|  |  | ||||||
| #include "../../lib/CGeneralTextHandler.h" | #include "../../lib/TextOperations.h" | ||||||
|  |  | ||||||
| #ifdef VCMI_ANDROID | #ifdef VCMI_ANDROID | ||||||
| #include "lib/CAndroidVMHelper.h" | #include "lib/CAndroidVMHelper.h" | ||||||
|   | |||||||
| @@ -13,6 +13,7 @@ | |||||||
|  |  | ||||||
| #include "../CGameInfo.h" | #include "../CGameInfo.h" | ||||||
| #include "../../lib/CGeneralTextHandler.h" | #include "../../lib/CGeneralTextHandler.h" | ||||||
|  | #include "../../lib/TextOperations.h" | ||||||
|  |  | ||||||
| #include "../windows/InfoWindows.h" | #include "../windows/InfoWindows.h" | ||||||
| #include "../widgets/Buttons.h" | #include "../widgets/Buttons.h" | ||||||
|   | |||||||
| @@ -197,6 +197,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE) | |||||||
| 		${MAIN_LIB_DIR}/RoadHandler.cpp | 		${MAIN_LIB_DIR}/RoadHandler.cpp | ||||||
| 		${MAIN_LIB_DIR}/ScriptHandler.cpp | 		${MAIN_LIB_DIR}/ScriptHandler.cpp | ||||||
| 		${MAIN_LIB_DIR}/TerrainHandler.cpp | 		${MAIN_LIB_DIR}/TerrainHandler.cpp | ||||||
|  | 		${MAIN_LIB_DIR}/TextOperations.cpp | ||||||
| 		${MAIN_LIB_DIR}/VCMIDirs.cpp | 		${MAIN_LIB_DIR}/VCMIDirs.cpp | ||||||
| 		${MAIN_LIB_DIR}/VCMI_Lib.cpp | 		${MAIN_LIB_DIR}/VCMI_Lib.cpp | ||||||
| 	) | 	) | ||||||
| @@ -454,6 +455,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE) | |||||||
| 		${MAIN_LIB_DIR}/StartInfo.h | 		${MAIN_LIB_DIR}/StartInfo.h | ||||||
| 		${MAIN_LIB_DIR}/StringConstants.h | 		${MAIN_LIB_DIR}/StringConstants.h | ||||||
| 		${MAIN_LIB_DIR}/TerrainHandler.h | 		${MAIN_LIB_DIR}/TerrainHandler.h | ||||||
|  | 		${MAIN_LIB_DIR}/TextOperations.h | ||||||
| 		${MAIN_LIB_DIR}/UnlockGuard.h | 		${MAIN_LIB_DIR}/UnlockGuard.h | ||||||
| 		${MAIN_LIB_DIR}/VCMIDirs.h | 		${MAIN_LIB_DIR}/VCMIDirs.h | ||||||
| 		${MAIN_LIB_DIR}/vcmi_endian.h | 		${MAIN_LIB_DIR}/vcmi_endian.h | ||||||
|   | |||||||
| @@ -10,100 +10,16 @@ | |||||||
| #include "StdInc.h" | #include "StdInc.h" | ||||||
| #include "CGeneralTextHandler.h" | #include "CGeneralTextHandler.h" | ||||||
|  |  | ||||||
| #include <boost/locale.hpp> |  | ||||||
|  |  | ||||||
| #include "filesystem/Filesystem.h" | #include "filesystem/Filesystem.h" | ||||||
| #include "CConfigHandler.h" | #include "CConfigHandler.h" | ||||||
| #include "CModHandler.h" | #include "CModHandler.h" | ||||||
| #include "GameConstants.h" |  | ||||||
| #include "mapObjects/CQuest.h" | #include "mapObjects/CQuest.h" | ||||||
| #include "VCMI_Lib.h" | #include "VCMI_Lib.h" | ||||||
| #include "Languages.h" | #include "Languages.h" | ||||||
|  | #include "TextOperations.h" | ||||||
|  |  | ||||||
| VCMI_LIB_NAMESPACE_BEGIN | VCMI_LIB_NAMESPACE_BEGIN | ||||||
|  |  | ||||||
| size_t Unicode::getCharacterSize(char firstByte) |  | ||||||
| { |  | ||||||
| 	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1: |  | ||||||
| 	// 0xxxxxxx -> 1 -  ASCII chars |  | ||||||
| 	// 110xxxxx -> 2 |  | ||||||
| 	// 11110xxx -> 4 - last allowed in current standard |  | ||||||
| 	// 1111110x -> 6 - last allowed in original standard |  | ||||||
|  |  | ||||||
| 	if ((ui8)firstByte < 0x80) |  | ||||||
| 		return 1; // ASCII |  | ||||||
|  |  | ||||||
| 	size_t ret = 0; |  | ||||||
|  |  | ||||||
| 	for (size_t i=0; i<8; i++) |  | ||||||
| 	{ |  | ||||||
| 		if (((ui8)firstByte & (0x80 >> i)) != 0) |  | ||||||
| 			ret++; |  | ||||||
| 		else |  | ||||||
| 			break; |  | ||||||
| 	} |  | ||||||
| 	return ret; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Unicode::isValidCharacter(const char * character, size_t maxSize) |  | ||||||
| { |  | ||||||
| 	// can't be first byte in UTF8 |  | ||||||
| 	if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0) |  | ||||||
| 		return false; |  | ||||||
| 	// first character must follow rules checked in getCharacterSize |  | ||||||
| 	size_t size = getCharacterSize((ui8)character[0]); |  | ||||||
|  |  | ||||||
| 	if ((ui8)character[0] > 0xF4) |  | ||||||
| 		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) |  | ||||||
|  |  | ||||||
| 	if (size > maxSize) |  | ||||||
| 		return false; |  | ||||||
|  |  | ||||||
| 	// remaining characters must have highest bit set to 1 |  | ||||||
| 	for (size_t i = 1; i < size; i++) |  | ||||||
| 	{ |  | ||||||
| 		if (((ui8)character[i] & 0x80) == 0) |  | ||||||
| 			return false; |  | ||||||
| 	} |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Unicode::isValidASCII(const std::string & text) |  | ||||||
| { |  | ||||||
| 	for (const char & ch : text) |  | ||||||
| 		if (ui8(ch) >= 0x80 ) |  | ||||||
| 			return false; |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Unicode::isValidASCII(const char * data, size_t size) |  | ||||||
| { |  | ||||||
| 	for (size_t i=0; i<size; i++) |  | ||||||
| 		if (ui8(data[i]) >= 0x80 ) |  | ||||||
| 			return false; |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Unicode::isValidString(const std::string & text) |  | ||||||
| { |  | ||||||
| 	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i])) |  | ||||||
| 	{ |  | ||||||
| 		if (!isValidCharacter(text.data() + i, text.size() - i)) |  | ||||||
| 			return false; |  | ||||||
| 	} |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Unicode::isValidString(const char * data, size_t size) |  | ||||||
| { |  | ||||||
| 	for (size_t i=0; i<size; i += getCharacterSize(data[i])) |  | ||||||
| 	{ |  | ||||||
| 		if (!isValidCharacter(data + i, size - i)) |  | ||||||
| 			return false; |  | ||||||
| 	} |  | ||||||
| 	return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Detects language and encoding of H3 text files based on matching against pregenerated footprints of H3 file | /// Detects language and encoding of H3 text files based on matching against pregenerated footprints of H3 file | ||||||
| void CGeneralTextHandler::detectInstallParameters() | void CGeneralTextHandler::detectInstallParameters() | ||||||
| { | { | ||||||
| @@ -173,55 +89,6 @@ void CGeneralTextHandler::detectInstallParameters() | |||||||
| 	encoding->String() =  Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding; | 	encoding->String() =  Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string Unicode::toUnicode(const std::string &text) |  | ||||||
| { |  | ||||||
| 	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::string Unicode::toUnicode(const std::string &text, const std::string &encoding) |  | ||||||
| { |  | ||||||
| 	return boost::locale::conv::to_utf<char>(text, encoding); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::string Unicode::fromUnicode(const std::string & text) |  | ||||||
| { |  | ||||||
| 	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) |  | ||||||
| { |  | ||||||
| 	return boost::locale::conv::from_utf<char>(text, encoding); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void Unicode::trimRight(std::string & text, const size_t amount) |  | ||||||
| { |  | ||||||
| 	if(text.empty()) |  | ||||||
| 		return; |  | ||||||
| 	//todo: more efficient algorithm |  | ||||||
| 	for(int i = 0; i< amount; i++){ |  | ||||||
| 		auto b = text.begin(); |  | ||||||
| 		auto e = text.end(); |  | ||||||
| 		size_t lastLen = 0; |  | ||||||
| 		size_t len = 0; |  | ||||||
| 		while (b != e) { |  | ||||||
| 			lastLen = len; |  | ||||||
| 			size_t n = getCharacterSize(*b); |  | ||||||
|  |  | ||||||
| 			if(!isValidCharacter(&(*b),e-b)) |  | ||||||
| 			{ |  | ||||||
| 				logGlobal->error("Invalid UTF8 sequence"); |  | ||||||
| 				break;//invalid sequence will be trimmed |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			len += n; |  | ||||||
| 			b += n; |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		text.resize(lastLen); |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| //Helper for string -> float conversion | //Helper for string -> float conversion | ||||||
| class LocaleWithComma: public std::numpunct<char> | class LocaleWithComma: public std::numpunct<char> | ||||||
| { | { | ||||||
| @@ -710,9 +577,9 @@ void CGeneralTextHandler::dumpAllTexts() | |||||||
| 	for ( auto const & entry : stringsLocalizations) | 	for ( auto const & entry : stringsLocalizations) | ||||||
| 	{ | 	{ | ||||||
| 		if (!entry.second.overrideValue.empty()) | 		if (!entry.second.overrideValue.empty()) | ||||||
| 			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.overrideValue), entry.second.modContext, entry.second.overrideLanguage); | 			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue)); | ||||||
| 		else | 		else | ||||||
| 			logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.baseValue), entry.second.modContext, entry.second.baseLanguage); | 			logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue)); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	logGlobal->info("END TEXT EXPORT"); | 	logGlobal->info("END TEXT EXPORT"); | ||||||
|   | |||||||
| @@ -9,42 +9,10 @@ | |||||||
|  */ |  */ | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #include "JsonNode.h" |  | ||||||
|  |  | ||||||
| VCMI_LIB_NAMESPACE_BEGIN | VCMI_LIB_NAMESPACE_BEGIN | ||||||
|  |  | ||||||
| /// Namespace that provides utilites for unicode support (UTF-8) |  | ||||||
| namespace Unicode |  | ||||||
| { |  | ||||||
| 	/// evaluates size of UTF-8 character |  | ||||||
| 	size_t DLL_LINKAGE getCharacterSize(char firstByte); |  | ||||||
|  |  | ||||||
| 	/// test if character is a valid UTF-8 symbol |  | ||||||
| 	/// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string) |  | ||||||
| 	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize); |  | ||||||
|  |  | ||||||
| 	/// test if text contains ASCII-string (no need for unicode conversion) |  | ||||||
| 	bool DLL_LINKAGE isValidASCII(const std::string & text); |  | ||||||
| 	bool DLL_LINKAGE isValidASCII(const char * data, size_t size); |  | ||||||
|  |  | ||||||
| 	/// test if text contains valid UTF-8 sequence |  | ||||||
| 	bool DLL_LINKAGE isValidString(const std::string & text); |  | ||||||
| 	bool DLL_LINKAGE isValidString(const char * data, size_t size); |  | ||||||
|  |  | ||||||
| 	/// converts text to unicode from specified encoding or from one specified in settings |  | ||||||
| 	std::string DLL_LINKAGE toUnicode(const std::string & text); |  | ||||||
| 	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding); |  | ||||||
|  |  | ||||||
| 	/// converts text from unicode to specified encoding or to one specified in settings |  | ||||||
| 	/// NOTE: usage of these functions should be avoided if possible |  | ||||||
| 	std::string DLL_LINKAGE fromUnicode(const std::string & text); |  | ||||||
| 	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding); |  | ||||||
|  |  | ||||||
| 	///delete (amount) UTF characters from right |  | ||||||
| 	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class CInputStream; | class CInputStream; | ||||||
|  | class JsonNode; | ||||||
|  |  | ||||||
| /// Parser for any text files from H3 | /// Parser for any text files from H3 | ||||||
| class DLL_LINKAGE CLegacyConfigParser | class DLL_LINKAGE CLegacyConfigParser | ||||||
|   | |||||||
| @@ -12,7 +12,7 @@ | |||||||
| #include "JsonDetail.h" | #include "JsonDetail.h" | ||||||
|  |  | ||||||
| #include "VCMI_Lib.h" | #include "VCMI_Lib.h" | ||||||
| #include "CGeneralTextHandler.h" | #include "TextOperations.h" | ||||||
| #include "CModHandler.h" | #include "CModHandler.h" | ||||||
|  |  | ||||||
| #include "filesystem/Filesystem.h" | #include "filesystem/Filesystem.h" | ||||||
|   | |||||||
							
								
								
									
										149
									
								
								lib/TextOperations.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								lib/TextOperations.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,149 @@ | |||||||
|  | /* | ||||||
|  |  * TextOperations.cpp, part of VCMI engine | ||||||
|  |  * | ||||||
|  |  * Authors: listed in file AUTHORS in main folder | ||||||
|  |  * | ||||||
|  |  * License: GNU General Public License v2.0 or later | ||||||
|  |  * Full text of license available in license.txt file, in main folder | ||||||
|  |  * | ||||||
|  |  */ | ||||||
|  | #include "StdInc.h" | ||||||
|  | #include "TextOperations.h" | ||||||
|  |  | ||||||
|  | #include "CGeneralTextHandler.h" | ||||||
|  |  | ||||||
|  | #include <boost/locale.hpp> | ||||||
|  |  | ||||||
|  | VCMI_LIB_NAMESPACE_BEGIN | ||||||
|  |  | ||||||
|  | size_t Unicode::getCharacterSize(char firstByte) | ||||||
|  | { | ||||||
|  | 	// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1: | ||||||
|  | 	// 0xxxxxxx -> 1 -  ASCII chars | ||||||
|  | 	// 110xxxxx -> 2 | ||||||
|  | 	// 11110xxx -> 4 - last allowed in current standard | ||||||
|  | 	// 1111110x -> 6 - last allowed in original standard | ||||||
|  |  | ||||||
|  | 	if ((ui8)firstByte < 0x80) | ||||||
|  | 		return 1; // ASCII | ||||||
|  |  | ||||||
|  | 	size_t ret = 0; | ||||||
|  |  | ||||||
|  | 	for (size_t i=0; i<8; i++) | ||||||
|  | 	{ | ||||||
|  | 		if (((ui8)firstByte & (0x80 >> i)) != 0) | ||||||
|  | 			ret++; | ||||||
|  | 		else | ||||||
|  | 			break; | ||||||
|  | 	} | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool Unicode::isValidCharacter(const char * character, size_t maxSize) | ||||||
|  | { | ||||||
|  | 	// can't be first byte in UTF8 | ||||||
|  | 	if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0) | ||||||
|  | 		return false; | ||||||
|  | 	// first character must follow rules checked in getCharacterSize | ||||||
|  | 	size_t size = getCharacterSize((ui8)character[0]); | ||||||
|  |  | ||||||
|  | 	if ((ui8)character[0] > 0xF4) | ||||||
|  | 		return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) | ||||||
|  |  | ||||||
|  | 	if (size > maxSize) | ||||||
|  | 		return false; | ||||||
|  |  | ||||||
|  | 	// remaining characters must have highest bit set to 1 | ||||||
|  | 	for (size_t i = 1; i < size; i++) | ||||||
|  | 	{ | ||||||
|  | 		if (((ui8)character[i] & 0x80) == 0) | ||||||
|  | 			return false; | ||||||
|  | 	} | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool Unicode::isValidASCII(const std::string & text) | ||||||
|  | { | ||||||
|  | 	for (const char & ch : text) | ||||||
|  | 		if (ui8(ch) >= 0x80 ) | ||||||
|  | 			return false; | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool Unicode::isValidASCII(const char * data, size_t size) | ||||||
|  | { | ||||||
|  | 	for (size_t i=0; i<size; i++) | ||||||
|  | 		if (ui8(data[i]) >= 0x80 ) | ||||||
|  | 			return false; | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool Unicode::isValidString(const std::string & text) | ||||||
|  | { | ||||||
|  | 	for (size_t i=0; i<text.size(); i += getCharacterSize(text[i])) | ||||||
|  | 	{ | ||||||
|  | 		if (!isValidCharacter(text.data() + i, text.size() - i)) | ||||||
|  | 			return false; | ||||||
|  | 	} | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool Unicode::isValidString(const char * data, size_t size) | ||||||
|  | { | ||||||
|  | 	for (size_t i=0; i<size; i += getCharacterSize(data[i])) | ||||||
|  | 	{ | ||||||
|  | 		if (!isValidCharacter(data + i, size - i)) | ||||||
|  | 			return false; | ||||||
|  | 	} | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::string Unicode::toUnicode(const std::string &text) | ||||||
|  | { | ||||||
|  | 	return toUnicode(text, CGeneralTextHandler::getInstalledEncoding()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::string Unicode::toUnicode(const std::string &text, const std::string &encoding) | ||||||
|  | { | ||||||
|  | 	return boost::locale::conv::to_utf<char>(text, encoding); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::string Unicode::fromUnicode(const std::string & text) | ||||||
|  | { | ||||||
|  | 	return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) | ||||||
|  | { | ||||||
|  | 	return boost::locale::conv::from_utf<char>(text, encoding); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Unicode::trimRight(std::string & text, const size_t amount) | ||||||
|  | { | ||||||
|  | 	if(text.empty()) | ||||||
|  | 		return; | ||||||
|  | 	//todo: more efficient algorithm | ||||||
|  | 	for(int i = 0; i< amount; i++){ | ||||||
|  | 		auto b = text.begin(); | ||||||
|  | 		auto e = text.end(); | ||||||
|  | 		size_t lastLen = 0; | ||||||
|  | 		size_t len = 0; | ||||||
|  | 		while (b != e) { | ||||||
|  | 			lastLen = len; | ||||||
|  | 			size_t n = getCharacterSize(*b); | ||||||
|  |  | ||||||
|  | 			if(!isValidCharacter(&(*b),e-b)) | ||||||
|  | 			{ | ||||||
|  | 				logGlobal->error("Invalid UTF8 sequence"); | ||||||
|  | 				break;//invalid sequence will be trimmed | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			len += n; | ||||||
|  | 			b += n; | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		text.resize(lastLen); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | VCMI_LIB_NAMESPACE_END | ||||||
							
								
								
									
										45
									
								
								lib/TextOperations.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								lib/TextOperations.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | |||||||
|  | /* | ||||||
|  |  * TextOperations.h, part of VCMI engine | ||||||
|  |  * | ||||||
|  |  * Authors: listed in file AUTHORS in main folder | ||||||
|  |  * | ||||||
|  |  * License: GNU General Public License v2.0 or later | ||||||
|  |  * Full text of license available in license.txt file, in main folder | ||||||
|  |  * | ||||||
|  |  */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | VCMI_LIB_NAMESPACE_BEGIN | ||||||
|  |  | ||||||
|  | /// Namespace that provides utilities for unicode support (UTF-8) | ||||||
|  | namespace Unicode | ||||||
|  | { | ||||||
|  | 	/// evaluates size of UTF-8 character | ||||||
|  | 	size_t DLL_LINKAGE getCharacterSize(char firstByte); | ||||||
|  |  | ||||||
|  | 	/// test if character is a valid UTF-8 symbol | ||||||
|  | 	/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string) | ||||||
|  | 	bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize); | ||||||
|  |  | ||||||
|  | 	/// test if text contains ASCII-string (no need for unicode conversion) | ||||||
|  | 	bool DLL_LINKAGE isValidASCII(const std::string & text); | ||||||
|  | 	bool DLL_LINKAGE isValidASCII(const char * data, size_t size); | ||||||
|  |  | ||||||
|  | 	/// test if text contains valid UTF-8 sequence | ||||||
|  | 	bool DLL_LINKAGE isValidString(const std::string & text); | ||||||
|  | 	bool DLL_LINKAGE isValidString(const char * data, size_t size); | ||||||
|  |  | ||||||
|  | 	/// converts text to unicode from specified encoding or from one specified in settings | ||||||
|  | 	std::string DLL_LINKAGE toUnicode(const std::string & text); | ||||||
|  | 	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding); | ||||||
|  |  | ||||||
|  | 	/// converts text from unicode to specified encoding or to one specified in settings | ||||||
|  | 	/// NOTE: usage of these functions should be avoided if possible | ||||||
|  | 	std::string DLL_LINKAGE fromUnicode(const std::string & text); | ||||||
|  | 	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding); | ||||||
|  |  | ||||||
|  | 	///delete (amount) UTF characters from right | ||||||
|  | 	DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | VCMI_LIB_NAMESPACE_END | ||||||
| @@ -11,7 +11,7 @@ | |||||||
| #include "CBinaryReader.h" | #include "CBinaryReader.h" | ||||||
|  |  | ||||||
| #include "CInputStream.h" | #include "CInputStream.h" | ||||||
| #include "../CGeneralTextHandler.h" | #include "../TextOperations.h" | ||||||
|  |  | ||||||
| VCMI_LIB_NAMESPACE_BEGIN | VCMI_LIB_NAMESPACE_BEGIN | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user