diff --git a/client/adventureMap/CInGameConsole.cpp b/client/adventureMap/CInGameConsole.cpp index 57a67c09b..aa1227f36 100644 --- a/client/adventureMap/CInGameConsole.cpp +++ b/client/adventureMap/CInGameConsole.cpp @@ -20,7 +20,7 @@ #include "../../CCallback.h" #include "../../lib/CConfigHandler.h" -#include "../../lib/CGeneralTextHandler.h" +#include "../../lib/TextOperations.h" #include "../../lib/mapObjects/CArmedInstance.h" #include diff --git a/client/render/IFont.cpp b/client/render/IFont.cpp index 0086cd9ee..5575dd19a 100644 --- a/client/render/IFont.cpp +++ b/client/render/IFont.cpp @@ -12,7 +12,7 @@ #include "IFont.h" #include "../../lib/Point.h" -#include "../../lib/CGeneralTextHandler.h" +#include "../../lib/TextOperations.h" // size_t IFont::getStringWidth(const std::string & data) const diff --git a/client/renderSDL/CBitmapFont.cpp b/client/renderSDL/CBitmapFont.cpp index f2eb49d00..531e7f391 100644 --- a/client/renderSDL/CBitmapFont.cpp +++ b/client/renderSDL/CBitmapFont.cpp @@ -14,7 +14,7 @@ #include "../../lib/vcmi_endian.h" #include "../../lib/filesystem/Filesystem.h" -#include "../../lib/CGeneralTextHandler.h" +#include "../../lib/TextOperations.h" #include "../../lib/Rect.h" #include diff --git a/client/renderSDL/CBitmapHanFont.cpp b/client/renderSDL/CBitmapHanFont.cpp index bcaa94d59..663cb3467 100644 --- a/client/renderSDL/CBitmapHanFont.cpp +++ b/client/renderSDL/CBitmapHanFont.cpp @@ -15,7 +15,7 @@ #include "../../lib/JsonNode.h" #include "../../lib/filesystem/Filesystem.h" -#include "../../lib/CGeneralTextHandler.h" +#include "../../lib/TextOperations.h" #include "../../lib/Rect.h" #include diff --git a/client/renderSDL/CTrueTypeFont.cpp b/client/renderSDL/CTrueTypeFont.cpp index d05cf42c3..c1c97bb8d 100644 --- a/client/renderSDL/CTrueTypeFont.cpp +++ b/client/renderSDL/CTrueTypeFont.cpp @@ -14,7 +14,7 @@ #include "../renderSDL/SDL_Extensions.h" #include "../../lib/JsonNode.h" -#include "../../lib/CGeneralTextHandler.h" +#include 
"../../lib/TextOperations.h" #include "../../lib/filesystem/Filesystem.h" #include diff --git a/client/widgets/TextControls.cpp b/client/widgets/TextControls.cpp index b9cc7dbb0..e5f163d95 100644 --- a/client/widgets/TextControls.cpp +++ b/client/widgets/TextControls.cpp @@ -19,7 +19,7 @@ #include "../adventureMap/CInGameConsole.h" #include "../renderSDL/SDL_Extensions.h" -#include "../../lib/CGeneralTextHandler.h" +#include "../../lib/TextOperations.h" #ifdef VCMI_ANDROID #include "lib/CAndroidVMHelper.h" diff --git a/client/windows/CMessage.cpp b/client/windows/CMessage.cpp index 27b7c825a..1a513f838 100644 --- a/client/windows/CMessage.cpp +++ b/client/windows/CMessage.cpp @@ -13,6 +13,7 @@ #include "../CGameInfo.h" #include "../../lib/CGeneralTextHandler.h" +#include "../../lib/TextOperations.h" #include "../windows/InfoWindows.h" #include "../widgets/Buttons.h" diff --git a/cmake_modules/VCMI_lib.cmake b/cmake_modules/VCMI_lib.cmake index 8f3f44bed..041bc386b 100644 --- a/cmake_modules/VCMI_lib.cmake +++ b/cmake_modules/VCMI_lib.cmake @@ -197,6 +197,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE) ${MAIN_LIB_DIR}/RoadHandler.cpp ${MAIN_LIB_DIR}/ScriptHandler.cpp ${MAIN_LIB_DIR}/TerrainHandler.cpp + ${MAIN_LIB_DIR}/TextOperations.cpp ${MAIN_LIB_DIR}/VCMIDirs.cpp ${MAIN_LIB_DIR}/VCMI_Lib.cpp ) @@ -454,6 +455,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE) ${MAIN_LIB_DIR}/StartInfo.h ${MAIN_LIB_DIR}/StringConstants.h ${MAIN_LIB_DIR}/TerrainHandler.h + ${MAIN_LIB_DIR}/TextOperations.h ${MAIN_LIB_DIR}/UnlockGuard.h ${MAIN_LIB_DIR}/VCMIDirs.h ${MAIN_LIB_DIR}/vcmi_endian.h diff --git a/lib/CGeneralTextHandler.cpp b/lib/CGeneralTextHandler.cpp index 59c375429..e0c1cbeb8 100644 --- a/lib/CGeneralTextHandler.cpp +++ b/lib/CGeneralTextHandler.cpp @@ -10,100 +10,16 @@ #include "StdInc.h" #include "CGeneralTextHandler.h" -#include - #include "filesystem/Filesystem.h" #include "CConfigHandler.h" #include "CModHandler.h" -#include "GameConstants.h" #include 
"mapObjects/CQuest.h" #include "VCMI_Lib.h" #include "Languages.h" +#include "TextOperations.h" VCMI_LIB_NAMESPACE_BEGIN -size_t Unicode::getCharacterSize(char firstByte) -{ - // length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1: - // 0xxxxxxx -> 1 - ASCII chars - // 110xxxxx -> 2 - // 11110xxx -> 4 - last allowed in current standard - // 1111110x -> 6 - last allowed in original standard - - if ((ui8)firstByte < 0x80) - return 1; // ASCII - - size_t ret = 0; - - for (size_t i=0; i<8; i++) - { - if (((ui8)firstByte & (0x80 >> i)) != 0) - ret++; - else - break; - } - return ret; -} - -bool Unicode::isValidCharacter(const char * character, size_t maxSize) -{ - // can't be first byte in UTF8 - if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0) - return false; - // first character must follow rules checked in getCharacterSize - size_t size = getCharacterSize((ui8)character[0]); - - if ((ui8)character[0] > 0xF4) - return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) - - if (size > maxSize) - return false; - - // remaining characters must have highest bit set to 1 - for (size_t i = 1; i < size; i++) - { - if (((ui8)character[i] & 0x80) == 0) - return false; - } - return true; -} - -bool Unicode::isValidASCII(const std::string & text) -{ - for (const char & ch : text) - if (ui8(ch) >= 0x80 ) - return false; - return true; -} - -bool Unicode::isValidASCII(const char * data, size_t size) -{ - for (size_t i=0; i= 0x80 ) - return false; - return true; -} - -bool Unicode::isValidString(const std::string & text) -{ - for (size_t i=0; iString() = Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding; } -std::string Unicode::toUnicode(const std::string &text) -{ - return toUnicode(text, CGeneralTextHandler::getInstalledEncoding()); -} - -std::string Unicode::toUnicode(const std::string &text, const std::string &encoding) -{ - return boost::locale::conv::to_utf(text, 
encoding); -} - -std::string Unicode::fromUnicode(const std::string & text) -{ - return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding()); -} - -std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) -{ - return boost::locale::conv::from_utf(text, encoding); -} - -void Unicode::trimRight(std::string & text, const size_t amount) -{ - if(text.empty()) - return; - //todo: more efficient algorithm - for(int i = 0; i< amount; i++){ - auto b = text.begin(); - auto e = text.end(); - size_t lastLen = 0; - size_t len = 0; - while (b != e) { - lastLen = len; - size_t n = getCharacterSize(*b); - - if(!isValidCharacter(&(*b),e-b)) - { - logGlobal->error("Invalid UTF8 sequence"); - break;//invalid sequence will be trimmed - } - - len += n; - b += n; - } - - text.resize(lastLen); - } -} - - //Helper for string -> float conversion class LocaleWithComma: public std::numpunct { @@ -710,9 +577,9 @@ void CGeneralTextHandler::dumpAllTexts() for ( auto const & entry : stringsLocalizations) { if (!entry.second.overrideValue.empty()) - logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.overrideValue), entry.second.modContext, entry.second.overrideLanguage); + logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue)); else - logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.baseValue), entry.second.modContext, entry.second.baseLanguage); + logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue)); } logGlobal->info("END TEXT EXPORT"); diff --git a/lib/CGeneralTextHandler.h b/lib/CGeneralTextHandler.h index 2561bad38..b32511c75 100644 --- a/lib/CGeneralTextHandler.h +++ b/lib/CGeneralTextHandler.h @@ -9,42 +9,10 @@ */ #pragma once -#include "JsonNode.h" - VCMI_LIB_NAMESPACE_BEGIN -/// Namespace that provides utilites for unicode support (UTF-8) -namespace Unicode -{ - /// evaluates size of UTF-8 character - size_t 
DLL_LINKAGE getCharacterSize(char firstByte); - - /// test if character is a valid UTF-8 symbol - /// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string) - bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize); - - /// test if text contains ASCII-string (no need for unicode conversion) - bool DLL_LINKAGE isValidASCII(const std::string & text); - bool DLL_LINKAGE isValidASCII(const char * data, size_t size); - - /// test if text contains valid UTF-8 sequence - bool DLL_LINKAGE isValidString(const std::string & text); - bool DLL_LINKAGE isValidString(const char * data, size_t size); - - /// converts text to unicode from specified encoding or from one specified in settings - std::string DLL_LINKAGE toUnicode(const std::string & text); - std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding); - - /// converts text from unicode to specified encoding or to one specified in settings - /// NOTE: usage of these functions should be avoided if possible - std::string DLL_LINKAGE fromUnicode(const std::string & text); - std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding); - - ///delete (amount) UTF characters from right - DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1); -}; - class CInputStream; +class JsonNode; /// Parser for any text files from H3 class DLL_LINKAGE CLegacyConfigParser diff --git a/lib/JsonDetail.cpp b/lib/JsonDetail.cpp index 07ccb86ea..8835b502c 100644 --- a/lib/JsonDetail.cpp +++ b/lib/JsonDetail.cpp @@ -12,7 +12,7 @@ #include "JsonDetail.h" #include "VCMI_Lib.h" -#include "CGeneralTextHandler.h" +#include "TextOperations.h" #include "CModHandler.h" #include "filesystem/Filesystem.h" diff --git a/lib/TextOperations.cpp b/lib/TextOperations.cpp new file mode 100644 index 000000000..580599102 --- /dev/null +++ b/lib/TextOperations.cpp @@ -0,0 +1,149 @@ +/* + * TextOperations.cpp, part of VCMI engine + * + * 
Authors: listed in file AUTHORS in main folder + * + * License: GNU General Public License v2.0 or later + * Full text of license available in license.txt file, in main folder + * + */ +#include "StdInc.h" +#include "TextOperations.h" + +#include "CGeneralTextHandler.h" + +#include + +VCMI_LIB_NAMESPACE_BEGIN + +size_t Unicode::getCharacterSize(char firstByte) +{ + // length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1: + // 0xxxxxxx -> 1 - ASCII chars + // 110xxxxx -> 2 + // 11110xxx -> 4 - last allowed in current standard + // 1111110x -> 6 - last allowed in original standard + + if ((ui8)firstByte < 0x80) + return 1; // ASCII + + size_t ret = 0; + + for (size_t i=0; i<8; i++) + { + if (((ui8)firstByte & (0x80 >> i)) != 0) + ret++; + else + break; + } + return ret; +} + +bool Unicode::isValidCharacter(const char * character, size_t maxSize) +{ + // can't be first byte in UTF8 + if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0) + return false; + // first character must follow rules checked in getCharacterSize + size_t size = getCharacterSize((ui8)character[0]); + + if ((ui8)character[0] > 0xF4) + return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) + + if (size > maxSize) + return false; + + // remaining characters must have highest bit set to 1 + for (size_t i = 1; i < size; i++) + { + if (((ui8)character[i] & 0x80) == 0) + return false; + } + return true; +} + +bool Unicode::isValidASCII(const std::string & text) +{ + for (const char & ch : text) + if (ui8(ch) >= 0x80 ) + return false; + return true; +} + +bool Unicode::isValidASCII(const char * data, size_t size) +{ + for (size_t i=0; i= 0x80 ) + return false; + return true; +} + +bool Unicode::isValidString(const std::string & text) +{ + for (size_t i=0; i(text, encoding); +} + +std::string Unicode::fromUnicode(const std::string & text) +{ + return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding()); 
+} + +std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) +{ + return boost::locale::conv::from_utf(text, encoding); +} + +void Unicode::trimRight(std::string & text, const size_t amount) +{ + if(text.empty()) + return; + //todo: more efficient algorithm + for(int i = 0; i< amount; i++){ + auto b = text.begin(); + auto e = text.end(); + size_t lastLen = 0; + size_t len = 0; + while (b != e) { + lastLen = len; + size_t n = getCharacterSize(*b); + + if(!isValidCharacter(&(*b),e-b)) + { + logGlobal->error("Invalid UTF8 sequence"); + break;//invalid sequence will be trimmed + } + + len += n; + b += n; + } + + text.resize(lastLen); + } +} + +VCMI_LIB_NAMESPACE_END diff --git a/lib/TextOperations.h b/lib/TextOperations.h new file mode 100644 index 000000000..8092c95da --- /dev/null +++ b/lib/TextOperations.h @@ -0,0 +1,45 @@ +/* + * TextOperations.h, part of VCMI engine + * + * Authors: listed in file AUTHORS in main folder + * + * License: GNU General Public License v2.0 or later + * Full text of license available in license.txt file, in main folder + * + */ +#pragma once + +VCMI_LIB_NAMESPACE_BEGIN + +/// Namespace that provides utilities for unicode support (UTF-8) +namespace Unicode +{ + /// evaluates size of UTF-8 character + size_t DLL_LINKAGE getCharacterSize(char firstByte); + + /// test if character is a valid UTF-8 symbol + /// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string) + bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize); + + /// test if text contains ASCII-string (no need for unicode conversion) + bool DLL_LINKAGE isValidASCII(const std::string & text); + bool DLL_LINKAGE isValidASCII(const char * data, size_t size); + + /// test if text contains valid UTF-8 sequence + bool DLL_LINKAGE isValidString(const std::string & text); + bool DLL_LINKAGE isValidString(const char * data, size_t size); + + /// converts text to unicode from specified encoding or from one 
specified in settings + std::string DLL_LINKAGE toUnicode(const std::string & text); + std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding); + + /// converts text from unicode to specified encoding or to one specified in settings + /// NOTE: usage of these functions should be avoided if possible + std::string DLL_LINKAGE fromUnicode(const std::string & text); + std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding); + + ///delete (amount) UTF characters from right + DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1); +}; + +VCMI_LIB_NAMESPACE_END diff --git a/lib/filesystem/CBinaryReader.cpp b/lib/filesystem/CBinaryReader.cpp index 7385a903d..714fe74de 100644 --- a/lib/filesystem/CBinaryReader.cpp +++ b/lib/filesystem/CBinaryReader.cpp @@ -11,7 +11,7 @@ #include "CBinaryReader.h" #include "CInputStream.h" -#include "../CGeneralTextHandler.h" +#include "../TextOperations.h" VCMI_LIB_NAMESPACE_BEGIN