From acdb8d6e06cafa26715a85e2fad13aacd62657f1 Mon Sep 17 00:00:00 2001 From: Ivan Savenko Date: Sun, 12 Feb 2023 23:52:35 +0200 Subject: [PATCH] renamed Unicode -> TextOperations, to use for all text processing --- Global.h | 21 ----- client/adventureMap/CInGameConsole.cpp | 2 +- client/battle/BattleInterfaceClasses.cpp | 3 +- client/battle/BattleStacksController.cpp | 3 +- client/render/IFont.cpp | 2 +- client/renderSDL/CBitmapFont.cpp | 6 +- client/renderSDL/CBitmapHanFont.cpp | 6 +- client/renderSDL/CTrueTypeFont.cpp | 2 +- client/widgets/CGarrisonInt.cpp | 3 +- client/widgets/MiscWidgets.cpp | 11 +-- client/widgets/TextControls.cpp | 4 +- client/windows/CCreatureWindow.cpp | 3 +- client/windows/CMessage.cpp | 2 +- client/windows/GUIClasses.cpp | 5 +- lib/CGeneralTextHandler.cpp | 34 ++------ lib/JsonDetail.cpp | 2 +- lib/TextOperations.cpp | 101 ++++++++++++++--------- lib/TextOperations.h | 51 +++++++++--- lib/filesystem/CBinaryReader.cpp | 4 +- 19 files changed, 142 insertions(+), 123 deletions(-) diff --git a/Global.h b/Global.h index 736027317..12fb734cd 100644 --- a/Global.h +++ b/Global.h @@ -730,28 +730,7 @@ namespace vstd return a + (b - a) * f; } - /// converts number into string using metric system prefixes, e.g. 'k' or 'M' to keep resulting strings within specified size - /// Note that resulting string may have more symbols than digits: minus sign and prefix symbol - template - std::string formatMetric(Arithmetic number, int maxDigits) - { - Arithmetic max = std::pow(10, maxDigits); - if (std::abs(number) < max) - return std::to_string(number); - std::string symbols = " kMGTPE"; - auto iter = symbols.begin(); - - while (std::abs(number) >= max) - { - number /= 1000; - iter++; - - assert(iter != symbols.end());//should be enough even for int64 - } - return std::to_string(number) + *iter; - } - ///compile-time version of std::abs for ints for int3, in clang++15 std::abs is constexpr static constexpr int abs(int i) { if(i < 0) return -i; diff --git a/client/adventureMap/CInGameConsole.cpp b/client/adventureMap/CInGameConsole.cpp index aa1227f36..d89e7cfe1 100644 --- a/client/adventureMap/CInGameConsole.cpp +++ b/client/adventureMap/CInGameConsole.cpp @@ -127,7 +127,7 @@ void CInGameConsole::keyPressed (const SDL_Keycode & key) { if(enteredText.size() > 1) { - Unicode::trimRight(enteredText,2); + TextOperations::trimRightUnicode(enteredText,2); enteredText += '_'; refreshEnteredText(); } diff --git a/client/battle/BattleInterfaceClasses.cpp b/client/battle/BattleInterfaceClasses.cpp index 057bfea21..72a4f0b0d 100644 --- a/client/battle/BattleInterfaceClasses.cpp +++ b/client/battle/BattleInterfaceClasses.cpp @@ -47,6 +47,7 @@ #include "../../lib/StartInfo.h" #include "../../lib/CondSh.h" #include "../../lib/mapObjects/CGTownInstance.h" +#include "../../lib/TextOperations.h" void BattleConsole::showAll(SDL_Surface * to) { @@ -717,7 +718,7 @@ void StackQueue::StackBox::setUnit(const battle::Unit * unit, size_t turn) if (unit->unitType()->idNumber == CreatureID::ARROW_TOWERS) icon->setFrame(owner->getSiegeShooterIconID(), 1); - amount->setText(vstd::formatMetric(unit->getCount(), 4)); + amount->setText(TextOperations::formatMetric(unit->getCount(), 4)); if(stateIcon) { diff --git a/client/battle/BattleStacksController.cpp b/client/battle/BattleStacksController.cpp index 75c3acb8f..cd08b8c90 100644 --- a/client/battle/BattleStacksController.cpp +++ b/client/battle/BattleStacksController.cpp @@ -36,6 +36,7 @@ #include "../../lib/CGameState.h" #include "../../lib/CStack.h" #include "../../lib/CondSh.h" +#include "../../lib/TextOperations.h" static void onAnimationFinished(const CStack *stack, std::weak_ptr anim) { @@ -316,7 +317,7 @@ void BattleStacksController::showStackAmountBox(Canvas & canvas, const CStack * //blitting amount Point textPos = stackAnimation[stack->ID]->pos.topLeft() + amountBG->dimensions()/2 + Point(xAdd, yAdd); - canvas.drawText(textPos, EFonts::FONT_TINY, Colors::WHITE, ETextAlignment::CENTER, vstd::formatMetric(stack->getCount(), 4)); + canvas.drawText(textPos, EFonts::FONT_TINY, Colors::WHITE, ETextAlignment::CENTER, TextOperations::formatMetric(stack->getCount(), 4)); } void BattleStacksController::showStack(Canvas & canvas, const CStack * stack) diff --git a/client/render/IFont.cpp b/client/render/IFont.cpp index 5575dd19a..48a84cab4 100644 --- a/client/render/IFont.cpp +++ b/client/render/IFont.cpp @@ -19,7 +19,7 @@ size_t IFont::getStringWidth(const std::string & data) const { size_t width = 0; - for(size_t i=0; irenderCharacter(surface, fallback->chars[ui8(localChar[0])], color, posX, posY); @@ -115,7 +115,7 @@ size_t CBitmapHanFont::getLineHeight() const size_t CBitmapHanFont::getGlyphWidth(const char * data) const { - std::string localChar = Unicode::fromUnicode(std::string(data, Unicode::getCharacterSize(data[0]))); + std::string localChar = TextOperations::fromUnicode(std::string(data, TextOperations::getUnicodeCharacterSize(data[0]))); if (localChar.size() == 1) return fallback->getGlyphWidth(data); diff --git a/client/renderSDL/CTrueTypeFont.cpp b/client/renderSDL/CTrueTypeFont.cpp index c1c97bb8d..dbae45d2e 100644 --- a/client/renderSDL/CTrueTypeFont.cpp +++ b/client/renderSDL/CTrueTypeFont.cpp @@ -66,7 +66,7 @@ size_t CTrueTypeFont::getLineHeight() const size_t CTrueTypeFont::getGlyphWidth(const char *data) const { - return getStringWidth(std::string(data, Unicode::getCharacterSize(*data))); + return getStringWidth(std::string(data, TextOperations::getUnicodeCharacterSize(*data))); /* int advance; TTF_GlyphMetrics(font.get(), *data, nullptr, nullptr, nullptr, nullptr, &advance); diff --git a/client/widgets/CGarrisonInt.cpp b/client/widgets/CGarrisonInt.cpp index 6a0b7a178..240c43534 100644 --- a/client/widgets/CGarrisonInt.cpp +++ b/client/widgets/CGarrisonInt.cpp @@ -25,6 +25,7 @@ #include "../../lib/CGeneralTextHandler.h" #include "../../lib/CCreatureHandler.h" #include "../../lib/mapObjects/CGHeroInstance.h" +#include "../../lib/TextOperations.h" #include "../../lib/CGameState.h" @@ -374,7 +375,7 @@ void CGarrisonSlot::update() creatureImage->setFrame(creature->getIconIndex()); stackCount->enable(); - stackCount->setText(vstd::formatMetric(myStack->count, 4)); + stackCount->setText(TextOperations::formatMetric(myStack->count, 4)); } else { diff --git a/client/widgets/MiscWidgets.cpp b/client/widgets/MiscWidgets.cpp index 14baa7b2d..aae6198d7 100644 --- a/client/widgets/MiscWidgets.cpp +++ b/client/widgets/MiscWidgets.cpp @@ -24,12 +24,13 @@ #include "../../CCallback.h" -#include "../../lib/mapObjects/CGHeroInstance.h" -#include "../../lib/mapObjects/CGTownInstance.h" +#include "../../lib/CConfigHandler.h" +#include "../../lib/CGameState.h" #include "../../lib/CGeneralTextHandler.h" #include "../../lib/CModHandler.h" -#include "../../lib/CGameState.h" -#include "../../lib/CConfigHandler.h" +#include "../../lib/TextOperations.h" +#include "../../lib/mapObjects/CGHeroInstance.h" +#include "../../lib/mapObjects/CGTownInstance.h" void CHoverableArea::hover (bool on) { @@ -251,7 +252,7 @@ void CArmyTooltip::init(const InfoAboutArmy &army) std::string subtitle; if(army.army.isDetailed) { - subtitle = vstd::formatMetric(slot.second.count, 4); + subtitle = TextOperations::formatMetric(slot.second.count, 4); } else { diff --git a/client/widgets/TextControls.cpp b/client/widgets/TextControls.cpp index e5f163d95..4bc229cbb 100644 --- a/client/widgets/TextControls.cpp +++ b/client/widgets/TextControls.cpp @@ -570,12 +570,12 @@ void CTextInput::keyPressed(const SDL_Keycode & key) case SDLK_BACKSPACE: if(!newText.empty()) { - Unicode::trimRight(newText); + TextOperations::trimRightUnicode(newText); redrawNeeded = true; } else if(!text.empty()) { - Unicode::trimRight(text); + TextOperations::trimRightUnicode(text); redrawNeeded = true; } break; diff --git a/client/windows/CCreatureWindow.cpp b/client/windows/CCreatureWindow.cpp index 320809bbe..5b4cece27 100644 --- a/client/windows/CCreatureWindow.cpp +++ b/client/windows/CCreatureWindow.cpp @@ -31,6 +31,7 @@ #include "../../lib/CModHandler.h" #include "../../lib/CHeroHandler.h" #include "../../lib/CGameState.h" +#include "../../lib/TextOperations.h" class CCreatureArtifactInstance; class CSelectableSkill; @@ -582,7 +583,7 @@ CStackWindow::MainSection::MainSection(CStackWindow * owner, int yOffset, bool s } expLabel = std::make_shared( pos.x + 21, pos.y + 52, FONT_SMALL, ETextAlignment::CENTER, Colors::WHITE, - vstd::formatMetric(stack->experience, 6)); + TextOperations::formatMetric(stack->experience, 6)); } if(showArt) diff --git a/client/windows/CMessage.cpp b/client/windows/CMessage.cpp index 1a513f838..25a6f607f 100644 --- a/client/windows/CMessage.cpp +++ b/client/windows/CMessage.cpp @@ -133,7 +133,7 @@ std::vector CMessage::breakText( std::string text, size_t maxLineWi // loops till line is full or end of text reached while(currPos < text.length() && text[currPos] != 0x0a && lineWidth < maxLineWidth) { - symbolSize = Unicode::getCharacterSize(text[currPos]); + symbolSize = TextOperations::getUnicodeCharacterSize(text[currPos]); glyphWidth = graphics->fonts[font]->getGlyphWidth(text.data() + currPos); // candidate for line break diff --git a/client/windows/GUIClasses.cpp b/client/windows/GUIClasses.cpp index 3209e0119..1e5e89122 100644 --- a/client/windows/GUIClasses.cpp +++ b/client/windows/GUIClasses.cpp @@ -63,6 +63,7 @@ #include "../lib/mapping/CMap.h" #include "../lib/NetPacksBase.h" #include "../lib/StartInfo.h" +#include "../lib/TextOperations.h" #include @@ -1066,8 +1067,8 @@ void CExchangeWindow::updateWidgets() secSkillIcons[leftRight][m]->setFrame(2 + id * 3 + level); } - expValues[leftRight]->setText(vstd::formatMetric(hero->exp, 3)); - manaValues[leftRight]->setText(vstd::formatMetric(hero->mana, 3)); + expValues[leftRight]->setText(TextOperations::formatMetric(hero->exp, 3)); + manaValues[leftRight]->setText(TextOperations::formatMetric(hero->mana, 3)); morale[leftRight]->set(hero); luck[leftRight]->set(hero); diff --git a/lib/CGeneralTextHandler.cpp b/lib/CGeneralTextHandler.cpp index e0c1cbeb8..64284c37a 100644 --- a/lib/CGeneralTextHandler.cpp +++ b/lib/CGeneralTextHandler.cpp @@ -188,9 +188,9 @@ std::string CLegacyConfigParser::readString() { // do not convert strings that are already in ASCII - this will only slow down loading process std::string str = readRawString(); - if (Unicode::isValidASCII(str)) + if (TextOperations::isValidASCII(str)) return str; - return Unicode::toUnicode(str); + return TextOperations::toUnicode(str); } float CLegacyConfigParser::readNumber() @@ -285,17 +285,6 @@ void CGeneralTextHandler::registerStringOverride(const std::string & modContext, bool CGeneralTextHandler::validateTranslation(const std::string & language, const std::string & modContext, const JsonNode & config) const { - auto escapeString = [](std::string input) - { - boost::replace_all(input, "\\", "\\\\"); - boost::replace_all(input, "\n", "\\n"); - boost::replace_all(input, "\r", "\\r"); - boost::replace_all(input, "\t", "\\t"); - boost::replace_all(input, "\"", "\\\""); - - return input; - }; - bool allPresent = true; for (auto const & string : stringsLocalizations) @@ -318,7 +307,7 @@ bool CGeneralTextHandler::validateTranslation(const std::string & language, cons else currentText = string.second.overrideValue; - logMod->warn(R"( "%s" : "%s",)", string.first, escapeString(currentText)); + logMod->warn(R"( "%s" : "%s",)", string.first, TextOperations::escapeString(currentText)); allPresent = false; } @@ -332,7 +321,7 @@ bool CGeneralTextHandler::validateTranslation(const std::string & language, cons if (allFound) logMod->warn("Translation into language '%s' in mod '%s' has unused lines:", language, modContext); - logMod->warn(R"( "%s" : "%s",)", string.first, escapeString(string.second.String())); + logMod->warn(R"( "%s" : "%s",)", string.first, TextOperations::escapeString(string.second.String())); allFound = false; } @@ -562,24 +551,13 @@ int32_t CGeneralTextHandler::pluralText(int32_t textIndex, int32_t count) const void CGeneralTextHandler::dumpAllTexts() { - auto escapeString = [](std::string input) - { - boost::replace_all(input, "\\", "\\\\"); - boost::replace_all(input, "\n", "\\n"); - boost::replace_all(input, "\r", "\\r"); - boost::replace_all(input, "\t", "\\t"); - boost::replace_all(input, "\"", "\\\""); - - return input; - }; - logGlobal->info("BEGIN TEXT EXPORT"); for ( auto const & entry : stringsLocalizations) { if (!entry.second.overrideValue.empty()) - logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue)); + logGlobal->info(R"("%s" : "%s",)", entry.first, TextOperations::escapeString(entry.second.overrideValue)); else - logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue)); + logGlobal->info(R"("%s" : "%s",)", entry.first, TextOperations::escapeString(entry.second.baseValue)); } logGlobal->info("END TEXT EXPORT"); diff --git a/lib/JsonDetail.cpp b/lib/JsonDetail.cpp index 8835b502c..50cda76d0 100644 --- a/lib/JsonDetail.cpp +++ b/lib/JsonDetail.cpp @@ -169,7 +169,7 @@ JsonNode JsonParser::parse(std::string fileName) } else { - if (!Unicode::isValidString(&input[0], input.size())) + if (!TextOperations::isValidUnicodeString(&input[0], input.size())) error("Not a valid UTF-8 file", false); extractValue(root); diff --git a/lib/TextOperations.cpp b/lib/TextOperations.cpp index 580599102..38f0c4bd5 100644 --- a/lib/TextOperations.cpp +++ b/lib/TextOperations.cpp @@ -16,39 +16,52 @@ VCMI_LIB_NAMESPACE_BEGIN -size_t Unicode::getCharacterSize(char firstByte) +size_t TextOperations::getUnicodeCharacterSize(char firstByte) { // length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1: // 0xxxxxxx -> 1 - ASCII chars // 110xxxxx -> 2 + // 1110xxxx -> 3 // 11110xxx -> 4 - last allowed in current standard - // 1111110x -> 6 - last allowed in original standard - if ((ui8)firstByte < 0x80) + auto value = static_cast(firstByte); + + if ((value & 0b10000000) == 0) return 1; // ASCII - size_t ret = 0; + if ((value & 0b11100000) == 0b11000000) + return 2; - for (size_t i=0; i<8; i++) - { - if (((ui8)firstByte & (0x80 >> i)) != 0) - ret++; - else - break; - } - return ret; + if ((value & 0b11110000) == 0b11100000) + return 3; + + if ((value & 0b11111000) == 0b11110000) + return 4; + + assert(0);// invalid unicode sequence + return 4; } -bool Unicode::isValidCharacter(const char * character, size_t maxSize) +bool TextOperations::isValidUnicodeCharacter(const char * character, size_t maxSize) { - // can't be first byte in UTF8 - if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0) - return false; - // first character must follow rules checked in getCharacterSize - size_t size = getCharacterSize((ui8)character[0]); + assert(maxSize > 0); - if ((ui8)character[0] > 0xF4) - return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) + auto value = static_cast(character[0]); + + // ASCII + if ( value < 0b10000000) + return maxSize > 0; + + // can't be first byte in UTF8 + if (value < 0b11000000) + return false; + + // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF) + if (value > 0b11110000) + return false; + + // first character must follow rules checked in getUnicodeCharacterSize + size_t size = getUnicodeCharacterSize(character[0]); if (size > maxSize) return false; @@ -56,69 +69,70 @@ bool Unicode::isValidCharacter(const char * character, size_t maxSize) // remaining characters must have highest bit set to 1 for (size_t i = 1; i < size; i++) { - if (((ui8)character[i] & 0x80) == 0) + auto characterValue = static_cast(character[i]); + if (characterValue < 0b10000000) return false; } return true; } -bool Unicode::isValidASCII(const std::string & text) +bool TextOperations::isValidASCII(const std::string & text) { for (const char & ch : text) - if (ui8(ch) >= 0x80 ) + if (static_cast(ch) >= 0x80 ) return false; return true; } -bool Unicode::isValidASCII(const char * data, size_t size) +bool TextOperations::isValidASCII(const char * data, size_t size) { for (size_t i=0; i= 0x80 ) + if (static_cast(data[i]) >= 0x80 ) return false; return true; } -bool Unicode::isValidString(const std::string & text) +bool TextOperations::isValidUnicodeString(const std::string & text) { - for (size_t i=0; i(text, encoding); } -std::string Unicode::fromUnicode(const std::string & text) +std::string TextOperations::fromUnicode(const std::string & text) { return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding()); } -std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) +std::string TextOperations::fromUnicode(const std::string &text, const std::string &encoding) { return boost::locale::conv::from_utf(text, encoding); } -void Unicode::trimRight(std::string & text, const size_t amount) +void TextOperations::trimRightUnicode(std::string & text, const size_t amount) { if(text.empty()) return; @@ -130,9 +144,9 @@ void Unicode::trimRight(std::string & text, const size_t amount) size_t len = 0; while (b != e) { lastLen = len; - size_t n = getCharacterSize(*b); + size_t n = getUnicodeCharacterSize(*b); - if(!isValidCharacter(&(*b),e-b)) + if(!isValidUnicodeCharacter(&(*b),e-b)) { logGlobal->error("Invalid UTF8 sequence"); break;//invalid sequence will be trimmed @@ -146,4 +160,15 @@ void Unicode::trimRight(std::string & text, const size_t amount) } } +std::string TextOperations::escapeString(std::string input) +{ + boost::replace_all(input, "\\", "\\\\"); + boost::replace_all(input, "\n", "\\n"); + boost::replace_all(input, "\r", "\\r"); + boost::replace_all(input, "\t", "\\t"); + boost::replace_all(input, "\"", "\\\""); + + return input; +} + VCMI_LIB_NAMESPACE_END diff --git a/lib/TextOperations.h b/lib/TextOperations.h index 8092c95da..df33738b9 100644 --- a/lib/TextOperations.h +++ b/lib/TextOperations.h @@ -12,24 +12,25 @@ VCMI_LIB_NAMESPACE_BEGIN /// Namespace that provides utilites for unicode support (UTF-8) -namespace Unicode +namespace TextOperations { - /// evaluates size of UTF-8 character - size_t DLL_LINKAGE getCharacterSize(char firstByte); + /// returns length (in bytes) of UTF-8 character starting from specified character + size_t DLL_LINKAGE getUnicodeCharacterSize(char firstByte); /// test if character is a valid UTF-8 symbol /// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string) - bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize); + bool DLL_LINKAGE isValidUnicodeCharacter(const char * character, size_t maxSize); - /// test if text contains ASCII-string (no need for unicode conversion) + /// returns true if text contains valid ASCII-string + /// Note that since UTF-8 extends ASCII, any ASCII string is also UTF-8 string bool DLL_LINKAGE isValidASCII(const std::string & text); bool DLL_LINKAGE isValidASCII(const char * data, size_t size); /// test if text contains valid UTF-8 sequence - bool DLL_LINKAGE isValidString(const std::string & text); - bool DLL_LINKAGE isValidString(const char * data, size_t size); + bool DLL_LINKAGE isValidUnicodeString(const std::string & text); + bool DLL_LINKAGE isValidUnicodeString(const char * data, size_t size); - /// converts text to unicode from specified encoding or from one specified in settings + /// converts text to UTF-8 from specified encoding or from one specified in settings std::string DLL_LINKAGE toUnicode(const std::string & text); std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding); @@ -38,8 +39,38 @@ namespace Unicode std::string DLL_LINKAGE fromUnicode(const std::string & text); std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding); - ///delete (amount) UTF characters from right - DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1); + ///delete specified amount of UTF-8 characters from right + DLL_LINKAGE void trimRightUnicode(std::string & text, size_t amount = 1); + + /// converts number into string using metric system prefixes, e.g. 'k' or 'M' to keep resulting strings within specified size + /// Note that resulting string may have more symbols than digits: minus sign and prefix symbol + template + inline std::string formatMetric(Arithmetic number, int maxDigits); + + /// replaces all symbols that normally need escaping with appropriate escape sequences + std::string escapeString(std::string input); }; + + +template +inline std::string TextOperations::formatMetric(Arithmetic number, int maxDigits) +{ + Arithmetic max = std::pow(10, maxDigits); + if (std::abs(number) < max) + return std::to_string(number); + + std::string symbols = " kMGTPE"; + auto iter = symbols.begin(); + + while (std::abs(number) >= max) + { + number /= 1000; + iter++; + + assert(iter != symbols.end());//should be enough even for int64 + } + return std::to_string(number) + *iter; +} + VCMI_LIB_NAMESPACE_END diff --git a/lib/filesystem/CBinaryReader.cpp b/lib/filesystem/CBinaryReader.cpp index 714fe74de..4e4594450 100644 --- a/lib/filesystem/CBinaryReader.cpp +++ b/lib/filesystem/CBinaryReader.cpp @@ -96,9 +96,9 @@ std::string CBinaryReader::readString() ret.resize(len); read(reinterpret_cast(&ret[0]), len); //FIXME: any need to move this into separate "read localized string" method? - if (Unicode::isValidASCII(ret)) + if (TextOperations::isValidASCII(ret)) return ret; - return Unicode::toUnicode(ret); + return TextOperations::toUnicode(ret); } return "";