1
0
mirror of https://github.com/vcmi/vcmi.git synced 2025-01-24 03:47:18 +02:00

renamed Unicode -> TextOperations, to use for all text processing

This commit is contained in:
Ivan Savenko 2023-02-12 23:52:35 +02:00
parent 65c020ef34
commit acdb8d6e06
19 changed files with 142 additions and 123 deletions

View File

@ -730,28 +730,7 @@ namespace vstd
return a + (b - a) * f;
}
/// Converts number into string using metric system prefixes, e.g. 'k' or 'M', to keep resulting strings within specified size.
/// Note that resulting string may have more symbols than digits: minus sign and prefix symbol.
/// @param number value to format; every applied prefix divides it by 1000 using Arithmetic's own division
/// @param maxDigits values whose magnitude is below 10^maxDigits are printed without any prefix
/// @return std::to_string of the (possibly scaled) number, followed by the prefix symbol if scaling was applied
template<typename Arithmetic>
std::string formatMetric(Arithmetic number, int maxDigits)
{
	// Build 10^maxDigits by repeated multiplication in Arithmetic itself instead of calling std::pow:
	// pow works in floating point and its result may land slightly below the exact power of ten,
	// in which case conversion back to an integer type truncates it (e.g. 9999 instead of 10000)
	Arithmetic max = 1;
	for (int digit = 0; digit < maxDigits; ++digit)
		max *= 10;

	if (std::abs(number) < max)
		return std::to_string(number);

	const std::string symbols = " kMGTPE";
	auto iter = symbols.begin();

	while (std::abs(number) >= max)
	{
		const bool prefixAvailable = (iter + 1) != symbols.end();
		assert(prefixAvailable);//should be enough even for int64
		if (!prefixAvailable)
			break; // asserts disabled: keep the largest prefix instead of dereferencing end()

		number /= 1000;
		++iter;
	}
	return std::to_string(number) + *iter;
}
///compile-time version of std::abs for ints for int3, in clang++15 std::abs is constexpr
static constexpr int abs(int i) {
if(i < 0) return -i;

View File

@ -127,7 +127,7 @@ void CInGameConsole::keyPressed (const SDL_Keycode & key)
{
if(enteredText.size() > 1)
{
Unicode::trimRight(enteredText,2);
TextOperations::trimRightUnicode(enteredText,2);
enteredText += '_';
refreshEnteredText();
}

View File

@ -47,6 +47,7 @@
#include "../../lib/StartInfo.h"
#include "../../lib/CondSh.h"
#include "../../lib/mapObjects/CGTownInstance.h"
#include "../../lib/TextOperations.h"
void BattleConsole::showAll(SDL_Surface * to)
{
@ -717,7 +718,7 @@ void StackQueue::StackBox::setUnit(const battle::Unit * unit, size_t turn)
if (unit->unitType()->idNumber == CreatureID::ARROW_TOWERS)
icon->setFrame(owner->getSiegeShooterIconID(), 1);
amount->setText(vstd::formatMetric(unit->getCount(), 4));
amount->setText(TextOperations::formatMetric(unit->getCount(), 4));
if(stateIcon)
{

View File

@ -36,6 +36,7 @@
#include "../../lib/CGameState.h"
#include "../../lib/CStack.h"
#include "../../lib/CondSh.h"
#include "../../lib/TextOperations.h"
static void onAnimationFinished(const CStack *stack, std::weak_ptr<CreatureAnimation> anim)
{
@ -316,7 +317,7 @@ void BattleStacksController::showStackAmountBox(Canvas & canvas, const CStack *
//blitting amount
Point textPos = stackAnimation[stack->ID]->pos.topLeft() + amountBG->dimensions()/2 + Point(xAdd, yAdd);
canvas.drawText(textPos, EFonts::FONT_TINY, Colors::WHITE, ETextAlignment::CENTER, vstd::formatMetric(stack->getCount(), 4));
canvas.drawText(textPos, EFonts::FONT_TINY, Colors::WHITE, ETextAlignment::CENTER, TextOperations::formatMetric(stack->getCount(), 4));
}
void BattleStacksController::showStack(Canvas & canvas, const CStack * stack)

View File

@ -19,7 +19,7 @@ size_t IFont::getStringWidth(const std::string & data) const
{
size_t width = 0;
for(size_t i=0; i<data.size(); i += Unicode::getCharacterSize(data[i]))
for(size_t i=0; i<data.size(); i += TextOperations::getUnicodeCharacterSize(data[i]))
{
width += getGlyphWidth(data.data() + i);
}

View File

@ -55,7 +55,7 @@ size_t CBitmapFont::getLineHeight() const
size_t CBitmapFont::getGlyphWidth(const char * data) const
{
std::string localChar = Unicode::fromUnicode(std::string(data, Unicode::getCharacterSize(data[0])));
std::string localChar = TextOperations::fromUnicode(std::string(data, TextOperations::getUnicodeCharacterSize(data[0])));
if (localChar.size() == 1)
{
@ -131,9 +131,9 @@ void CBitmapFont::renderText(SDL_Surface * surface, const std::string & data, co
SDL_LockSurface(surface);
for(size_t i=0; i<data.size(); i += Unicode::getCharacterSize(data[i]))
for(size_t i=0; i<data.size(); i += TextOperations::getUnicodeCharacterSize(data[i]))
{
std::string localChar = Unicode::fromUnicode(data.substr(i, Unicode::getCharacterSize(data[i])));
std::string localChar = TextOperations::fromUnicode(data.substr(i, TextOperations::getUnicodeCharacterSize(data[i])));
if (localChar.size() == 1)
renderCharacter(surface, chars[ui8(localChar[0])], color, posX, posY);

View File

@ -83,9 +83,9 @@ void CBitmapHanFont::renderText(SDL_Surface * surface, const std::string & data,
SDL_LockSurface(surface);
for(size_t i=0; i<data.size(); i += Unicode::getCharacterSize(data[i]))
for(size_t i=0; i<data.size(); i += TextOperations::getUnicodeCharacterSize(data[i]))
{
std::string localChar = Unicode::fromUnicode(data.substr(i, Unicode::getCharacterSize(data[i])));
std::string localChar = TextOperations::fromUnicode(data.substr(i, TextOperations::getUnicodeCharacterSize(data[i])));
if (localChar.size() == 1)
fallback->renderCharacter(surface, fallback->chars[ui8(localChar[0])], color, posX, posY);
@ -115,7 +115,7 @@ size_t CBitmapHanFont::getLineHeight() const
size_t CBitmapHanFont::getGlyphWidth(const char * data) const
{
std::string localChar = Unicode::fromUnicode(std::string(data, Unicode::getCharacterSize(data[0])));
std::string localChar = TextOperations::fromUnicode(std::string(data, TextOperations::getUnicodeCharacterSize(data[0])));
if (localChar.size() == 1)
return fallback->getGlyphWidth(data);

View File

@ -66,7 +66,7 @@ size_t CTrueTypeFont::getLineHeight() const
size_t CTrueTypeFont::getGlyphWidth(const char *data) const
{
return getStringWidth(std::string(data, Unicode::getCharacterSize(*data)));
return getStringWidth(std::string(data, TextOperations::getUnicodeCharacterSize(*data)));
/*
int advance;
TTF_GlyphMetrics(font.get(), *data, nullptr, nullptr, nullptr, nullptr, &advance);

View File

@ -25,6 +25,7 @@
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/CCreatureHandler.h"
#include "../../lib/mapObjects/CGHeroInstance.h"
#include "../../lib/TextOperations.h"
#include "../../lib/CGameState.h"
@ -374,7 +375,7 @@ void CGarrisonSlot::update()
creatureImage->setFrame(creature->getIconIndex());
stackCount->enable();
stackCount->setText(vstd::formatMetric(myStack->count, 4));
stackCount->setText(TextOperations::formatMetric(myStack->count, 4));
}
else
{

View File

@ -24,12 +24,13 @@
#include "../../CCallback.h"
#include "../../lib/mapObjects/CGHeroInstance.h"
#include "../../lib/mapObjects/CGTownInstance.h"
#include "../../lib/CConfigHandler.h"
#include "../../lib/CGameState.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/CModHandler.h"
#include "../../lib/CGameState.h"
#include "../../lib/CConfigHandler.h"
#include "../../lib/TextOperations.h"
#include "../../lib/mapObjects/CGHeroInstance.h"
#include "../../lib/mapObjects/CGTownInstance.h"
void CHoverableArea::hover (bool on)
{
@ -251,7 +252,7 @@ void CArmyTooltip::init(const InfoAboutArmy &army)
std::string subtitle;
if(army.army.isDetailed)
{
subtitle = vstd::formatMetric(slot.second.count, 4);
subtitle = TextOperations::formatMetric(slot.second.count, 4);
}
else
{

View File

@ -570,12 +570,12 @@ void CTextInput::keyPressed(const SDL_Keycode & key)
case SDLK_BACKSPACE:
if(!newText.empty())
{
Unicode::trimRight(newText);
TextOperations::trimRightUnicode(newText);
redrawNeeded = true;
}
else if(!text.empty())
{
Unicode::trimRight(text);
TextOperations::trimRightUnicode(text);
redrawNeeded = true;
}
break;

View File

@ -31,6 +31,7 @@
#include "../../lib/CModHandler.h"
#include "../../lib/CHeroHandler.h"
#include "../../lib/CGameState.h"
#include "../../lib/TextOperations.h"
class CCreatureArtifactInstance;
class CSelectableSkill;
@ -582,7 +583,7 @@ CStackWindow::MainSection::MainSection(CStackWindow * owner, int yOffset, bool s
}
expLabel = std::make_shared<CLabel>(
pos.x + 21, pos.y + 52, FONT_SMALL, ETextAlignment::CENTER, Colors::WHITE,
vstd::formatMetric(stack->experience, 6));
TextOperations::formatMetric(stack->experience, 6));
}
if(showArt)

View File

@ -133,7 +133,7 @@ std::vector<std::string> CMessage::breakText( std::string text, size_t maxLineWi
// loops till line is full or end of text reached
while(currPos < text.length() && text[currPos] != 0x0a && lineWidth < maxLineWidth)
{
symbolSize = Unicode::getCharacterSize(text[currPos]);
symbolSize = TextOperations::getUnicodeCharacterSize(text[currPos]);
glyphWidth = graphics->fonts[font]->getGlyphWidth(text.data() + currPos);
// candidate for line break

View File

@ -63,6 +63,7 @@
#include "../lib/mapping/CMap.h"
#include "../lib/NetPacksBase.h"
#include "../lib/StartInfo.h"
#include "../lib/TextOperations.h"
#include <SDL_surface.h>
@ -1066,8 +1067,8 @@ void CExchangeWindow::updateWidgets()
secSkillIcons[leftRight][m]->setFrame(2 + id * 3 + level);
}
expValues[leftRight]->setText(vstd::formatMetric(hero->exp, 3));
manaValues[leftRight]->setText(vstd::formatMetric(hero->mana, 3));
expValues[leftRight]->setText(TextOperations::formatMetric(hero->exp, 3));
manaValues[leftRight]->setText(TextOperations::formatMetric(hero->mana, 3));
morale[leftRight]->set(hero);
luck[leftRight]->set(hero);

View File

@ -188,9 +188,9 @@ std::string CLegacyConfigParser::readString()
{
// do not convert strings that are already in ASCII - this will only slow down loading process
std::string str = readRawString();
if (Unicode::isValidASCII(str))
if (TextOperations::isValidASCII(str))
return str;
return Unicode::toUnicode(str);
return TextOperations::toUnicode(str);
}
float CLegacyConfigParser::readNumber()
@ -285,17 +285,6 @@ void CGeneralTextHandler::registerStringOverride(const std::string & modContext,
bool CGeneralTextHandler::validateTranslation(const std::string & language, const std::string & modContext, const JsonNode & config) const
{
auto escapeString = [](std::string input)
{
boost::replace_all(input, "\\", "\\\\");
boost::replace_all(input, "\n", "\\n");
boost::replace_all(input, "\r", "\\r");
boost::replace_all(input, "\t", "\\t");
boost::replace_all(input, "\"", "\\\"");
return input;
};
bool allPresent = true;
for (auto const & string : stringsLocalizations)
@ -318,7 +307,7 @@ bool CGeneralTextHandler::validateTranslation(const std::string & language, cons
else
currentText = string.second.overrideValue;
logMod->warn(R"( "%s" : "%s",)", string.first, escapeString(currentText));
logMod->warn(R"( "%s" : "%s",)", string.first, TextOperations::escapeString(currentText));
allPresent = false;
}
@ -332,7 +321,7 @@ bool CGeneralTextHandler::validateTranslation(const std::string & language, cons
if (allFound)
logMod->warn("Translation into language '%s' in mod '%s' has unused lines:", language, modContext);
logMod->warn(R"( "%s" : "%s",)", string.first, escapeString(string.second.String()));
logMod->warn(R"( "%s" : "%s",)", string.first, TextOperations::escapeString(string.second.String()));
allFound = false;
}
@ -562,24 +551,13 @@ int32_t CGeneralTextHandler::pluralText(int32_t textIndex, int32_t count) const
void CGeneralTextHandler::dumpAllTexts()
{
auto escapeString = [](std::string input)
{
boost::replace_all(input, "\\", "\\\\");
boost::replace_all(input, "\n", "\\n");
boost::replace_all(input, "\r", "\\r");
boost::replace_all(input, "\t", "\\t");
boost::replace_all(input, "\"", "\\\"");
return input;
};
logGlobal->info("BEGIN TEXT EXPORT");
for ( auto const & entry : stringsLocalizations)
{
if (!entry.second.overrideValue.empty())
logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue));
logGlobal->info(R"("%s" : "%s",)", entry.first, TextOperations::escapeString(entry.second.overrideValue));
else
logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue));
logGlobal->info(R"("%s" : "%s",)", entry.first, TextOperations::escapeString(entry.second.baseValue));
}
logGlobal->info("END TEXT EXPORT");

View File

@ -169,7 +169,7 @@ JsonNode JsonParser::parse(std::string fileName)
}
else
{
if (!Unicode::isValidString(&input[0], input.size()))
if (!TextOperations::isValidUnicodeString(&input[0], input.size()))
error("Not a valid UTF-8 file", false);
extractValue(root);

View File

@ -16,39 +16,52 @@
VCMI_LIB_NAMESPACE_BEGIN
/// Returns the length in bytes of the UTF-8 character whose first byte is firstByte.
/// Only the first byte is inspected; following bytes are not validated here.
size_t Unicode::getCharacterSize(char firstByte)
size_t TextOperations::getUnicodeCharacterSize(char firstByte)
{
// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
// 0xxxxxxx -> 1 - ASCII chars
// 110xxxxx -> 2
// 1110xxxx -> 3
// 11110xxx -> 4 - last allowed in current standard
// 1111110x -> 6 - last allowed in original standard
if ((ui8)firstByte < 0x80)
// work on the unsigned value so that bytes >= 0x80 are not treated as negative numbers
auto value = static_cast<uint8_t>(firstByte);
if ((value & 0b10000000) == 0)
return 1; // ASCII
size_t ret = 0;
if ((value & 0b11100000) == 0b11000000)
return 2;
for (size_t i=0; i<8; i++)
{
if (((ui8)firstByte & (0x80 >> i)) != 0)
ret++;
else
break;
}
return ret;
if ((value & 0b11110000) == 0b11100000)
return 3;
if ((value & 0b11111000) == 0b11110000)
return 4;
// reached when firstByte matches none of the lead-byte patterns above (e.g. 10xxxxxx or 11111xxx);
// with asserts disabled the function still returns 4 for such bytes
assert(0);// invalid unicode sequence
return 4;
}
/// Tests whether the bytes starting at character form one acceptable UTF-8 character.
/// character - pointer to the first byte of the candidate character
/// maxSize - number of bytes that may be read starting from character (must be > 0)
/// Returns false for a lead byte that cannot start a character, for a character that needs
/// more than maxSize bytes, or when a following byte does not have its highest bit set.
bool Unicode::isValidCharacter(const char * character, size_t maxSize)
bool TextOperations::isValidUnicodeCharacter(const char * character, size_t maxSize)
{
// can't be first byte in UTF8
if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
return false;
// first character must follow rules checked in getCharacterSize
size_t size = getCharacterSize((ui8)character[0]);
assert(maxSize > 0);
if ((ui8)character[0] > 0xF4)
return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
auto value = static_cast<uint8_t>(character[0]);
// ASCII
if ( value < 0b10000000)
return maxSize > 0;
// can't be first byte in UTF8
if (value < 0b11000000)
return false;
// above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
// the largest lead byte is 0xF4 (0b11110100): 0xF1..0xF4 start valid 4-byte characters
// (U+40000..U+10FFFF), so the bound must not be 0xF0
if (value > 0b11110100)
return false;
// first character must follow rules checked in getUnicodeCharacterSize
size_t size = getUnicodeCharacterSize(character[0]);
if (size > maxSize)
return false;
@ -56,69 +69,70 @@ bool Unicode::isValidCharacter(const char * character, size_t maxSize)
// remaining characters must have highest bit set to 1
// NOTE(review): only the highest bit is tested, so a following byte of the form 11xxxxxx is accepted as well
for (size_t i = 1; i < size; i++)
{
if (((ui8)character[i] & 0x80) == 0)
auto characterValue = static_cast<uint8_t>(character[i]);
if (characterValue < 0b10000000)
return false;
}
return true;
}
/// Returns true when every byte of text is below 0x80, i.e. text is plain ASCII.
/// An empty string yields true.
bool Unicode::isValidASCII(const std::string & text)
bool TextOperations::isValidASCII(const std::string & text)
{
for (const char & ch : text)
// compare as unsigned: bytes with the highest bit set are not ASCII
if (ui8(ch) >= 0x80 )
if (static_cast<uint8_t>(ch) >= 0x80 )
return false;
return true;
}
/// Returns true when each of the first size bytes of data is below 0x80, i.e. the buffer is plain ASCII.
/// A size of 0 yields true.
bool Unicode::isValidASCII(const char * data, size_t size)
bool TextOperations::isValidASCII(const char * data, size_t size)
{
for (size_t i=0; i<size; i++)
// compare as unsigned: bytes with the highest bit set are not ASCII
if (ui8(data[i]) >= 0x80 )
if (static_cast<uint8_t>(data[i]) >= 0x80 )
return false;
return true;
}
/// Returns true when text consists only of characters accepted by the per-character validity check.
/// The string is walked character by character: each step advances by the size derived from the
/// current lead byte, and the number of remaining bytes is passed as the limit for that character.
bool Unicode::isValidString(const std::string & text)
bool TextOperations::isValidUnicodeString(const std::string & text)
{
for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
for (size_t i=0; i<text.size(); i += getUnicodeCharacterSize(text[i]))
{
// stop at the first character that fails validation
if (!isValidCharacter(text.data() + i, text.size() - i))
if (!isValidUnicodeCharacter(text.data() + i, text.size() - i))
return false;
}
return true;
}
/// Returns true when the first size bytes of data consist only of characters accepted by the
/// per-character validity check. The buffer is walked character by character: each step advances
/// by the size derived from the current lead byte, and the remaining byte count is passed as the limit.
bool Unicode::isValidString(const char * data, size_t size)
bool TextOperations::isValidUnicodeString(const char * data, size_t size)
{
for (size_t i=0; i<size; i += getCharacterSize(data[i]))
for (size_t i=0; i<size; i += getUnicodeCharacterSize(data[i]))
{
// stop at the first character that fails validation
if (!isValidCharacter(data + i, size - i))
if (!isValidUnicodeCharacter(data + i, size - i))
return false;
}
return true;
}
/// Convenience overload: forwards text to the two-argument toUnicode, using the encoding
/// returned by CGeneralTextHandler::getInstalledEncoding() as the source encoding.
std::string Unicode::toUnicode(const std::string &text)
std::string TextOperations::toUnicode(const std::string &text)
{
return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());
}
/// Converts text from the given encoding to UTF-8 and returns the converted copy.
/// The conversion itself is delegated to boost::locale::conv::to_utf<char>.
std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
std::string TextOperations::toUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::to_utf<char>(text, encoding);
}
/// Convenience overload: forwards text to the two-argument fromUnicode, using the encoding
/// returned by CGeneralTextHandler::getInstalledEncoding() as the target encoding.
std::string Unicode::fromUnicode(const std::string & text)
std::string TextOperations::fromUnicode(const std::string & text)
{
return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());
}
/// Converts UTF-8 text to the given encoding and returns the converted copy.
/// The conversion itself is delegated to boost::locale::conv::from_utf<char>.
std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
std::string TextOperations::fromUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::from_utf<char>(text, encoding);
}
void Unicode::trimRight(std::string & text, const size_t amount)
void TextOperations::trimRightUnicode(std::string & text, const size_t amount)
{
if(text.empty())
return;
@ -130,9 +144,9 @@ void Unicode::trimRight(std::string & text, const size_t amount)
size_t len = 0;
while (b != e) {
lastLen = len;
size_t n = getCharacterSize(*b);
size_t n = getUnicodeCharacterSize(*b);
if(!isValidCharacter(&(*b),e-b))
if(!isValidUnicodeCharacter(&(*b),e-b))
{
logGlobal->error("Invalid UTF8 sequence");
break;//invalid sequence will be trimmed
@ -146,4 +160,15 @@ void Unicode::trimRight(std::string & text, const size_t amount)
}
}
/// Returns a copy of input in which backslash, line feed, carriage return, tab and double quote
/// are replaced with their two-character escape sequences; all other bytes are copied unchanged.
std::string TextOperations::escapeString(std::string input)
{
	std::string escaped;
	escaped.reserve(input.size());

	// every source byte maps to its replacement independently of its neighbours,
	// so a single pass yields the same text as replacing one symbol kind after another
	for (const char symbol : input)
	{
		switch (symbol)
		{
			case '\\':
				escaped += "\\\\";
				break;
			case '\n':
				escaped += "\\n";
				break;
			case '\r':
				escaped += "\\r";
				break;
			case '\t':
				escaped += "\\t";
				break;
			case '\"':
				escaped += "\\\"";
				break;
			default:
				escaped += symbol;
				break;
		}
	}
	return escaped;
}
VCMI_LIB_NAMESPACE_END

View File

@ -12,24 +12,25 @@
VCMI_LIB_NAMESPACE_BEGIN
/// Namespace that provides utilities for unicode support (UTF-8)
namespace Unicode
namespace TextOperations
{
/// evaluates size of UTF-8 character
size_t DLL_LINKAGE getCharacterSize(char firstByte);
/// returns length (in bytes) of UTF-8 character starting from specified character
size_t DLL_LINKAGE getUnicodeCharacterSize(char firstByte);
/// test if character is a valid UTF-8 symbol
/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string)
bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);
bool DLL_LINKAGE isValidUnicodeCharacter(const char * character, size_t maxSize);
/// test if text contains ASCII-string (no need for unicode conversion)
/// returns true if text contains valid ASCII-string
/// Note that since UTF-8 extends ASCII, any ASCII string is also UTF-8 string
bool DLL_LINKAGE isValidASCII(const std::string & text);
bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
/// test if text contains valid UTF-8 sequence
bool DLL_LINKAGE isValidString(const std::string & text);
bool DLL_LINKAGE isValidString(const char * data, size_t size);
bool DLL_LINKAGE isValidUnicodeString(const std::string & text);
bool DLL_LINKAGE isValidUnicodeString(const char * data, size_t size);
/// converts text to unicode from specified encoding or from one specified in settings
/// converts text to UTF-8 from specified encoding or from one specified in settings
std::string DLL_LINKAGE toUnicode(const std::string & text);
std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
@ -38,8 +39,38 @@ namespace Unicode
std::string DLL_LINKAGE fromUnicode(const std::string & text);
std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
///delete (amount) UTF characters from right
DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);
///delete specified amount of UTF-8 characters from right
DLL_LINKAGE void trimRightUnicode(std::string & text, size_t amount = 1);
/// converts number into string using metric system prefixes, e.g. 'k' or 'M' to keep resulting strings within specified size
/// Note that resulting string may have more symbols than digits: minus sign and prefix symbol
template<typename Arithmetic>
inline std::string formatMetric(Arithmetic number, int maxDigits);
/// replaces backslash, line feed, carriage return, tab and double quote with their escape sequences
/// NOTE(review): unlike the other non-template functions here this one is declared without DLL_LINKAGE -
/// presumably fine while all callers live in the same library; confirm before calling it from other modules
std::string escapeString(std::string input);
};
/// Converts number into string using metric system prefixes, e.g. 'k' or 'M', to keep resulting strings within specified size.
/// @param number value to format; every applied prefix divides it by 1000 using Arithmetic's own division
/// @param maxDigits values whose magnitude is below 10^maxDigits are printed without any prefix
/// @return std::to_string of the (possibly scaled) number, followed by the prefix symbol if scaling was applied
template<typename Arithmetic>
inline std::string TextOperations::formatMetric(Arithmetic number, int maxDigits)
{
	// Build 10^maxDigits by repeated multiplication in Arithmetic itself instead of calling std::pow:
	// pow works in floating point and its result may land slightly below the exact power of ten,
	// in which case conversion back to an integer type truncates it (e.g. 9999 instead of 10000)
	Arithmetic max = 1;
	for (int digit = 0; digit < maxDigits; ++digit)
		max *= 10;

	if (std::abs(number) < max)
		return std::to_string(number);

	const std::string symbols = " kMGTPE";
	auto iter = symbols.begin();

	while (std::abs(number) >= max)
	{
		const bool prefixAvailable = (iter + 1) != symbols.end();
		assert(prefixAvailable);//should be enough even for int64
		if (!prefixAvailable)
			break; // asserts disabled: keep the largest prefix instead of dereferencing end()

		number /= 1000;
		++iter;
	}
	return std::to_string(number) + *iter;
}
VCMI_LIB_NAMESPACE_END

View File

@ -96,9 +96,9 @@ std::string CBinaryReader::readString()
ret.resize(len);
read(reinterpret_cast<ui8*>(&ret[0]), len);
//FIXME: any need to move this into separate "read localized string" method?
if (Unicode::isValidASCII(ret))
if (TextOperations::isValidASCII(ret))
return ret;
return Unicode::toUnicode(ret);
return TextOperations::toUnicode(ret);
}
return "";