1
0
mirror of https://github.com/vcmi/vcmi.git synced 2024-11-28 08:48:48 +02:00

Moved Unicode namespace out of GeneralTextHandler file

This commit is contained in:
Ivan Savenko 2023-02-12 23:00:56 +02:00
parent 974dd3a3db
commit 65c020ef34
14 changed files with 209 additions and 177 deletions

View File

@ -20,7 +20,7 @@
#include "../../CCallback.h"
#include "../../lib/CConfigHandler.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/TextOperations.h"
#include "../../lib/mapObjects/CArmedInstance.h"
#include <SDL_timer.h>

View File

@ -12,7 +12,7 @@
#include "IFont.h"
#include "../../lib/Point.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/TextOperations.h"
//
size_t IFont::getStringWidth(const std::string & data) const

View File

@ -14,7 +14,7 @@
#include "../../lib/vcmi_endian.h"
#include "../../lib/filesystem/Filesystem.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/TextOperations.h"
#include "../../lib/Rect.h"
#include <SDL_surface.h>

View File

@ -15,7 +15,7 @@
#include "../../lib/JsonNode.h"
#include "../../lib/filesystem/Filesystem.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/TextOperations.h"
#include "../../lib/Rect.h"
#include <SDL_surface.h>

View File

@ -14,7 +14,7 @@
#include "../renderSDL/SDL_Extensions.h"
#include "../../lib/JsonNode.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/TextOperations.h"
#include "../../lib/filesystem/Filesystem.h"
#include <SDL_ttf.h>

View File

@ -19,7 +19,7 @@
#include "../adventureMap/CInGameConsole.h"
#include "../renderSDL/SDL_Extensions.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/TextOperations.h"
#ifdef VCMI_ANDROID
#include "lib/CAndroidVMHelper.h"

View File

@ -13,6 +13,7 @@
#include "../CGameInfo.h"
#include "../../lib/CGeneralTextHandler.h"
#include "../../lib/TextOperations.h"
#include "../windows/InfoWindows.h"
#include "../widgets/Buttons.h"

View File

@ -197,6 +197,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
${MAIN_LIB_DIR}/RoadHandler.cpp
${MAIN_LIB_DIR}/ScriptHandler.cpp
${MAIN_LIB_DIR}/TerrainHandler.cpp
${MAIN_LIB_DIR}/TextOperations.cpp
${MAIN_LIB_DIR}/VCMIDirs.cpp
${MAIN_LIB_DIR}/VCMI_Lib.cpp
)
@ -454,6 +455,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
${MAIN_LIB_DIR}/StartInfo.h
${MAIN_LIB_DIR}/StringConstants.h
${MAIN_LIB_DIR}/TerrainHandler.h
${MAIN_LIB_DIR}/TextOperations.h
${MAIN_LIB_DIR}/UnlockGuard.h
${MAIN_LIB_DIR}/VCMIDirs.h
${MAIN_LIB_DIR}/vcmi_endian.h

View File

@ -10,100 +10,16 @@
#include "StdInc.h"
#include "CGeneralTextHandler.h"
#include <boost/locale.hpp>
#include "filesystem/Filesystem.h"
#include "CConfigHandler.h"
#include "CModHandler.h"
#include "GameConstants.h"
#include "mapObjects/CQuest.h"
#include "VCMI_Lib.h"
#include "Languages.h"
#include "TextOperations.h"
VCMI_LIB_NAMESPACE_BEGIN
size_t Unicode::getCharacterSize(char firstByte)
{
// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
// 0xxxxxxx -> 1 - ASCII chars
// 110xxxxx -> 2
// 11110xxx -> 4 - last allowed in current standard
// 1111110x -> 6 - last allowed in original standard
if ((ui8)firstByte < 0x80)
return 1; // ASCII
size_t ret = 0;
for (size_t i=0; i<8; i++)
{
if (((ui8)firstByte & (0x80 >> i)) != 0)
ret++;
else
break;
}
return ret;
}
bool Unicode::isValidCharacter(const char * character, size_t maxSize)
{
// can't be first byte in UTF8
if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
return false;
// first character must follow rules checked in getCharacterSize
size_t size = getCharacterSize((ui8)character[0]);
if ((ui8)character[0] > 0xF4)
return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
if (size > maxSize)
return false;
// remaining characters must have highest bit set to 1
for (size_t i = 1; i < size; i++)
{
if (((ui8)character[i] & 0x80) == 0)
return false;
}
return true;
}
bool Unicode::isValidASCII(const std::string & text)
{
for (const char & ch : text)
if (ui8(ch) >= 0x80 )
return false;
return true;
}
bool Unicode::isValidASCII(const char * data, size_t size)
{
for (size_t i=0; i<size; i++)
if (ui8(data[i]) >= 0x80 )
return false;
return true;
}
bool Unicode::isValidString(const std::string & text)
{
for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
{
if (!isValidCharacter(text.data() + i, text.size() - i))
return false;
}
return true;
}
/// Tells whether the `size` bytes starting at `data` consist solely of characters
/// accepted by isValidCharacter. A zero-length range is reported as valid.
bool Unicode::isValidString(const char * data, size_t size)
{
	size_t offset = 0;
	while(offset < size)
	{
		const char * current = data + offset;

		// the character must fit into what is left of the buffer
		if(!isValidCharacter(current, size - offset))
			return false;

		// jump to the first byte of the next character
		offset += getCharacterSize(*current);
	}
	return true;
}
/// Detects language and encoding of H3 text files based on matching against pregenerated footprints of H3 file
void CGeneralTextHandler::detectInstallParameters()
{
@ -173,55 +89,6 @@ void CGeneralTextHandler::detectInstallParameters()
encoding->String() = Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding;
}
std::string Unicode::toUnicode(const std::string &text)
{
return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());
}
std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::to_utf<char>(text, encoding);
}
std::string Unicode::fromUnicode(const std::string & text)
{
return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());
}
std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::from_utf<char>(text, encoding);
}
/// Removes up to `amount` UTF-8 characters from the end of `text`, in place.
/// If the text contains an invalid UTF-8 sequence, an error is logged and everything
/// starting from that sequence is dropped; this counts as one removed character.
/// Removing more characters than the text holds leaves an empty string.
/// @param text string to shorten
/// @param amount number of characters to remove; 0 leaves the text untouched
void Unicode::trimRight(std::string & text, const size_t amount)
{
	if(text.empty() || amount == 0)
		return;

	// First pass: count the characters in the leading valid part of the text
	size_t scannedBytes = 0;
	size_t validChars = 0;
	bool foundInvalid = false;
	while(scannedBytes < text.size())
	{
		if(!isValidCharacter(text.data() + scannedBytes, text.size() - scannedBytes))
		{
			logGlobal->error("Invalid UTF8 sequence");
			foundInvalid = true;
			break;//invalid sequence will be trimmed
		}
		scannedBytes += getCharacterSize(text[scannedBytes]);
		++validChars;
	}

	// cutting off the invalid tail already uses up one of the requested removals
	const size_t charsToDrop = foundInvalid ? amount - 1 : amount;
	const size_t charsToKeep = validChars > charsToDrop ? validChars - charsToDrop : 0;

	// Second pass: translate the number of kept characters into a byte length
	size_t bytesToKeep = 0;
	for(size_t i = 0; i < charsToKeep; ++i)
		bytesToKeep += getCharacterSize(text[bytesToKeep]);

	text.resize(bytesToKeep);
}
//Helper for string -> float conversion
class LocaleWithComma: public std::numpunct<char>
{
@ -710,9 +577,9 @@ void CGeneralTextHandler::dumpAllTexts()
for ( auto const & entry : stringsLocalizations)
{
if (!entry.second.overrideValue.empty())
logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.overrideValue), entry.second.modContext, entry.second.overrideLanguage);
logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue));
else
logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.baseValue), entry.second.modContext, entry.second.baseLanguage);
logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue));
}
logGlobal->info("END TEXT EXPORT");

View File

@ -9,42 +9,10 @@
*/
#pragma once
#include "JsonNode.h"
VCMI_LIB_NAMESPACE_BEGIN
/// Namespace that provides utilities for unicode support (UTF-8)
namespace Unicode
{
/// evaluates size (in bytes) of UTF-8 character, based solely on its first byte
/// NOTE: performs no validation of the byte
size_t DLL_LINKAGE getCharacterSize(char firstByte);
/// test if character is a valid UTF-8 symbol
/// character - pointer to the first byte of the symbol
/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string)
bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);
/// test if text is an ASCII-string, i.e. all bytes are below 0x80 (no need for unicode conversion)
bool DLL_LINKAGE isValidASCII(const std::string & text);
/// same as above, for a buffer of (size) bytes starting at (data)
bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
/// test if text contains valid UTF-8 sequence
bool DLL_LINKAGE isValidString(const std::string & text);
/// same as above, for a buffer of (size) bytes starting at (data)
bool DLL_LINKAGE isValidString(const char * data, size_t size);
/// converts text to unicode from the encoding returned by CGeneralTextHandler::getInstalledEncoding()
std::string DLL_LINKAGE toUnicode(const std::string & text);
/// converts text to unicode from specified encoding
std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
/// converts text from unicode to the encoding returned by CGeneralTextHandler::getInstalledEncoding()
/// NOTE: usage of these functions should be avoided if possible
std::string DLL_LINKAGE fromUnicode(const std::string & text);
/// converts text from unicode to specified encoding
/// NOTE: usage of these functions should be avoided if possible
std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
/// delete (amount) UTF characters from right
/// an invalid UTF-8 sequence is cut off together with everything after it and counts as one character
DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);
};
class CInputStream;
class JsonNode;
/// Parser for any text files from H3
class DLL_LINKAGE CLegacyConfigParser

View File

@ -12,7 +12,7 @@
#include "JsonDetail.h"
#include "VCMI_Lib.h"
#include "CGeneralTextHandler.h"
#include "TextOperations.h"
#include "CModHandler.h"
#include "filesystem/Filesystem.h"

149
lib/TextOperations.cpp Normal file
View File

@ -0,0 +1,149 @@
/*
* TextOperations.cpp, part of VCMI engine
*
* Authors: listed in file AUTHORS in main folder
*
* License: GNU General Public License v2.0 or later
* Full text of license available in license.txt file, in main folder
*
*/
#include "StdInc.h"
#include "TextOperations.h"
#include "CGeneralTextHandler.h"
#include <boost/locale.hpp>
VCMI_LIB_NAMESPACE_BEGIN
size_t Unicode::getCharacterSize(char firstByte)
{
// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
// 0xxxxxxx -> 1 - ASCII chars
// 110xxxxx -> 2
// 11110xxx -> 4 - last allowed in current standard
// 1111110x -> 6 - last allowed in original standard
if ((ui8)firstByte < 0x80)
return 1; // ASCII
size_t ret = 0;
for (size_t i=0; i<8; i++)
{
if (((ui8)firstByte & (0x80 >> i)) != 0)
ret++;
else
break;
}
return ret;
}
bool Unicode::isValidCharacter(const char * character, size_t maxSize)
{
// can't be first byte in UTF8
if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
return false;
// first character must follow rules checked in getCharacterSize
size_t size = getCharacterSize((ui8)character[0]);
if ((ui8)character[0] > 0xF4)
return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
if (size > maxSize)
return false;
// remaining characters must have highest bit set to 1
for (size_t i = 1; i < size; i++)
{
if (((ui8)character[i] & 0x80) == 0)
return false;
}
return true;
}
bool Unicode::isValidASCII(const std::string & text)
{
for (const char & ch : text)
if (ui8(ch) >= 0x80 )
return false;
return true;
}
bool Unicode::isValidASCII(const char * data, size_t size)
{
for (size_t i=0; i<size; i++)
if (ui8(data[i]) >= 0x80 )
return false;
return true;
}
bool Unicode::isValidString(const std::string & text)
{
for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
{
if (!isValidCharacter(text.data() + i, text.size() - i))
return false;
}
return true;
}
/// Tells whether the `size` bytes starting at `data` consist solely of characters
/// accepted by isValidCharacter. A zero-length range is reported as valid.
bool Unicode::isValidString(const char * data, size_t size)
{
	size_t offset = 0;
	while(offset < size)
	{
		const char * current = data + offset;

		// the character must fit into what is left of the buffer
		if(!isValidCharacter(current, size - offset))
			return false;

		// jump to the first byte of the next character
		offset += getCharacterSize(*current);
	}
	return true;
}
std::string Unicode::toUnicode(const std::string &text)
{
return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());
}
std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::to_utf<char>(text, encoding);
}
std::string Unicode::fromUnicode(const std::string & text)
{
return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());
}
std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::from_utf<char>(text, encoding);
}
/// Removes up to `amount` UTF-8 characters from the end of `text`, in place.
/// If the text contains an invalid UTF-8 sequence, an error is logged and everything
/// starting from that sequence is dropped; this counts as one removed character.
/// Removing more characters than the text holds leaves an empty string.
/// @param text string to shorten
/// @param amount number of characters to remove; 0 leaves the text untouched
void Unicode::trimRight(std::string & text, const size_t amount)
{
	if(text.empty() || amount == 0)
		return;

	// First pass: count the characters in the leading valid part of the text
	size_t scannedBytes = 0;
	size_t validChars = 0;
	bool foundInvalid = false;
	while(scannedBytes < text.size())
	{
		if(!isValidCharacter(text.data() + scannedBytes, text.size() - scannedBytes))
		{
			logGlobal->error("Invalid UTF8 sequence");
			foundInvalid = true;
			break;//invalid sequence will be trimmed
		}
		scannedBytes += getCharacterSize(text[scannedBytes]);
		++validChars;
	}

	// cutting off the invalid tail already uses up one of the requested removals
	const size_t charsToDrop = foundInvalid ? amount - 1 : amount;
	const size_t charsToKeep = validChars > charsToDrop ? validChars - charsToDrop : 0;

	// Second pass: translate the number of kept characters into a byte length
	size_t bytesToKeep = 0;
	for(size_t i = 0; i < charsToKeep; ++i)
		bytesToKeep += getCharacterSize(text[bytesToKeep]);

	text.resize(bytesToKeep);
}
VCMI_LIB_NAMESPACE_END

45
lib/TextOperations.h Normal file
View File

@ -0,0 +1,45 @@
/*
* TextOperations.h, part of VCMI engine
*
* Authors: listed in file AUTHORS in main folder
*
* License: GNU General Public License v2.0 or later
* Full text of license available in license.txt file, in main folder
*
*/
#pragma once
VCMI_LIB_NAMESPACE_BEGIN
/// Namespace that provides utilities for unicode support (UTF-8)
namespace Unicode
{
/// evaluates size (in bytes) of UTF-8 character, based solely on its first byte
/// NOTE: performs no validation of the byte
size_t DLL_LINKAGE getCharacterSize(char firstByte);
/// test if character is a valid UTF-8 symbol
/// character - pointer to the first byte of the symbol
/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string)
bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);
/// test if text is an ASCII-string, i.e. all bytes are below 0x80 (no need for unicode conversion)
bool DLL_LINKAGE isValidASCII(const std::string & text);
/// same as above, for a buffer of (size) bytes starting at (data)
bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
/// test if text contains valid UTF-8 sequence
bool DLL_LINKAGE isValidString(const std::string & text);
/// same as above, for a buffer of (size) bytes starting at (data)
bool DLL_LINKAGE isValidString(const char * data, size_t size);
/// converts text to unicode from the encoding returned by CGeneralTextHandler::getInstalledEncoding()
std::string DLL_LINKAGE toUnicode(const std::string & text);
/// converts text to unicode from specified encoding
std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
/// converts text from unicode to the encoding returned by CGeneralTextHandler::getInstalledEncoding()
/// NOTE: usage of these functions should be avoided if possible
std::string DLL_LINKAGE fromUnicode(const std::string & text);
/// converts text from unicode to specified encoding
/// NOTE: usage of these functions should be avoided if possible
std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
/// delete (amount) UTF characters from right
/// an invalid UTF-8 sequence is cut off together with everything after it and counts as one character
DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);
};
VCMI_LIB_NAMESPACE_END

View File

@ -11,7 +11,7 @@
#include "CBinaryReader.h"
#include "CInputStream.h"
#include "../CGeneralTextHandler.h"
#include "../TextOperations.h"
VCMI_LIB_NAMESPACE_BEGIN