mirror of
https://github.com/vcmi/vcmi.git
synced 2024-11-24 08:32:34 +02:00
Moved Unicode namespace out of GeneralTextHandler file
This commit is contained in:
parent
974dd3a3db
commit
65c020ef34
@ -20,7 +20,7 @@
|
||||
|
||||
#include "../../CCallback.h"
|
||||
#include "../../lib/CConfigHandler.h"
|
||||
#include "../../lib/CGeneralTextHandler.h"
|
||||
#include "../../lib/TextOperations.h"
|
||||
#include "../../lib/mapObjects/CArmedInstance.h"
|
||||
|
||||
#include <SDL_timer.h>
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include "IFont.h"
|
||||
|
||||
#include "../../lib/Point.h"
|
||||
#include "../../lib/CGeneralTextHandler.h"
|
||||
#include "../../lib/TextOperations.h"
|
||||
//
|
||||
|
||||
size_t IFont::getStringWidth(const std::string & data) const
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#include "../../lib/vcmi_endian.h"
|
||||
#include "../../lib/filesystem/Filesystem.h"
|
||||
#include "../../lib/CGeneralTextHandler.h"
|
||||
#include "../../lib/TextOperations.h"
|
||||
#include "../../lib/Rect.h"
|
||||
|
||||
#include <SDL_surface.h>
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
#include "../../lib/JsonNode.h"
|
||||
#include "../../lib/filesystem/Filesystem.h"
|
||||
#include "../../lib/CGeneralTextHandler.h"
|
||||
#include "../../lib/TextOperations.h"
|
||||
#include "../../lib/Rect.h"
|
||||
|
||||
#include <SDL_surface.h>
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include "../renderSDL/SDL_Extensions.h"
|
||||
|
||||
#include "../../lib/JsonNode.h"
|
||||
#include "../../lib/CGeneralTextHandler.h"
|
||||
#include "../../lib/TextOperations.h"
|
||||
#include "../../lib/filesystem/Filesystem.h"
|
||||
|
||||
#include <SDL_ttf.h>
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "../adventureMap/CInGameConsole.h"
|
||||
#include "../renderSDL/SDL_Extensions.h"
|
||||
|
||||
#include "../../lib/CGeneralTextHandler.h"
|
||||
#include "../../lib/TextOperations.h"
|
||||
|
||||
#ifdef VCMI_ANDROID
|
||||
#include "lib/CAndroidVMHelper.h"
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "../CGameInfo.h"
|
||||
#include "../../lib/CGeneralTextHandler.h"
|
||||
#include "../../lib/TextOperations.h"
|
||||
|
||||
#include "../windows/InfoWindows.h"
|
||||
#include "../widgets/Buttons.h"
|
||||
|
@ -197,6 +197,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
|
||||
${MAIN_LIB_DIR}/RoadHandler.cpp
|
||||
${MAIN_LIB_DIR}/ScriptHandler.cpp
|
||||
${MAIN_LIB_DIR}/TerrainHandler.cpp
|
||||
${MAIN_LIB_DIR}/TextOperations.cpp
|
||||
${MAIN_LIB_DIR}/VCMIDirs.cpp
|
||||
${MAIN_LIB_DIR}/VCMI_Lib.cpp
|
||||
)
|
||||
@ -454,6 +455,7 @@ macro(add_main_lib TARGET_NAME LIBRARY_TYPE)
|
||||
${MAIN_LIB_DIR}/StartInfo.h
|
||||
${MAIN_LIB_DIR}/StringConstants.h
|
||||
${MAIN_LIB_DIR}/TerrainHandler.h
|
||||
${MAIN_LIB_DIR}/TextOperations.h
|
||||
${MAIN_LIB_DIR}/UnlockGuard.h
|
||||
${MAIN_LIB_DIR}/VCMIDirs.h
|
||||
${MAIN_LIB_DIR}/vcmi_endian.h
|
||||
|
@ -10,100 +10,16 @@
|
||||
#include "StdInc.h"
|
||||
#include "CGeneralTextHandler.h"
|
||||
|
||||
#include <boost/locale.hpp>
|
||||
|
||||
#include "filesystem/Filesystem.h"
|
||||
#include "CConfigHandler.h"
|
||||
#include "CModHandler.h"
|
||||
#include "GameConstants.h"
|
||||
#include "mapObjects/CQuest.h"
|
||||
#include "VCMI_Lib.h"
|
||||
#include "Languages.h"
|
||||
#include "TextOperations.h"
|
||||
|
||||
VCMI_LIB_NAMESPACE_BEGIN
|
||||
|
||||
size_t Unicode::getCharacterSize(char firstByte)
|
||||
{
|
||||
// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
|
||||
// 0xxxxxxx -> 1 - ASCII chars
|
||||
// 110xxxxx -> 2
|
||||
// 11110xxx -> 4 - last allowed in current standard
|
||||
// 1111110x -> 6 - last allowed in original standard
|
||||
|
||||
if ((ui8)firstByte < 0x80)
|
||||
return 1; // ASCII
|
||||
|
||||
size_t ret = 0;
|
||||
|
||||
for (size_t i=0; i<8; i++)
|
||||
{
|
||||
if (((ui8)firstByte & (0x80 >> i)) != 0)
|
||||
ret++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Unicode::isValidCharacter(const char * character, size_t maxSize)
|
||||
{
|
||||
// can't be first byte in UTF8
|
||||
if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
|
||||
return false;
|
||||
// first character must follow rules checked in getCharacterSize
|
||||
size_t size = getCharacterSize((ui8)character[0]);
|
||||
|
||||
if ((ui8)character[0] > 0xF4)
|
||||
return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
|
||||
|
||||
if (size > maxSize)
|
||||
return false;
|
||||
|
||||
// remaining characters must have highest bit set to 1
|
||||
for (size_t i = 1; i < size; i++)
|
||||
{
|
||||
if (((ui8)character[i] & 0x80) == 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidASCII(const std::string & text)
|
||||
{
|
||||
for (const char & ch : text)
|
||||
if (ui8(ch) >= 0x80 )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidASCII(const char * data, size_t size)
|
||||
{
|
||||
for (size_t i=0; i<size; i++)
|
||||
if (ui8(data[i]) >= 0x80 )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidString(const std::string & text)
|
||||
{
|
||||
for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
|
||||
{
|
||||
if (!isValidCharacter(text.data() + i, text.size() - i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidString(const char * data, size_t size)
|
||||
{
|
||||
for (size_t i=0; i<size; i += getCharacterSize(data[i]))
|
||||
{
|
||||
if (!isValidCharacter(data + i, size - i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Detects language and encoding of H3 text files based on matching against pregenerated footprints of H3 file
|
||||
void CGeneralTextHandler::detectInstallParameters()
|
||||
{
|
||||
@ -173,55 +89,6 @@ void CGeneralTextHandler::detectInstallParameters()
|
||||
encoding->String() = Languages::getLanguageOptions(knownLanguages[bestIndex]).encoding;
|
||||
}
|
||||
|
||||
std::string Unicode::toUnicode(const std::string &text)
|
||||
{
|
||||
return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());
|
||||
}
|
||||
|
||||
std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
|
||||
{
|
||||
return boost::locale::conv::to_utf<char>(text, encoding);
|
||||
}
|
||||
|
||||
std::string Unicode::fromUnicode(const std::string & text)
|
||||
{
|
||||
return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());
|
||||
}
|
||||
|
||||
std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
|
||||
{
|
||||
return boost::locale::conv::from_utf<char>(text, encoding);
|
||||
}
|
||||
|
||||
void Unicode::trimRight(std::string & text, const size_t amount)
|
||||
{
|
||||
if(text.empty())
|
||||
return;
|
||||
//todo: more efficient algorithm
|
||||
for(int i = 0; i< amount; i++){
|
||||
auto b = text.begin();
|
||||
auto e = text.end();
|
||||
size_t lastLen = 0;
|
||||
size_t len = 0;
|
||||
while (b != e) {
|
||||
lastLen = len;
|
||||
size_t n = getCharacterSize(*b);
|
||||
|
||||
if(!isValidCharacter(&(*b),e-b))
|
||||
{
|
||||
logGlobal->error("Invalid UTF8 sequence");
|
||||
break;//invalid sequence will be trimmed
|
||||
}
|
||||
|
||||
len += n;
|
||||
b += n;
|
||||
}
|
||||
|
||||
text.resize(lastLen);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//Helper for string -> float conversion
|
||||
class LocaleWithComma: public std::numpunct<char>
|
||||
{
|
||||
@ -710,9 +577,9 @@ void CGeneralTextHandler::dumpAllTexts()
|
||||
for ( auto const & entry : stringsLocalizations)
|
||||
{
|
||||
if (!entry.second.overrideValue.empty())
|
||||
logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.overrideValue), entry.second.modContext, entry.second.overrideLanguage);
|
||||
logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.overrideValue));
|
||||
else
|
||||
logGlobal->info(R"("%s" : "%s", // %s / %s)", entry.first, escapeString(entry.second.baseValue), entry.second.modContext, entry.second.baseLanguage);
|
||||
logGlobal->info(R"("%s" : "%s",)", entry.first, escapeString(entry.second.baseValue));
|
||||
}
|
||||
|
||||
logGlobal->info("END TEXT EXPORT");
|
||||
|
@ -9,42 +9,10 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "JsonNode.h"
|
||||
|
||||
VCMI_LIB_NAMESPACE_BEGIN
|
||||
|
||||
/// Namespace that provides utilites for unicode support (UTF-8)
|
||||
namespace Unicode
|
||||
{
|
||||
/// evaluates size of UTF-8 character
|
||||
size_t DLL_LINKAGE getCharacterSize(char firstByte);
|
||||
|
||||
/// test if character is a valid UTF-8 symbol
|
||||
/// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string)
|
||||
bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);
|
||||
|
||||
/// test if text contains ASCII-string (no need for unicode conversion)
|
||||
bool DLL_LINKAGE isValidASCII(const std::string & text);
|
||||
bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
|
||||
|
||||
/// test if text contains valid UTF-8 sequence
|
||||
bool DLL_LINKAGE isValidString(const std::string & text);
|
||||
bool DLL_LINKAGE isValidString(const char * data, size_t size);
|
||||
|
||||
/// converts text to unicode from specified encoding or from one specified in settings
|
||||
std::string DLL_LINKAGE toUnicode(const std::string & text);
|
||||
std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
|
||||
|
||||
/// converts text from unicode to specified encoding or to one specified in settings
|
||||
/// NOTE: usage of these functions should be avoided if possible
|
||||
std::string DLL_LINKAGE fromUnicode(const std::string & text);
|
||||
std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
|
||||
|
||||
///delete (amount) UTF characters from right
|
||||
DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);
|
||||
};
|
||||
|
||||
class CInputStream;
|
||||
class JsonNode;
|
||||
|
||||
/// Parser for any text files from H3
|
||||
class DLL_LINKAGE CLegacyConfigParser
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include "JsonDetail.h"
|
||||
|
||||
#include "VCMI_Lib.h"
|
||||
#include "CGeneralTextHandler.h"
|
||||
#include "TextOperations.h"
|
||||
#include "CModHandler.h"
|
||||
|
||||
#include "filesystem/Filesystem.h"
|
||||
|
149
lib/TextOperations.cpp
Normal file
149
lib/TextOperations.cpp
Normal file
@ -0,0 +1,149 @@
|
||||
/*
|
||||
* TextOperations.cpp, part of VCMI engine
|
||||
*
|
||||
* Authors: listed in file AUTHORS in main folder
|
||||
*
|
||||
* License: GNU General Public License v2.0 or later
|
||||
* Full text of license available in license.txt file, in main folder
|
||||
*
|
||||
*/
|
||||
#include "StdInc.h"
|
||||
#include "TextOperations.h"
|
||||
|
||||
#include "CGeneralTextHandler.h"
|
||||
|
||||
#include <boost/locale.hpp>
|
||||
|
||||
VCMI_LIB_NAMESPACE_BEGIN
|
||||
|
||||
size_t Unicode::getCharacterSize(char firstByte)
|
||||
{
|
||||
// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
|
||||
// 0xxxxxxx -> 1 - ASCII chars
|
||||
// 110xxxxx -> 2
|
||||
// 11110xxx -> 4 - last allowed in current standard
|
||||
// 1111110x -> 6 - last allowed in original standard
|
||||
|
||||
if ((ui8)firstByte < 0x80)
|
||||
return 1; // ASCII
|
||||
|
||||
size_t ret = 0;
|
||||
|
||||
for (size_t i=0; i<8; i++)
|
||||
{
|
||||
if (((ui8)firstByte & (0x80 >> i)) != 0)
|
||||
ret++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Unicode::isValidCharacter(const char * character, size_t maxSize)
|
||||
{
|
||||
// can't be first byte in UTF8
|
||||
if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
|
||||
return false;
|
||||
// first character must follow rules checked in getCharacterSize
|
||||
size_t size = getCharacterSize((ui8)character[0]);
|
||||
|
||||
if ((ui8)character[0] > 0xF4)
|
||||
return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
|
||||
|
||||
if (size > maxSize)
|
||||
return false;
|
||||
|
||||
// remaining characters must have highest bit set to 1
|
||||
for (size_t i = 1; i < size; i++)
|
||||
{
|
||||
if (((ui8)character[i] & 0x80) == 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidASCII(const std::string & text)
|
||||
{
|
||||
for (const char & ch : text)
|
||||
if (ui8(ch) >= 0x80 )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidASCII(const char * data, size_t size)
|
||||
{
|
||||
for (size_t i=0; i<size; i++)
|
||||
if (ui8(data[i]) >= 0x80 )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidString(const std::string & text)
|
||||
{
|
||||
for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
|
||||
{
|
||||
if (!isValidCharacter(text.data() + i, text.size() - i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Unicode::isValidString(const char * data, size_t size)
|
||||
{
|
||||
for (size_t i=0; i<size; i += getCharacterSize(data[i]))
|
||||
{
|
||||
if (!isValidCharacter(data + i, size - i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string Unicode::toUnicode(const std::string &text)
|
||||
{
|
||||
return toUnicode(text, CGeneralTextHandler::getInstalledEncoding());
|
||||
}
|
||||
|
||||
std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
|
||||
{
|
||||
return boost::locale::conv::to_utf<char>(text, encoding);
|
||||
}
|
||||
|
||||
std::string Unicode::fromUnicode(const std::string & text)
|
||||
{
|
||||
return fromUnicode(text, CGeneralTextHandler::getInstalledEncoding());
|
||||
}
|
||||
|
||||
std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
|
||||
{
|
||||
return boost::locale::conv::from_utf<char>(text, encoding);
|
||||
}
|
||||
|
||||
void Unicode::trimRight(std::string & text, const size_t amount)
|
||||
{
|
||||
if(text.empty())
|
||||
return;
|
||||
//todo: more efficient algorithm
|
||||
for(int i = 0; i< amount; i++){
|
||||
auto b = text.begin();
|
||||
auto e = text.end();
|
||||
size_t lastLen = 0;
|
||||
size_t len = 0;
|
||||
while (b != e) {
|
||||
lastLen = len;
|
||||
size_t n = getCharacterSize(*b);
|
||||
|
||||
if(!isValidCharacter(&(*b),e-b))
|
||||
{
|
||||
logGlobal->error("Invalid UTF8 sequence");
|
||||
break;//invalid sequence will be trimmed
|
||||
}
|
||||
|
||||
len += n;
|
||||
b += n;
|
||||
}
|
||||
|
||||
text.resize(lastLen);
|
||||
}
|
||||
}
|
||||
|
||||
VCMI_LIB_NAMESPACE_END
|
45
lib/TextOperations.h
Normal file
45
lib/TextOperations.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* TextOperations.h, part of VCMI engine
|
||||
*
|
||||
* Authors: listed in file AUTHORS in main folder
|
||||
*
|
||||
* License: GNU General Public License v2.0 or later
|
||||
* Full text of license available in license.txt file, in main folder
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
VCMI_LIB_NAMESPACE_BEGIN
|
||||
|
||||
/// Namespace that provides utilites for unicode support (UTF-8)
|
||||
namespace Unicode
|
||||
{
|
||||
/// evaluates size of UTF-8 character
|
||||
size_t DLL_LINKAGE getCharacterSize(char firstByte);
|
||||
|
||||
/// test if character is a valid UTF-8 symbol
|
||||
/// maxSize - maximum number of bytes this symbol may consist from ( = remainer of string)
|
||||
bool DLL_LINKAGE isValidCharacter(const char * character, size_t maxSize);
|
||||
|
||||
/// test if text contains ASCII-string (no need for unicode conversion)
|
||||
bool DLL_LINKAGE isValidASCII(const std::string & text);
|
||||
bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
|
||||
|
||||
/// test if text contains valid UTF-8 sequence
|
||||
bool DLL_LINKAGE isValidString(const std::string & text);
|
||||
bool DLL_LINKAGE isValidString(const char * data, size_t size);
|
||||
|
||||
/// converts text to unicode from specified encoding or from one specified in settings
|
||||
std::string DLL_LINKAGE toUnicode(const std::string & text);
|
||||
std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
|
||||
|
||||
/// converts text from unicode to specified encoding or to one specified in settings
|
||||
/// NOTE: usage of these functions should be avoided if possible
|
||||
std::string DLL_LINKAGE fromUnicode(const std::string & text);
|
||||
std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
|
||||
|
||||
///delete (amount) UTF characters from right
|
||||
DLL_LINKAGE void trimRight(std::string & text, size_t amount = 1);
|
||||
};
|
||||
|
||||
VCMI_LIB_NAMESPACE_END
|
@ -11,7 +11,7 @@
|
||||
#include "CBinaryReader.h"
|
||||
|
||||
#include "CInputStream.h"
|
||||
#include "../CGeneralTextHandler.h"
|
||||
#include "../TextOperations.h"
|
||||
|
||||
VCMI_LIB_NAMESPACE_BEGIN
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user