// Source: vcmi/lib/CGeneralTextHandler.cpp
// Mirror of https://github.com/vcmi/vcmi.git (synced 2024-12-06 09:46:31 +02:00)

#include "StdInc.h"
#include "CGeneralTextHandler.h"
#include <boost/locale.hpp>
#include "filesystem/Filesystem.h"
#include "CConfigHandler.h"
#include "CModHandler.h"
#include "GameConstants.h"
#include "VCMI_Lib.h"
/*
* CGeneralTextHandler.cpp, part of VCMI engine
*
* Authors: listed in file AUTHORS in main folder
*
* License: GNU General Public License v2.0 or later
* Full text of license available in license.txt file, in main folder
*
*/
size_t Unicode::getCharacterSize(char firstByte)
{
// length of utf-8 character can be determined from 1st byte by counting number of highest bits set to 1:
// 0xxxxxxx -> 1 - ASCII chars
// 110xxxxx -> 2
// 11110xxx -> 4 - last allowed in current standard
// 1111110x -> 6 - last allowed in original standard
if ((ui8)firstByte < 0x80)
return 1; // ASCII
size_t ret = 0;
for (size_t i=0; i<8; i++)
{
if (((ui8)firstByte & (0x80 >> i)) != 0)
ret++;
else
break;
}
return ret;
}
bool Unicode::isValidCharacter(const char * character, size_t maxSize)
{
// can't be first byte in UTF8
if ((ui8)character[0] >= 0x80 && (ui8)character[0] < 0xC0)
return false;
// first character must follow rules checked in getCharacterSize
size_t size = getCharacterSize((ui8)character[0]);
if ((ui8)character[0] > 0xF4)
return false; // above maximum allowed in standard (UTF codepoints are capped at 0x0010FFFF)
if (size > maxSize)
return false;
// remaining characters must have highest bit set to 1
for (size_t i = 1; i < size; i++)
{
if (((ui8)character[i] & 0x80) == 0)
return false;
}
return true;
}
bool Unicode::isValidASCII(const std::string & text)
{
for (const char & ch : text)
if (ui8(ch) >= 0x80 )
return false;
return true;
}
bool Unicode::isValidASCII(const char * data, size_t size)
{
for (size_t i=0; i<size; i++)
if (ui8(data[i]) >= 0x80 )
return false;
return true;
}
bool Unicode::isValidString(const std::string & text)
{
for (size_t i=0; i<text.size(); i += getCharacterSize(text[i]))
{
if (!isValidCharacter(text.data() + i, text.size() - i))
return false;
}
return true;
}
/// Returns true when the `size` bytes starting at `data` consist solely of
/// sequences accepted by isValidCharacter. A zero-length buffer is considered valid.
bool Unicode::isValidString(const char * data, size_t size)
{
	size_t offset = 0;
	while (offset < size)
	{
		// validate the sequence first: it also guarantees that it fits into the remaining bytes
		if (!isValidCharacter(data + offset, size - offset))
			return false;

		offset += getCharacterSize(data[offset]);
	}
	return true;
}
/// Returns the string stored under settings["general"]["encoding"].
/// Used below as the default encoding for the single-argument toUnicode/fromUnicode.
static std::string getSelectedEncoding()
{
return settings["general"]["encoding"].String();
}
std::string Unicode::toUnicode(const std::string &text)
{
return toUnicode(text, getSelectedEncoding());
}
std::string Unicode::toUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::to_utf<char>(text, encoding);
}
std::string Unicode::fromUnicode(const std::string & text)
{
return fromUnicode(text, getSelectedEncoding());
}
std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding)
{
return boost::locale::conv::from_utf<char>(text, encoding);
}
void Unicode::trimRight(std::string & text, const size_t amount/* =1 */)
{
if(text.empty())
return;
//todo: more efficient algorithm
for(int i = 0; i< amount; i++){
2016-08-12 11:10:27 +02:00
auto b = text.begin();
auto e = text.end();
size_t lastLen = 0;
size_t len = 0;
while (b != e) {
lastLen = len;
size_t n = getCharacterSize(*b);
if(!isValidCharacter(&(*b),e-b))
2016-08-12 11:10:27 +02:00
{
logGlobal->error("Invalid UTF8 sequence");
break;//invalid sequence will be trimmed
}
len += n;
b += n;
2016-08-12 11:10:27 +02:00
}
text.resize(lastLen);
}
}
/// Number punctuation facet that reports ',' as the decimal separator.
/// Imbued into a stream to parse floats written with a comma instead of a dot.
class LocaleWithComma: public std::numpunct<char>
{
protected:
	/// Decimal separator used by streams carrying this facet.
	char do_decimal_point() const override
	{
		const char separator = ',';
		return separator;
	}
};
/// Loads the TEXT resource identified by `URI` through the resource handler
/// and prepares the parser to read it from the beginning.
CLegacyConfigParser::CLegacyConfigParser(std::string URI)
{
	const ResourceID resource(URI, EResType::TEXT);
	const std::unique_ptr<CInputStream> stream = CResourceHandler::get()->load(resource);
	init(stream);
}

/// Prepares the parser to read the content of an already opened stream.
CLegacyConfigParser::CLegacyConfigParser(const std::unique_ptr<CInputStream> & input)
{
	init(input);
}
void CLegacyConfigParser::init(const std::unique_ptr<CInputStream> & input)
{
data.reset(new char[input->getSize()]);
input->read((ui8*)data.get(), input->getSize());
curr = data.get();
end = curr + input->getSize();
}
/// Extracts the text between the quote at the cursor and the next quote, tab or end of buffer.
///
/// Precondition: the cursor points at a '"' character.
/// On return the cursor is placed just behind the terminating character; if the buffer
/// ended before a terminator was found the cursor is left at `end` instead of being
/// advanced beyond it.
std::string CLegacyConfigParser::extractQuotedPart()
{
	assert(*curr == '\"');

	curr++; // skip quote

	char * begin = curr;

	while (curr != end && *curr != '\"' && *curr != '\t')
		curr++;

	std::string ret(begin, curr);

	// step over the terminator, but never move the cursor past the end of the buffer
	if (curr != end)
		curr++;

	return ret;
}
/// Extracts a field that starts with a quote.
///
/// Precondition: the cursor points at a '"' character.
/// The field may consist of several quoted parts; a doubled quote is turned into a
/// single '"' in the result, and unquoted text found between quoted parts is appended as is.
/// Extraction stops at a tab, a carriage return or the end of the buffer.
std::string CLegacyConfigParser::extractQuotedString()
{
	assert(*curr == '\"');

	std::string ret;
	while (true)
	{
		ret += extractQuotedPart();

		// double quote - add it to string and continue quoted part
		if (curr < end && *curr == '\"')
		{
			ret += '\"';
		}
		//extract normal part
		else if(curr < end && *curr != '\t' && *curr != '\r')
		{
			char * begin = curr;

			while (curr < end && *curr != '\t' && *curr != '\r' && *curr != '\"')//find end of string or next quoted part start
				curr++;

			ret += std::string(begin, curr);

			// anything but a quote here means the field is over
			if(curr>=end || *curr != '\"')
				return ret;
		}
		else // end of string
			return ret;
	}
}
/// Extracts an unquoted field: everything from the cursor up to (not including)
/// the next tab, carriage return or the end of the buffer. The cursor is left on the terminator.
std::string CLegacyConfigParser::extractNormalString()
{
	char * const start = curr;

	for (; curr < end; ++curr)
	{
		if (*curr == '\t' || *curr == '\r')
			break; // end of the field
	}

	return std::string(start, curr);
}
/// Reads the next field without any encoding conversion.
///
/// Returns an empty string and leaves the cursor untouched when the cursor is at the
/// end of the buffer or on a line feed. Otherwise the field is extracted (quoted or
/// plain, depending on its first character) and the cursor is moved one character further.
std::string CLegacyConfigParser::readRawString()
{
	const bool nothingToRead = curr >= end || *curr == '\n';
	if (nothingToRead)
		return "";

	const bool quoted = *curr == '\"';
	std::string field = quoted
		? extractQuotedString()  // quoted text - find closing quote
		: extractNormalString(); // string without quotes - copy till \t or \r

	++curr; // step over the character that terminated the field
	return field;
}
/// Reads the next field and returns it as UTF-8.
/// Fields that are pure ASCII are returned untouched - converting them would only slow down loading.
std::string CLegacyConfigParser::readString()
{
	std::string raw = readRawString();

	if (!Unicode::isValidASCII(raw))
		return Unicode::toUnicode(raw);

	return raw;
}
/// Reads the next field and parses it as a float.
/// A field containing ',' is parsed with the comma as decimal separator.
/// Returns 0 when the field cannot be parsed.
float CLegacyConfigParser::readNumber()
{
	const std::string text = readRawString();
	std::istringstream parser(text);

	// code to handle conversion with comma as decimal separator
	const bool usesComma = text.find(',') != std::string::npos;
	if (usesComma)
		parser.imbue(std::locale(std::locale(), new LocaleWithComma));

	float value;
	parser >> value;

	return parser.fail() ? 0.0f : value;
}
/// Tells whether the next field holds nothing but spaces.
/// Looks ahead without moving the cursor: after skipping spaces, the field is empty
/// when the buffer ends or a line feed, carriage return or tab follows.
bool CLegacyConfigParser::isNextEntryEmpty() const
{
	// find next meaningful symbol
	const char * probe = curr;
	while (probe < end && *probe == ' ')
		++probe;

	if (probe >= end)
		return true;

	switch (*probe)
	{
	case '\n':
	case '\r':
	case '\t':
		return true;
	default:
		return false;
	}
}
/// Skips the rest of the current line and moves the cursor behind its line feed.
/// @return true when there is still data left in the buffer after the skipped line
bool CLegacyConfigParser::endLine()
{
	// consume every field that is left on this line
	while (!(curr >= end || *curr == '\n'))
		readString();

	++curr; // step over the line feed

	const bool moreData = curr < end;
	return moreData;
}
// CGeneralTextHandler: loading of the legacy text tables
/// Appends the first field of every line of the text resource `sourceName` to `dest`.
/// At least one entry is always appended, even when the resource is empty.
void CGeneralTextHandler::readToVector(std::string sourceName, std::vector<std::string> & dest)
{
	CLegacyConfigParser parser(sourceName);

	for (;;)
	{
		dest.push_back(parser.readString());

		// endLine() reports whether any data is left after the current line
		if (!parser.endLine())
			break;
	}
}
/// Loads all text tables into the handler.
///
/// Simple tables (first field of each line) are read with readToVector; tables with
/// headers, several columns or irregular layout are parsed by the dedicated scopes below.
/// ZCREXP.TXT is only read when the STACK_EXP module is enabled.
CGeneralTextHandler::CGeneralTextHandler()
{
	// tables where only the first field of every line is of interest
	readToVector("DATA/VCDESC.TXT", victoryConditions);
	readToVector("DATA/LCDESC.TXT", lossCondtions);
	readToVector("DATA/TCOMMAND.TXT", tcommands);
	readToVector("DATA/HALLINFO.TXT", hcommands);
	readToVector("DATA/CASTINFO.TXT", fcommands);
	readToVector("DATA/ADVEVENT.TXT", advobtxt);
	readToVector("DATA/XTRAINFO.TXT", xtrainfo);
	readToVector("DATA/RESTYPES.TXT", restypes);
	readToVector("DATA/TERRNAME.TXT", terrainNames);
	readToVector("DATA/RANDSIGN.TXT", randsign);
	readToVector("DATA/CRGEN1.TXT", creGens);
	readToVector("DATA/CRGEN4.TXT", creGens4);
	readToVector("DATA/OVERVIEW.TXT", overview);
	readToVector("DATA/ARRAYTXT.TXT", arraytxt);
	readToVector("DATA/PRISKILL.TXT", primarySkillNames);
	readToVector("DATA/JKTEXT.TXT", jktexts);
	readToVector("DATA/TVRNINFO.TXT", tavernInfo);
	readToVector("DATA/RANDTVRN.TXT", tavernRumors);
	readToVector("DATA/TURNDUR.TXT", turnDurations);
	readToVector("DATA/HEROSCRN.TXT", heroscrn);
	readToVector("DATA/TENTCOLR.TXT", tentColors);
	readToVector("DATA/SKILLLEV.TXT", levels);

	localizedTexts = JsonNode(ResourceID("config/translate.json", EResType::TEXT));

	// general texts: the first line is skipped
	{
		CLegacyConfigParser parser("DATA/GENRLTXT.TXT");
		parser.endLine();
		do
		{
			allTexts.push_back(parser.readString());
		}
		while (parser.endLine());
	}
	// help texts: two fields per line, stored as a pair
	{
		CLegacyConfigParser parser("DATA/HELP.TXT");
		do
		{
			std::string first = parser.readString();
			std::string second = parser.readString();
			zelp.push_back(std::make_pair(first, second));
		}
		while (parser.endLine());
	}
	// mines: names and events come from two files read in lockstep, until either one ends
	{
		CLegacyConfigParser nameParser("DATA/MINENAME.TXT");
		CLegacyConfigParser eventParser("DATA/MINEEVNT.TXT");
		do
		{
			std::string name = nameParser.readString();
			std::string event = eventParser.readString();
			mines.push_back(std::make_pair(name, event));
		}
		while (nameParser.endLine() && eventParser.endLine());
	}
	// player colors: stored as read and with the first letter upper-cased
	{
		CLegacyConfigParser parser("DATA/PLCOLORS.TXT");
		do
		{
			std::string color = parser.readString();
			colors.push_back(color);

			// an entry may be empty; toupper requires a value in unsigned char range
			if(!color.empty())
				color[0] = static_cast<char>(toupper(static_cast<unsigned char>(color[0])));
			capColors.push_back(color);
		}
		while (parser.endLine());
	}
	// secondary skills: name followed by three texts per line
	{
		CLegacyConfigParser parser("DATA/SSTRAITS.TXT");

		//skip header
		parser.endLine();
		parser.endLine();

		do
		{
			skillName.push_back(parser.readString());

			skillInfoTexts.push_back(std::vector<std::string>());
			for(int j = 0; j < 3; j++)
				skillInfoTexts.back().push_back(parser.readString());
		}
		while (parser.endLine());
	}
	// seer huts
	{
		CLegacyConfigParser parser("DATA/SEERHUT.TXT");

		//skip header
		parser.endLine();

		for (int i = 0; i < 6; ++i)
			seerEmpty.push_back(parser.readString());
		parser.endLine();

		quests.resize(10);
		for (int i = 0; i < 9; ++i) //9 types of quests
		{
			quests[i].resize(5);
			for (int j = 0; j < 5; ++j)
			{
				parser.readString(); //front description
				for (int k = 0; k < 6; ++k)
					quests[i][j].push_back(parser.readString());

				parser.endLine();
			}
		}
		quests[9].resize(1);

		for (int k = 0; k < 6; ++k) //Time limit
		{
			quests[9][0].push_back(parser.readString());
		}
		parser.endLine();

		parser.endLine(); // empty line
		parser.endLine(); // header

		for (int i = 0; i < 48; ++i)
		{
			seerNames.push_back(parser.readString());
			parser.endLine();
		}
	}
	// campaigns: list of map names, then one group of region names per map
	{
		CLegacyConfigParser parser("DATA/CAMPTEXT.TXT");

		//skip header
		parser.endLine();

		std::string text;
		do
		{
			text = parser.readString();
			if (!text.empty())
				campaignMapNames.push_back(text);
		}
		while (parser.endLine() && !text.empty());

		for (size_t i=0; i<campaignMapNames.size(); i++)
		{
			do // skip empty space and header
			{
				text = parser.readString();
			}
			while (parser.endLine() && text.empty());

			campaignRegionNames.push_back(std::vector<std::string>());
			do
			{
				text = parser.readString();
				if (!text.empty())
					campaignRegionNames.back().push_back(text);
			}
			while (parser.endLine() && !text.empty());
		}
	}
	if (VLC->modh->modules.STACK_EXP)
	{
		CLegacyConfigParser parser("DATA/ZCREXP.TXT");
		parser.endLine();//header
		for (size_t iter=0; iter<325; iter++)
		{
			parser.readString(); //ignore 1st column with description
			zcrexp.push_back(parser.readString());
			parser.endLine();
		}
		// line 325 - some weird formatting here
		zcrexp.push_back(parser.readString());
		parser.readString();
		parser.endLine();

		do // rest of file can be read normally
		{
			parser.readString(); //ignore 1st column with description
			zcrexp.push_back(parser.readString());
		}
		while (parser.endLine());
	}
}