mirror of
				https://github.com/vcmi/vcmi.git
				synced 2025-10-31 00:07:39 +02:00 
			
		
		
		
	Unicode support.
- boost-locale library is now required (boost 1.48 or higher)
- Unicode namespace that contains UTF-8 handling
- All non-ASCII strings from H3 data will be converted to UTF-8 during loading
- All JSON files MUST use UTF-8
- H3 data encoding can be selected via launcher or directly in config file
This commit is contained in:
		| @@ -52,7 +52,7 @@ if (APPLE) | ||||
| 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftemplate-depth=256") | ||||
| endif() | ||||
|  | ||||
| find_package(Boost 1.46.0 COMPONENTS program_options filesystem system thread REQUIRED) | ||||
| find_package(Boost 1.48.0 COMPONENTS program_options filesystem system thread locale REQUIRED) | ||||
| find_package(SDL REQUIRED) | ||||
| find_package(SDL_image REQUIRED) | ||||
| find_package(SDL_mixer REQUIRED) | ||||
|   | ||||
| @@ -20,11 +20,12 @@ To compile, the following packages (and their development counterparts) are need | ||||
| 	* zlib and zlib-devel | ||||
| 	* (optional) Qt 5, widget and network modules | ||||
| 	* the ffmpeg libraries (libavformat and libswscale). Their name could be libavformat-devel and libswscale-devel, or ffmpeg-libs-devel or similar names. | ||||
| 	* boost c++ libraries v1.46+ (www.boost.org): | ||||
| 	* boost c++ libraries v1.48+ (www.boost.org): | ||||
| 		- program-options | ||||
| 		- filesystem | ||||
| 		- system | ||||
| 		- thread | ||||
| 		- locale | ||||
|  | ||||
| On Debian-based systems (e.g. Ubuntu) run: | ||||
|   sudo apt-get install cmake g++ libsdl1.2debian libsdl-image1.2-dev libsdl-ttf2.0-dev libsdl-mixer1.2-dev zlib1g-dev libavformat-dev libswscale-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-thread-dev libboost-program-options-dev libboost-locale-dev | ||||
|   | ||||
| @@ -149,7 +149,7 @@ std::vector<std::string> CMessage::breakText( std::string text, size_t maxLineWi | ||||
| 		// loops till line is full or end of text reached | ||||
| 		while(currPos < text.length()  &&  text[currPos] != 0x0a  &&  lineWidth < maxLineWidth) | ||||
| 		{ | ||||
| 			symbolSize = graphics->fonts[font]->getCharacterSize(text[currPos]); | ||||
| 			symbolSize = Unicode::getCharacterSize(text[currPos]); | ||||
| 			glyphWidth = graphics->fonts[font]->getGlyphWidth(text.data() + currPos); | ||||
|  | ||||
| 			// candidate for line break | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include "../../lib/JsonNode.h" | ||||
| #include "../../lib/vcmi_endian.h" | ||||
| #include "../../lib/filesystem/Filesystem.h" | ||||
| #include "../../lib/CGeneralTextHandler.h" | ||||
|  | ||||
| /* | ||||
|  * Fonts.cpp, part of VCMI engine | ||||
| @@ -22,7 +23,7 @@ size_t IFont::getStringWidth(const std::string & data) const | ||||
| { | ||||
| 	size_t width = 0; | ||||
|  | ||||
| 	for(size_t i=0; i<data.size(); i += getCharacterSize(data[i])) | ||||
| 	for(size_t i=0; i<data.size(); i += Unicode::getCharacterSize(data[i])) | ||||
| 	{ | ||||
| 		width += getGlyphWidth(data.data() + i); | ||||
| 	} | ||||
| @@ -81,9 +82,9 @@ void IFont::renderTextLinesCenter(SDL_Surface * surface, const std::vector<std:: | ||||
| 	} | ||||
| } | ||||
|  | ||||
| std::array<CBitmapFont::Char, CBitmapFont::totalChars> CBitmapFont::loadChars() const | ||||
| std::array<CBitmapFont::BitmapChar, CBitmapFont::totalChars> CBitmapFont::loadChars() const | ||||
| { | ||||
| 	std::array<Char, totalChars> ret; | ||||
| 	std::array<BitmapChar, totalChars> ret; | ||||
|  | ||||
| 	size_t offset = 32; | ||||
|  | ||||
| @@ -117,11 +118,17 @@ size_t CBitmapFont::getLineHeight() const | ||||
|  | ||||
| size_t CBitmapFont::getGlyphWidth(const char * data) const | ||||
| { | ||||
| 	const Char & ch = chars[ui8(*data)]; | ||||
| 	return ch.leftOffset + ch.width + ch.rightOffset; | ||||
| 	std::string localChar = Unicode::fromUnicode(std::string(data, Unicode::getCharacterSize(data[0]))); | ||||
|  | ||||
| 	if (localChar.size() == 1) | ||||
| 	{ | ||||
| 		const BitmapChar & ch = chars[ui8(localChar[0])]; | ||||
| 		return ch.leftOffset + ch.width + ch.rightOffset; | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| void CBitmapFont::renderCharacter(SDL_Surface * surface, const Char & character, const SDL_Color & color, int &posX, int &posY) const | ||||
| void CBitmapFont::renderCharacter(SDL_Surface * surface, const BitmapChar & character, const SDL_Color & color, int &posX, int &posY) const | ||||
| { | ||||
| 	Rect clipRect; | ||||
| 	SDL_GetClipRect(surface, &clipRect); | ||||
| @@ -186,19 +193,17 @@ void CBitmapFont::renderText(SDL_Surface * surface, const std::string & data, co | ||||
| 	//assert(data[data.size()-1] != '}'); | ||||
|  | ||||
| 	SDL_LockSurface(surface); | ||||
| 	// for each symbol | ||||
| 	for(auto & elem : data) | ||||
|  | ||||
| 	for(size_t i=0; i<data.size(); i += Unicode::getCharacterSize(data[i])) | ||||
| 	{ | ||||
| 		renderCharacter(surface, chars[ui8(elem)], color, posX, posY); | ||||
| 		std::string localChar = Unicode::fromUnicode(data.substr(i, Unicode::getCharacterSize(data[i]))); | ||||
|  | ||||
| 		if (localChar.size() == 1) | ||||
| 			renderCharacter(surface, chars[ui8(localChar[0])], color, posX, posY); | ||||
| 	} | ||||
| 	SDL_UnlockSurface(surface); | ||||
| } | ||||
|  | ||||
| size_t CBitmapFont::getCharacterSize(char data) const | ||||
| { | ||||
| 	return 1; | ||||
| } | ||||
|  | ||||
| std::pair<std::unique_ptr<ui8[]>, ui64> CTrueTypeFont::loadData(const JsonNode & config) | ||||
| { | ||||
| 	std::string filename = "Data/" + config["file"].String(); | ||||
| @@ -246,15 +251,18 @@ size_t CTrueTypeFont::getLineHeight() const | ||||
|  | ||||
| size_t CTrueTypeFont::getGlyphWidth(const char *data) const | ||||
| { | ||||
| 	return getStringWidth(std::string(data, Unicode::getCharacterSize(*data))); | ||||
| 	/* | ||||
| 	int advance; | ||||
| 	TTF_GlyphMetrics(font.get(), *data, nullptr, nullptr, nullptr, nullptr, &advance); | ||||
| 	return advance; | ||||
| 	*/ | ||||
| } | ||||
|  | ||||
| size_t CTrueTypeFont::getStringWidth(const std::string & data) const | ||||
| { | ||||
| 	int width; | ||||
| 	TTF_SizeText(font.get(), data.c_str(), &width, nullptr); | ||||
| 	TTF_SizeUTF8(font.get(), data.c_str(), &width, nullptr); | ||||
| 	return width; | ||||
| } | ||||
|  | ||||
| @@ -282,11 +290,6 @@ void CTrueTypeFont::renderText(SDL_Surface * surface, const std::string & data, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| size_t CTrueTypeFont::getCharacterSize(char data) const | ||||
| { | ||||
| 	return 1; | ||||
| } | ||||
|  | ||||
| size_t CBitmapHanFont::getCharacterDataOffset(size_t index) const | ||||
| { | ||||
| 	size_t rowSize  = (size + 7) / 8; // 1 bit per pixel, rounded up | ||||
| @@ -350,12 +353,15 @@ void CBitmapHanFont::renderText(SDL_Surface * surface, const std::string & data, | ||||
|  | ||||
| 	SDL_LockSurface(surface); | ||||
|  | ||||
| 	for(size_t i=0; i<data.size(); i += getCharacterSize(data[i])) | ||||
| 	for(size_t i=0; i<data.size(); i += Unicode::getCharacterSize(data[i])) | ||||
| 	{ | ||||
| 		if (ui8(data[i]) < 0x80) | ||||
| 			fallback->renderCharacter(surface, fallback->chars[data[i]], color, posX, posY); | ||||
| 		else | ||||
| 			renderCharacter(surface, getCharacterIndex(data[i], data[i+1]), color, posX, posY); | ||||
| 		std::string localChar = Unicode::fromUnicode(data.substr(i, Unicode::getCharacterSize(data[i]))); | ||||
|  | ||||
| 		if (localChar.size() == 1) | ||||
| 			fallback->renderCharacter(surface, fallback->chars[ui8(localChar[0])], color, posX, posY); | ||||
|  | ||||
| 		if (localChar.size() == 2) | ||||
| 			renderCharacter(surface, getCharacterIndex(localChar[0], localChar[1]), color, posX, posY); | ||||
| 	} | ||||
| 	SDL_UnlockSurface(surface); | ||||
| } | ||||
| @@ -368,25 +374,24 @@ CBitmapHanFont::CBitmapHanFont(const JsonNode &config): | ||||
| 	// basic tests to make sure that fonts are OK | ||||
| 	// 1) fonts must contain 190 "sections", 126 symbols each. | ||||
| 	assert(getCharacterIndex(0xfe, 0xff) == 190*126); | ||||
| 	// ensure that font size is correct - enough to fit all possible symbols | ||||
| 	// 2) ensure that font size is correct - enough to fit all possible symbols | ||||
| 	assert(getCharacterDataOffset(getCharacterIndex(0xfe, 0xff)) == data.second); | ||||
| } | ||||
|  | ||||
| size_t CBitmapHanFont::getLineHeight() const | ||||
| { | ||||
| 	return size + 1; | ||||
| 	return std::max(size + 1, fallback->getLineHeight()); | ||||
| } | ||||
|  | ||||
| size_t CBitmapHanFont::getGlyphWidth(const char * data) const | ||||
| { | ||||
| 	if (ui8(data[0]) < 0x80) | ||||
| 		return fallback->getGlyphWidth(data); | ||||
| 	return size + 1; | ||||
| } | ||||
| 	std::string localChar = Unicode::fromUnicode(std::string(data, Unicode::getCharacterSize(data[0]))); | ||||
|  | ||||
| size_t CBitmapHanFont::getCharacterSize(char data) const | ||||
| { | ||||
| 	if (ui8(data) < 0x80) | ||||
| 		return 1; | ||||
| 	return 2; | ||||
| 	if (localChar.size() == 1) | ||||
| 		return fallback->getGlyphWidth(data); | ||||
|  | ||||
| 	if (localChar.size() == 2) | ||||
| 		return size + 1; | ||||
|  | ||||
| 	return 0; | ||||
| } | ||||
|   | ||||
| @@ -35,9 +35,6 @@ public: | ||||
| 	virtual size_t getLineHeight() const = 0; | ||||
| 	/// Returns width, in pixels of a character glyph. Pointer must contain at least characterSize valid bytes | ||||
| 	virtual size_t getGlyphWidth(const char * data) const = 0; | ||||
| 	/// Returns size (in bytes) of one char in current encoding, may be bigger than one for non-ascii | ||||
| 	/// TODO: move it out of this class. Separate entity for handling localization/different encodings? | ||||
| 	virtual size_t getCharacterSize(char data) const = 0; | ||||
| 	/// Return width of the string | ||||
| 	virtual size_t getStringWidth(const std::string & data) const; | ||||
|  | ||||
| @@ -66,7 +63,7 @@ class CBitmapFont : public IFont | ||||
| { | ||||
| 	static const size_t totalChars = 256; | ||||
|  | ||||
| 	struct Char | ||||
| 	struct BitmapChar | ||||
| 	{ | ||||
| 		si32 leftOffset; | ||||
| 		ui32 width; | ||||
| @@ -76,12 +73,12 @@ class CBitmapFont : public IFont | ||||
|  | ||||
| 	const std::pair<std::unique_ptr<ui8[]>, ui64> data; | ||||
|  | ||||
| 	const std::array<Char, totalChars> chars; | ||||
| 	const std::array<BitmapChar, totalChars> chars; | ||||
| 	const ui8 height; | ||||
|  | ||||
| 	std::array<Char, totalChars> loadChars() const; | ||||
| 	std::array<BitmapChar, totalChars> loadChars() const; | ||||
|  | ||||
| 	void renderCharacter(SDL_Surface * surface, const Char & character, const SDL_Color & color, int &posX, int &posY) const; | ||||
| 	void renderCharacter(SDL_Surface * surface, const BitmapChar & character, const SDL_Color & color, int &posX, int &posY) const; | ||||
|  | ||||
| 	void renderText(SDL_Surface * surface, const std::string & data, const SDL_Color & color, const Point & pos) const override; | ||||
| public: | ||||
| @@ -89,7 +86,6 @@ public: | ||||
|  | ||||
| 	size_t getLineHeight() const override; | ||||
| 	size_t getGlyphWidth(const char * data) const override; | ||||
| 	size_t getCharacterSize(char data) const override; | ||||
|  | ||||
| 	friend class CBitmapHanFont; | ||||
| }; | ||||
| @@ -114,7 +110,6 @@ public: | ||||
|  | ||||
| 	size_t getLineHeight() const override; | ||||
| 	size_t getGlyphWidth(const char * data) const override; | ||||
| 	size_t getCharacterSize(char data) const override; | ||||
| }; | ||||
|  | ||||
| class CTrueTypeFont : public IFont | ||||
| @@ -134,6 +129,5 @@ public: | ||||
|  | ||||
| 	size_t getLineHeight() const override; | ||||
| 	size_t getGlyphWidth(const char * data) const override; | ||||
| 	size_t getCharacterSize(char data) const override; | ||||
| 	size_t getStringWidth(const std::string & data) const override; | ||||
| }; | ||||
|   | ||||
| @@ -41,7 +41,7 @@ | ||||
| 				}, | ||||
| 				"encoding" : { | ||||
| 					"type" : "string", | ||||
| 					"default" : "native" | ||||
| 					"default" : "CP1252" | ||||
| 				} | ||||
| 			} | ||||
| 		}, | ||||
|   | ||||
| @@ -5,6 +5,19 @@ | ||||
| #include "../../lib/CConfigHandler.h" | ||||
| #include "../../lib/VCMIDirs.h" | ||||
|  | ||||
| /// List of encodings which can be selected from Launcher. | ||||
| /// Note that it is possible to specify encoding manually in settings.json | ||||
| static const std::string knownEncodingsList[] = //TODO: remove hardcode | ||||
| { | ||||
|     // European Windows-125X encodings | ||||
|     "CP1250", // Central European, covers mostly Slavic languages that use latin script | ||||
|     "CP1251", // Covers languages that use cyrillic script | ||||
|     "CP1252", // Western European, covers most of latin languages | ||||
|     // Chinese encodings | ||||
|     "GBK",    // extension of GB2312, also known as CP936 | ||||
|     "GB2312"  // basic set for Simplified Chinese. Separate from GBK to allow proper detection of H3 fonts | ||||
| }; | ||||
|  | ||||
| void CSettingsView::loadSettings() | ||||
| { | ||||
| 	int resX = settings["video"]["screenRes"]["width"].Float(); | ||||
| @@ -37,6 +50,11 @@ void CSettingsView::loadSettings() | ||||
| 	for (auto string : VCMIDirs::get().dataPaths()) | ||||
| 		dataDirs += QString::fromUtf8(string.c_str()); | ||||
| 	ui->lineEditGameDir->setText(dataDirs.join(':')); | ||||
|  | ||||
| 	std::string encoding = settings["general"]["encoding"].String(); | ||||
| 	size_t encodingIndex = boost::range::find(knownEncodingsList, encoding) - knownEncodingsList; | ||||
| 	if (encodingIndex < ui->comboBoxEncoding->count()) | ||||
| 		ui->comboBoxEncoding->setCurrentIndex(encodingIndex); | ||||
| } | ||||
|  | ||||
| CSettingsView::CSettingsView(QWidget *parent) : | ||||
| @@ -112,13 +130,6 @@ void CSettingsView::on_plainTextEditRepos_textChanged() | ||||
|  | ||||
| void CSettingsView::on_comboBoxEncoding_currentIndexChanged(int index) | ||||
| { | ||||
| 	std::string encodings[] = | ||||
| 	{ | ||||
| 	    "native", // right now indicates disabled unicode, may be removed in future | ||||
| 	    "CP1250", "CP1251", "CP1252", // european Windows-125X encoding | ||||
| 	    "GBK", "gb2312"  // chinese, aka CP936. Same encoding rules but different font files. | ||||
| 	}; | ||||
|  | ||||
| 	Settings node = settings.write["general"]["encoding"]; | ||||
| 	node->String() = encodings[index]; | ||||
| 	node->String() = knownEncodingsList[index]; | ||||
| } | ||||
|   | ||||
| @@ -341,11 +341,6 @@ | ||||
|    </item> | ||||
|    <item row="6" column="4"> | ||||
|     <widget class="QComboBox" name="comboBoxEncoding"> | ||||
|      <item> | ||||
|       <property name="text"> | ||||
|        <string>Native (unicode disabled)</string> | ||||
|       </property> | ||||
|      </item> | ||||
|      <item> | ||||
|       <property name="text"> | ||||
|        <string>Central European (Windows 1250)</string> | ||||
|   | ||||
| @@ -1,12 +1,13 @@ | ||||
| #include "StdInc.h" | ||||
| #include "CGeneralTextHandler.h" | ||||
|  | ||||
| #include "filesystem/Filesystem.h" | ||||
| #include "GameConstants.h" | ||||
| #include "CModHandler.h" | ||||
| #include "VCMI_Lib.h" | ||||
| #include <boost/locale.hpp> | ||||
|  | ||||
| // #include <locale> //needed? | ||||
| #include "filesystem/Filesystem.h" | ||||
| #include "CConfigHandler.h" | ||||
| #include "CModHandler.h" | ||||
| #include "GameConstants.h" | ||||
| #include "VCMI_Lib.h" | ||||
|  | ||||
| /* | ||||
|  * CGeneralTextHandler.cpp, part of VCMI engine | ||||
| @@ -18,6 +19,110 @@ | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| size_t Unicode::getCharacterSize(ui8 firstByte) | ||||
| { | ||||
| 	// The byte length of a UTF-8 sequence is encoded in its first byte as the | ||||
| 	// number of consecutive leading bits set to 1: | ||||
| 	// 0xxxxxxx -> 1 (ASCII) | ||||
| 	// 110xxxxx -> 2 | ||||
| 	// 1110xxxx -> 3 | ||||
| 	// 11110xxx -> 4 (longest form allowed by the current standard) | ||||
| 	// 1111110x -> 6 (longest form allowed by the original standard) | ||||
| 	// A byte of the form 10xxxxxx has exactly one leading 1-bit, so it yields 1. | ||||
|  | ||||
| 	if ((firstByte & 0x80) == 0) | ||||
| 		return 1; // plain ASCII symbol | ||||
|  | ||||
| 	// walk a single-bit mask from the highest bit down until the first 0-bit | ||||
| 	size_t leadingOnes = 0; | ||||
| 	ui8 mask = 0x80; | ||||
|  | ||||
| 	while (mask != 0 && (firstByte & mask) != 0) | ||||
| 	{ | ||||
| 		++leadingOnes; | ||||
| 		mask >>= 1; | ||||
| 	} | ||||
| 	return leadingOnes; | ||||
| } | ||||
|  | ||||
| /// Tests whether the bytes at `character` start with one well-formed UTF-8 symbol. | ||||
| /// character - pointer to the first byte of the symbol | ||||
| /// maxSize   - number of readable bytes starting at `character` | ||||
| /// Returns true only if the lead byte is legal and every trailing byte is a | ||||
| /// continuation byte that lies within the first maxSize bytes. | ||||
| bool Unicode::isValidCharacter(const ui8 *character, size_t maxSize) | ||||
| { | ||||
| 	if (maxSize == 0) | ||||
| 		return false; // nothing to inspect, do not touch character[0] | ||||
|  | ||||
| 	const ui8 lead = character[0]; | ||||
|  | ||||
| 	if (lead < 0x80) | ||||
| 		return true; // ASCII, always a complete symbol | ||||
|  | ||||
| 	// 0x80-0xBF are continuation bytes and can not start a symbol, | ||||
| 	// 0xC0-0xC1 can only start overlong encodings of ASCII, | ||||
| 	// anything above 0xF4 is over the maximum allowed in standard (codepoints are capped at 0x0010FFFF) | ||||
| 	if (lead < 0xC2 || lead > 0xF4) | ||||
| 		return false; | ||||
|  | ||||
| 	const size_t size = getCharacterSize(lead); | ||||
|  | ||||
| 	if (size > maxSize) | ||||
| 		return false; // symbol is truncated | ||||
|  | ||||
| 	// remaining bytes must be continuation bytes: 10xxxxxx | ||||
| 	for (size_t i = 1; i < size; i++) | ||||
| 	{ | ||||
| 		if ((character[i] & 0xC0) != 0x80) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| /// Returns true if no byte of the string has its highest bit set (all bytes are below 0x80) | ||||
| bool Unicode::isValidASCII(const std::string & text) | ||||
| { | ||||
| 	// same check as the raw-buffer overload, applied to the string's storage | ||||
| 	return isValidASCII(text.data(), text.size()); | ||||
| } | ||||
|  | ||||
| /// Returns true if none of the first `size` bytes of `data` has its highest bit set | ||||
| bool Unicode::isValidASCII(const char * data, size_t size) | ||||
| { | ||||
| 	const char * const last = data + size; | ||||
|  | ||||
| 	for (const char * current = data; current != last; ++current) | ||||
| 	{ | ||||
| 		// any byte with the top bit set is outside of the 7-bit ASCII range | ||||
| 		if ((ui8(*current) & 0x80) != 0) | ||||
| 			return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| /// Returns true if every symbol of the string passes isValidCharacter | ||||
| bool Unicode::isValidString(const std::string & text) | ||||
| { | ||||
| 	// same symbol-by-symbol walk as the raw-buffer overload | ||||
| 	return isValidString(text.data(), text.size()); | ||||
| } | ||||
|  | ||||
| /// Returns true if every symbol in the first `size` bytes of `data` passes isValidCharacter | ||||
| bool Unicode::isValidString(const char * data, size_t size) | ||||
| { | ||||
| 	size_t pos = 0; | ||||
|  | ||||
| 	while (pos < size) | ||||
| 	{ | ||||
| 		const ui8 * symbol = reinterpret_cast<const ui8*>(data + pos); | ||||
|  | ||||
| 		// symbol may use at most the bytes that remain in the buffer | ||||
| 		if (!isValidCharacter(symbol, size - pos)) | ||||
| 			return false; | ||||
|  | ||||
| 		// step over the whole symbol, its length is taken from the first byte | ||||
| 		pos += getCharacterSize(data[pos]); | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
|  | ||||
| /// Returns value of the "encoding" entry from the "general" section of settings | ||||
| static std::string getSelectedEncoding() | ||||
| { | ||||
| 	const auto & encodingNode = settings["general"]["encoding"]; | ||||
| 	return encodingNode.String(); | ||||
| } | ||||
|  | ||||
| /// Converts text to UTF-8 from the encoding selected in settings | ||||
| std::string Unicode::toUnicode(const std::string &text) | ||||
| { | ||||
| 	const std::string sourceEncoding = getSelectedEncoding(); | ||||
| 	return toUnicode(text, sourceEncoding); | ||||
| } | ||||
|  | ||||
| /// Converts text to UTF-8 from the given encoding using boost::locale | ||||
| std::string Unicode::toUnicode(const std::string &text, const std::string &encoding) | ||||
| { | ||||
| 	std::string converted = boost::locale::conv::to_utf<char>(text, encoding); | ||||
| 	return converted; | ||||
| } | ||||
|  | ||||
| /// Converts UTF-8 text to the encoding selected in settings | ||||
| std::string Unicode::fromUnicode(const std::string & text) | ||||
| { | ||||
| 	const std::string targetEncoding = getSelectedEncoding(); | ||||
| 	return fromUnicode(text, targetEncoding); | ||||
| } | ||||
|  | ||||
| /// Converts UTF-8 text to the given encoding using boost::locale | ||||
| std::string Unicode::fromUnicode(const std::string &text, const std::string &encoding) | ||||
| { | ||||
| 	std::string converted = boost::locale::conv::from_utf<char>(text, encoding); | ||||
| 	return converted; | ||||
| } | ||||
|  | ||||
| //Helper for string -> float conversion | ||||
| class LocaleWithComma: public std::numpunct<char> | ||||
| { | ||||
| @@ -90,7 +195,7 @@ std::string CLegacyConfigParser::extractNormalString() | ||||
| 	return std::string(begin, curr); | ||||
| } | ||||
|  | ||||
| std::string CLegacyConfigParser::readString() | ||||
| std::string CLegacyConfigParser::readRawString() | ||||
| { | ||||
| 	if (curr >= end || *curr == '\n') | ||||
| 		return ""; | ||||
| @@ -106,9 +211,18 @@ std::string CLegacyConfigParser::readString() | ||||
| 	return ret; | ||||
| } | ||||
|  | ||||
| /// Reads one entry and returns it as UTF-8 | ||||
| std::string CLegacyConfigParser::readString() | ||||
| { | ||||
| 	const std::string raw = readRawString(); | ||||
|  | ||||
| 	// only strings with non-ASCII bytes go through conversion, | ||||
| 	// converting pure ASCII would only slow down the loading process | ||||
| 	if (!Unicode::isValidASCII(raw)) | ||||
| 		return Unicode::toUnicode(raw); | ||||
|  | ||||
| 	return raw; | ||||
| } | ||||
|  | ||||
| float CLegacyConfigParser::readNumber() | ||||
| { | ||||
| 	std::string input = readString(); | ||||
| 	std::string input = readRawString(); | ||||
|  | ||||
| 	std::istringstream stream(input); | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,34 @@ | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| /// Namespace that provides utilities for unicode support (UTF-8) | ||||
| namespace Unicode | ||||
| { | ||||
| 	/// evaluates size of UTF-8 character | ||||
| 	size_t getCharacterSize(ui8 firstByte); | ||||
|  | ||||
| 	/// test if character is a valid UTF-8 symbol | ||||
| 	/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string) | ||||
| 	bool isValidCharacter(const ui8 *character, size_t maxSize); | ||||
|  | ||||
| 	/// test if text contains ASCII-string (no need for unicode conversion) | ||||
| 	bool isValidASCII(const std::string & text); | ||||
| 	bool isValidASCII(const char * data, size_t size); | ||||
|  | ||||
| 	/// test if text contains valid UTF-8 sequence | ||||
| 	bool isValidString(const std::string & text); | ||||
| 	bool isValidString(const char * data, size_t size); | ||||
|  | ||||
| 	/// converts text to unicode from specified encoding or from one specified in settings | ||||
| 	std::string toUnicode(const std::string & text); | ||||
| 	std::string toUnicode(const std::string & text, const std::string & encoding); | ||||
|  | ||||
| 	/// converts text from unicode to specified encoding or to one specified in settings | ||||
| 	/// NOTE: usage of these functions should be avoided if possible | ||||
| 	std::string fromUnicode(const std::string & text); | ||||
| 	std::string fromUnicode(const std::string & text, const std::string & encoding); | ||||
| }; | ||||
|  | ||||
| class CInputStream; | ||||
|  | ||||
| /// Parser for any text files from H3 | ||||
| @@ -30,6 +58,8 @@ class CLegacyConfigParser | ||||
| 	/// extracts non-quoted string | ||||
| 	std::string extractNormalString(); | ||||
|  | ||||
| 	/// reads "raw" string without encoding conversion | ||||
| 	std::string readRawString(); | ||||
| public: | ||||
| 	/// read one entry from current line. Return ""/0 if end of line reached | ||||
| 	std::string readString(); | ||||
|   | ||||
| @@ -17,6 +17,7 @@ | ||||
| #include "filesystem/Filesystem.h" | ||||
| #include "VCMI_Lib.h" //for identifier resolution | ||||
| #include "CModHandler.h" | ||||
| #include "CGeneralTextHandler.h" | ||||
|  | ||||
| using namespace JsonDetail; | ||||
|  | ||||
| @@ -417,6 +418,9 @@ JsonNode JsonParser::parse(std::string fileName) | ||||
| { | ||||
| 	JsonNode root; | ||||
|  | ||||
| 	if (!Unicode::isValidString(&input[0], input.size())) | ||||
| 		error("Not a valid UTF-8 file", false); | ||||
|  | ||||
| 	extractValue(root); | ||||
| 	extractWhitespace(false); | ||||
|  | ||||
| @@ -426,8 +430,8 @@ JsonNode JsonParser::parse(std::string fileName) | ||||
|  | ||||
| 	if (!errors.empty()) | ||||
| 	{ | ||||
|         logGlobal->warnStream()<<"File " << fileName << " is not a valid JSON file!"; | ||||
|         logGlobal->warnStream()<<errors; | ||||
| 		logGlobal->warnStream()<<"File " << fileName << " is not a valid JSON file!"; | ||||
| 		logGlobal->warnStream()<<errors; | ||||
| 	} | ||||
| 	return root; | ||||
| } | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
|  | ||||
| #include <SDL_endian.h> | ||||
| #include "CInputStream.h" | ||||
|  | ||||
| #include "../CGeneralTextHandler.h" | ||||
|  | ||||
| #if SDL_BYTEORDER == SDL_BIG_ENDIAN | ||||
| template <typename CData> | ||||
| @@ -41,19 +41,19 @@ void CBinaryReader::setStream(CInputStream * stream) | ||||
|  | ||||
| si64 CBinaryReader::read(ui8 * data, si64 size) | ||||
| { | ||||
| 	return stream->read(data, size); | ||||
| 	si64 bytesRead = stream->read(data, size); | ||||
| 	if(bytesRead != size) | ||||
| 	{ | ||||
| 		throw std::runtime_error(getEndOfStreamExceptionMsg(size)); | ||||
| 	} | ||||
| 	return bytesRead; | ||||
| } | ||||
|  | ||||
| template <typename CData> | ||||
| CData CBinaryReader::readInteger() | ||||
| { | ||||
| 	CData val; | ||||
| 	si64 b = stream->read(reinterpret_cast<unsigned char *>(&val), sizeof(val)); | ||||
| 	if(b < sizeof(val)) | ||||
| 	{ | ||||
| 		throw std::runtime_error(getEndOfStreamExceptionMsg(sizeof(val))); | ||||
| 	} | ||||
|  | ||||
| 	stream->read(reinterpret_cast<unsigned char *>(&val), sizeof(val)); | ||||
| 	return readLE(val); | ||||
| } | ||||
|  | ||||
| @@ -78,15 +78,20 @@ INSTANTIATE(si64, readInt64) | ||||
|  | ||||
| std::string CBinaryReader::readString() | ||||
| { | ||||
|     int len = readUInt32(); | ||||
| 	assert(len >= 0 && len <= 500000); //not too long | ||||
|     std::string ret; | ||||
|     ret.reserve(len); | ||||
|     for(int gg = 0; gg < len; ++gg) | ||||
| 	unsigned int len = readUInt32(); | ||||
| 	assert(len <= 500000); //not too long | ||||
| 	if (len > 0) | ||||
| 	{ | ||||
|         ret += readInt8(); | ||||
| 		std::string ret; | ||||
| 		ret.resize(len); | ||||
| 		read(reinterpret_cast<ui8*>(&ret[0]), len); | ||||
| 		//FIXME: any need to move this into separate "read localized string" method? | ||||
| 		if (Unicode::isValidASCII(ret)) | ||||
| 			return ret; | ||||
| 		return Unicode::toUnicode(ret); | ||||
| 	} | ||||
| 	return ret; | ||||
| 	return ""; | ||||
|  | ||||
| } | ||||
|  | ||||
| void CBinaryReader::skip(int count) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user