1
0
mirror of https://github.com/vcmi/vcmi.git synced 2025-01-06 00:24:11 +02:00
vcmi/lib/json/JsonParser.cpp

602 lines
11 KiB
C++
Raw Normal View History

/*
* JsonParser.cpp, part of VCMI engine
*
* Authors: listed in file AUTHORS in main folder
*
* License: GNU General Public License v2.0 or later
* Full text of license available in license.txt file, in main folder
*
*/
#include "StdInc.h"
#include "JsonParser.h"
2024-02-13 22:19:24 +02:00
#include "../ScopeGuard.h"
#include "../texts/TextOperations.h"
2024-02-13 19:16:35 +02:00
#include "JsonFormatException.h"
VCMI_LIB_NAMESPACE_BEGIN
2024-02-13 22:19:24 +02:00
/// Creates a parser over a raw byte buffer.
/// @param inputString pointer to the first byte of the document; reinterpreted as characters
/// @param stringSize  number of bytes available at inputString
/// @param settings    parsing options (format mode, strictness, depth limit are read by the parser)
/// Parsing starts at offset 0, on line 1, with no nesting.
JsonParser::JsonParser(const std::byte * inputString, size_t stringSize, const JsonParsingSettings & settings)
	: settings(settings)
	, input(reinterpret_cast<const char *>(inputString), stringSize)
	, lineCount(1)     // line numbers reported in diagnostics are 1-based
	, currentDepth(0)
	, lineStart(0)
	, pos(0)
{
}
/// Parses the whole input buffer and returns the resulting root node.
/// Problems are accumulated through error(); if any were recorded they are
/// written to the mod log together with fileName. The (possibly partial)
/// root node is returned in every case.
/// @param fileName name used only for the log message
JsonNode JsonParser::parse(const std::string & fileName)
{
	JsonNode root;

	if(!input.empty())
	{
		const bool validUtf8 = TextOperations::isValidUnicodeString(input.data(), input.size());
		if(!validUtf8)
			error("Not a valid UTF-8 file", false);

		// A leading byte-order mark is not part of the document - step over it
		const uint32_t leadingCodepoint = TextOperations::getUnicodeCodepoint(input.data(), input.size());
		if(leadingCodepoint == 0xFEFF)
			pos += TextOperations::getUnicodeCharacterSize(input[0]);

		extractValue(root);
		extractWhitespace(false);

		// Only whitespace and comments may follow the root value
		if(pos < input.size())
			error("Not all file was parsed!", true);
	}
	else
	{
		error("File is empty", false);
	}

	if(errors.empty())
		return root;

	logMod->warn("%s is not valid JSON!", fileName);
	logMod->warn(errors);
	return root;
}
/// Tells whether no diagnostics (errors or warnings) have been recorded so far.
bool JsonParser::isValid()
{
	const bool nothingReported = errors.empty();
	return nothingReported;
}
bool JsonParser::extractSeparator()
{
2024-02-13 19:16:35 +02:00
if(!extractWhitespace())
return false;
2024-02-13 19:16:35 +02:00
if(input[pos] != ':')
return error("Separator expected");
pos++;
return true;
}
2024-02-13 19:16:35 +02:00
/// Skips whitespace, then dispatches on the first character of the next value
/// to the matching extract* routine, which fills node.
/// @return result of the selected routine; false if extractWhitespace() failed;
///         the result of error() if the character cannot start any value
bool JsonParser::extractValue(JsonNode & node)
{
	if(!extractWhitespace())
		return false;

	const char lead = input[pos];

	// Both quote styles are routed to the string parser; it decides which are legal
	if(lead == '\"' || lead == '\'')
		return extractString(node);

	if(lead == 'n')
		return extractNull(node);

	if(lead == 't')
		return extractTrue(node);

	if(lead == 'f')
		return extractFalse(node);

	if(lead == '{')
		return extractStruct(node);

	if(lead == '[')
		return extractArray(node);

	// Sign, leading decimal point or a digit - all handled by the number parser
	const bool startsNumber = lead == '-' || lead == '+' || lead == '.' || (lead >= '0' && lead <= '9');
	if(startsNumber)
		return extractFloat(node);

	return error("Value expected!");
}
bool JsonParser::extractWhitespace(bool verbose)
{
2024-02-13 19:07:51 +02:00
//TODO: JSON5 - C-style multi-line comments
//TODO: JSON5 - Additional white space characters are allowed
2024-02-13 19:16:35 +02:00
while(true)
{
while(pos < input.size() && static_cast<ui8>(input[pos]) <= ' ')
{
2024-02-13 19:16:35 +02:00
if(input[pos] == '\n')
{
lineCount++;
2024-02-13 19:16:35 +02:00
lineStart = pos + 1;
}
pos++;
}
2024-02-13 18:08:35 +02:00
2024-02-13 19:16:35 +02:00
if(pos >= input.size() || input[pos] != '/')
break;
2024-02-13 19:16:35 +02:00
if(settings.mode == JsonParsingSettings::JsonFormatMode::JSON)
2024-02-13 18:08:35 +02:00
error("Comments are not permitted in json!", true);
pos++;
2024-02-13 19:16:35 +02:00
if(pos == input.size())
break;
2024-02-13 19:16:35 +02:00
if(input[pos] == '/')
pos++;
else
error("Comments must consist of two slashes!", true);
2024-02-13 19:16:35 +02:00
while(pos < input.size() && input[pos] != '\n')
pos++;
}
2024-02-13 19:16:35 +02:00
if(pos >= input.size() && verbose)
return error("Unexpected end of file!");
return true;
}
2024-02-13 19:16:35 +02:00
bool JsonParser::extractEscaping(std::string & str)
{
2024-02-13 19:07:51 +02:00
// TODO: support unicode escaping:
// \u1234
switch(input[pos])
{
2024-02-13 19:16:35 +02:00
case '\"':
str += '\"';
break;
case '\\':
str += '\\';
break;
case 'b':
str += '\b';
break;
case 'f':
str += '\f';
break;
case 'n':
str += '\n';
break;
case 'r':
str += '\r';
break;
case 't':
str += '\t';
break;
case '/':
str += '/';
break;
default:
return error("Unknown escape sequence!", true);
}
return true;
}
2024-02-13 19:16:35 +02:00
bool JsonParser::extractString(std::string & str)
{
2024-02-13 19:07:51 +02:00
//TODO: JSON5 - line breaks escaping
2024-02-13 19:16:35 +02:00
if(settings.mode < JsonParsingSettings::JsonFormatMode::JSON5)
2024-02-13 19:07:51 +02:00
{
2024-02-13 19:16:35 +02:00
if(input[pos] != '\"')
2024-02-13 19:07:51 +02:00
return error("String expected!");
}
else
{
2024-02-13 19:16:35 +02:00
if(input[pos] != '\"' && input[pos] != '\'')
2024-02-13 19:07:51 +02:00
return error("String expected!");
}
char lineTerminator = input[pos];
pos++;
size_t first = pos;
2024-02-13 19:16:35 +02:00
while(pos != input.size())
{
2024-02-13 19:16:35 +02:00
if(input[pos] == lineTerminator) // Correct end of string
{
2024-02-13 19:16:35 +02:00
str.append(&input[first], pos - first);
pos++;
return true;
}
2024-02-13 19:16:35 +02:00
if(input[pos] == '\\') // Escaping
{
2024-02-13 19:16:35 +02:00
str.append(&input[first], pos - first);
pos++;
2024-02-13 19:16:35 +02:00
if(pos == input.size())
break;
extractEscaping(str);
first = pos + 1;
}
2024-02-13 19:16:35 +02:00
if(input[pos] == '\n') // end-of-line
{
2024-02-13 19:16:35 +02:00
str.append(&input[first], pos - first);
return error("Closing quote not found!", true);
}
if(static_cast<unsigned char>(input[pos]) < ' ') // control character
{
2024-02-13 19:16:35 +02:00
str.append(&input[first], pos - first);
first = pos + 1;
error("Illegal character in the string!", true);
}
pos++;
}
return error("Unterminated string!");
}
2024-02-13 19:16:35 +02:00
/// Parses a quoted string and stores it in node as a DATA_STRING value.
/// node is left untouched when the string could not be extracted.
/// @return result of the std::string overload of extractString()
bool JsonParser::extractString(JsonNode & node)
{
	std::string parsed;
	const bool extracted = extractString(parsed);

	if(extracted)
	{
		node.setType(JsonNode::JsonType::DATA_STRING);
		node.String() = parsed;
	}

	return extracted;
}
2024-02-13 19:07:51 +02:00
bool JsonParser::extractLiteral(std::string & literal)
{
2024-02-13 19:16:35 +02:00
while(pos < input.size())
{
2024-02-13 19:16:35 +02:00
bool isUpperCase = input[pos] >= 'A' && input[pos] <= 'Z';
bool isLowerCase = input[pos] >= 'a' && input[pos] <= 'z';
bool isNumber = input[pos] >= '0' && input[pos] <= '9';
2024-02-13 19:07:51 +02:00
2024-02-13 19:16:35 +02:00
if(!isUpperCase && !isLowerCase && !isNumber)
2024-02-13 19:07:51 +02:00
break;
literal += input[pos];
pos++;
}
return true;
}
2024-02-13 19:16:35 +02:00
bool JsonParser::extractAndCompareLiteral(const std::string & expectedLiteral)
2024-02-13 19:07:51 +02:00
{
std::string literal;
2024-02-13 19:16:35 +02:00
if(!extractLiteral(literal))
2024-02-13 19:07:51 +02:00
return false;
2024-02-13 19:16:35 +02:00
if(literal != expectedLiteral)
2024-02-13 19:07:51 +02:00
{
return error("Expected " + expectedLiteral + ", but unknown literal found", true);
return false;
}
return true;
}
2024-02-13 19:16:35 +02:00
/// Consumes the literal `null` and clears node.
/// @return false (node untouched) when the literal check failed, true otherwise
bool JsonParser::extractNull(JsonNode & node)
{
	if(extractAndCompareLiteral("null"))
	{
		node.clear();
		return true;
	}

	return false;
}
2024-02-13 19:16:35 +02:00
/// Consumes the literal `true` and stores boolean true in node.
/// @return false (node untouched) when the literal check failed, true otherwise
bool JsonParser::extractTrue(JsonNode & node)
{
	if(extractAndCompareLiteral("true"))
	{
		node.Bool() = true;
		return true;
	}

	return false;
}
2024-02-13 19:16:35 +02:00
/// Consumes the literal `false` and stores boolean false in node.
/// @return false (node untouched) when the literal check failed, true otherwise
bool JsonParser::extractFalse(JsonNode & node)
{
	if(extractAndCompareLiteral("false"))
	{
		node.Bool() = false;
		return true;
	}

	return false;
}
2024-02-13 19:16:35 +02:00
/// Parses an object starting at the '{' found at input[pos] into node.
/// Keys are quoted strings; from JSON5 mode on, unquoted alphanumeric keys are
/// accepted as well. A key may carry '#'-separated meta flags; only "override"
/// is recognised and is applied to the parsed element.
/// @return true when the closing '}' was consumed, false when a nested
///         extraction step failed
bool JsonParser::extractStruct(JsonNode & node)
{
	node.setType(JsonNode::JsonType::DATA_STRUCT);

	// NOTE(review): reported with the warning flag and the result is ignored, so
	// unless error() throws, parsing continues beyond the depth limit
	if(currentDepth > settings.maxDepth)
		error("Maximum allowed depth of json structure has been reached", true);

	pos++;
	currentDepth++;
	// Restores the depth counter on every exit path of this function
	auto guard = vstd::makeScopeGuard([this]()
	{
		currentDepth--;
	});

	if(!extractWhitespace())
		return false;

	//Empty struct found
	if(input[pos] == '}')
	{
		pos++;
		return true;
	}

	while(true)
	{
		if(!extractWhitespace())
			return false;

		bool overrideFlag = false;
		std::string key;

		const bool quotedKey = input[pos] == '\'' || input[pos] == '\"';
		if(settings.mode < JsonParsingSettings::JsonFormatMode::JSON5 || quotedKey)
		{
			if(!extractString(key))
				return false;
		}
		else
		{
			if(!extractLiteral(key))
				return false;
		}

		if(key.find('#') != std::string::npos)
		{
			// split key string into actual key and meta-flags
			std::vector<std::string> keyAndFlags;
			boost::split(keyAndFlags, key, boost::is_any_of("#"));
			key = keyAndFlags[0];

			for(size_t i = 1; i < keyAndFlags.size(); i++)
			{
				if(keyAndFlags[i] == "override")
					overrideFlag = true;
				else
					error("Encountered unknown flag #" + keyAndFlags[i], true);
			}
		}

		if(node.Struct().find(key) != node.Struct().end())
			error("Duplicate element encountered!", true);

		if(!extractSeparator())
			return false;

		// Look the element up once; the flag is only applied after a successful parse
		JsonNode & element = node.Struct()[key];
		if(!extractElement(element, '}'))
			return false;

		element.setOverrideFlag(overrideFlag);

		if(input[pos] == '}')
		{
			pos++;
			return true;
		}
	}
}
2024-02-13 19:16:35 +02:00
/// Parses an array starting at the '[' found at input[pos] into node.
/// @return true when the closing ']' was consumed, false when a nested
///         extraction step failed
bool JsonParser::extractArray(JsonNode & node)
{
	// NOTE(review): reported with the warning flag and the result is ignored, so
	// unless error() throws, parsing continues beyond the depth limit
	if(currentDepth > settings.maxDepth)
		error("Maximum allowed depth of json structure has been reached", true);

	currentDepth++;
	// Restores the depth counter on every exit path of this function
	auto guard = vstd::makeScopeGuard([this]()
	{
		currentDepth--;
	});

	pos++;
	node.setType(JsonNode::JsonType::DATA_VECTOR);

	if(!extractWhitespace())
		return false;

	//Empty array found
	if(input[pos] == ']')
	{
		pos++;
		return true;
	}

	while(true)
	{
		//NOTE: currently 50% of time is this vector resizing.
		//May be useful to use list during parsing and then swap() all items to vector
		node.Vector().resize(node.Vector().size() + 1);

		if(!extractElement(node.Vector().back(), ']'))
			return false;

		if(input[pos] == ']')
		{
			pos++;
			return true;
		}
	}
}
2024-02-13 19:16:35 +02:00
/// Parses one container element into node and the optional ',' that follows it.
/// On success pos is left either on terminator or on the start of the next element.
/// A trailing comma before terminator is reported in modes earlier than JSON5;
/// a missing comma between elements is always reported. Both are warnings.
/// @param terminator closing character of the enclosing container ('}' or ']')
/// @return false when the value or the following whitespace could not be read, true otherwise
bool JsonParser::extractElement(JsonNode & node, char terminator)
{
	if(!extractValue(node) || !extractWhitespace())
		return false;

	const bool sawComma = input[pos] == ',';
	if(sawComma)
	{
		++pos;
		if(!extractWhitespace())
			return false;
	}

	const bool atTerminator = input[pos] == terminator;

	if(atTerminator && sawComma && settings.mode < JsonParsingSettings::JsonFormatMode::JSON5)
		error("Extra comma found!", true);
	else if(!atTerminator && !sawComma)
		error("Comma expected!", true);

	return true;
}
2024-02-13 19:16:35 +02:00
/// Parses a number starting at input[pos] into node.
/// A number without fractional and exponential part is stored as DATA_INTEGER,
/// anything else as DATA_FLOAT. A leading '+' is reported in modes earlier than
/// JSON5. The exponent marker may be written as 'e' or 'E'.
/// All look-ahead is bounds-checked, so a number at the very end of the input
/// is handled without reading past the buffer.
/// @return true when a number was stored in node, otherwise the result of error()
bool JsonParser::extractFloat(JsonNode & node)
{
	//TODO: JSON5 - hexadecimal support
	//TODO: JSON5 - Numbers may be IEEE 754 positive infinity, negative infinity, and NaN (why?)

	// Current character, or '\0' once the input is exhausted
	const auto peek = [this]() -> char
	{
		return pos < input.size() ? input[pos] : '\0';
	};
	const auto isDigit = [](char c)
	{
		return c >= '0' && c <= '9';
	};

	// extractValue() routes all of these start characters here
	assert(peek() == '-' || peek() == '+' || peek() == '.' || isDigit(peek()));

	bool negative = false;
	double result = 0;
	si64 integerPart = 0;
	bool isFloat = false;

	if(peek() == '+')
	{
		if(settings.mode < JsonParsingSettings::JsonFormatMode::JSON5)
			error("Positive numbers should not have plus sign!", true);
		pos++;
	}
	else if(peek() == '-')
	{
		pos++;
		negative = true;
	}

	// A leading decimal point is tolerated and handled by the fractional part
	// below; anything else that is not a digit cannot start a number in any mode
	if(!isDigit(peek()) && peek() != '.')
		return error("Number expected!");

	//Extract integer part
	//NOTE: there is no overflow protection for very long digit sequences
	while(isDigit(peek()))
	{
		integerPart = integerPart * 10 + (input[pos] - '0');
		pos++;
	}

	result = static_cast<double>(integerPart);

	if(peek() == '.')
	{
		//extract fractional part
		isFloat = true;
		pos++;
		double fractMult = 0.1;

		if(settings.mode < JsonParsingSettings::JsonFormatMode::JSON5 && !isDigit(peek()))
			return error("Decimal part expected!");

		while(isDigit(peek()))
		{
			result = result + fractMult * (input[pos] - '0');
			fractMult /= 10;
			pos++;
		}
	}

	if(peek() == 'e' || peek() == 'E')
	{
		//extract exponential part
		pos++;
		isFloat = true;
		bool powerNegative = false;
		double power = 0;

		if(peek() == '-')
		{
			pos++;
			powerNegative = true;
		}
		else if(peek() == '+')
		{
			pos++;
		}

		if(!isDigit(peek()))
			return error("Exponential part expected!");

		while(isDigit(peek()))
		{
			power = power * 10 + (input[pos] - '0');
			pos++;
		}

		if(powerNegative)
			power = -power;

		result *= std::pow(10, power);
	}

	if(isFloat)
	{
		if(negative)
			result = -result;

		node.setType(JsonNode::JsonType::DATA_FLOAT);
		node.Float() = result;
	}
	else
	{
		if(negative)
			integerPart = -integerPart;

		node.setType(JsonNode::JsonType::DATA_INTEGER);
		node.Integer() = integerPart;
	}

	return true;
}
2024-02-13 19:16:35 +02:00
bool JsonParser::error(const std::string & message, bool warning)
{
2024-02-13 19:16:35 +02:00
if(settings.strict)
2024-02-13 18:08:35 +02:00
throw JsonFormatException(message);
std::ostringstream stream;
2024-02-13 19:16:35 +02:00
std::string type(warning ? " warning: " : " error: ");
if(!errors.empty())
{
// only add the line breaks between error messages so we don't have a trailing line break
stream << "\n";
}
stream << "At line " << lineCount << ", position " << pos - lineStart << type << message;
errors += stream.str();
return warning;
}
VCMI_LIB_NAMESPACE_END