1
0
mirror of https://github.com/vcmi/vcmi.git synced 2025-06-21 00:19:29 +02:00

Improved json validation

- split JsonNode.cpp into JsonNode and JsonDetail files
- validation should be notably faster (at least 10% faster loading)
- support for "format" field, allows checking existence of files.
- minor fixes in schemas
- msk/msg files are now optional
This commit is contained in:
Ivan Savenko
2013-10-26 19:33:34 +00:00
parent e2c037402c
commit 9237e6d97d
17 changed files with 1313 additions and 1052 deletions

View File

@ -18,6 +18,7 @@
#include "VCMI_Lib.h" //for identifier resolution
#include "CModHandler.h"
#include "CGeneralTextHandler.h"
#include "JsonDetail.h"
using namespace JsonDetail;
@ -302,873 +303,6 @@ JsonNode & JsonNode::resolvePointer(const std::string &jsonPointer)
return ::resolvePointer(*this, jsonPointer);
}
////////////////////////////////////////////////////////////////////////////////
/// Writes all entries of the range [begin, end), separated by ",\n" and
/// followed by a final newline. Entries are written one indentation level
/// (one tab) deeper than the current prefix. An empty range writes nothing.
template<typename Iterator>
void JsonWriter::writeContainer(Iterator begin, Iterator end)
{
	if (begin != end)
	{
		// indent nested entries by one more tab
		prefix += '\t';

		bool firstEntry = true;
		for (; begin != end; ++begin)
		{
			if (!firstEntry)
				out << ",\n";
			firstEntry = false;
			writeEntry(begin);
		}
		out << "\n";

		// restore indentation of the enclosing level
		prefix.resize(prefix.size() - 1);
	}
}
/// Writes one struct member as: <prefix>"key" : <value>
void JsonWriter::writeEntry(JsonMap::const_iterator entry)
{
	const auto & member = *entry;

	out << prefix;
	writeString(member.first);
	out << " : ";
	writeNode(member.second);
}
/// Writes one vector element as: <prefix><value>
void JsonWriter::writeEntry(JsonVector::const_iterator entry)
{
	const auto & element = *entry;

	out << prefix;
	writeNode(element);
}
/// Writes a string surrounded by double quotes, escaping the characters
/// that JsonParser::extractEscaping knows how to read back.
///
/// Fixes compared to the previous version:
///  - the escape *letter* (e.g. 'n' for a newline) is written after the
///    backslash instead of the raw character itself
///  - the escaped character is skipped afterwards, so it is no longer
///    written a second time as part of the following chunk
void JsonWriter::writeString(const std::string &string)
{
	// characters that need escaping and, at the same index, the symbol
	// that must follow the backslash in the output
	static const std::string escaped     = "\"\\\b\f\n\r\t";
	static const std::string escapeCodes = "\"\\bfnrt";

	out << '\"';

	size_t start = 0; // first character that was not written yet
	for (size_t pos = 0; pos < string.size(); pos++)
	{
		size_t escapedChar = escaped.find(string[pos]);

		if (escapedChar != std::string::npos)
		{
			// flush everything before the special character unchanged
			out.write(string.data() + start, pos - start);
			out << '\\' << escapeCodes[escapedChar];
			start = pos + 1;
		}
	}
	// flush the remaining tail
	out.write(string.data() + start, string.size() - start);

	out << '\"';
}
/// Writes a node according to its type. Vectors and structs are written
/// recursively via writeContainer. If the node has non-empty meta
/// information it is appended as a trailing "//" comment.
void JsonWriter::writeNode(const JsonNode &node)
{
	switch(node.getType())
	{
	case JsonNode::DATA_NULL:
		out << "null";
		break;

	case JsonNode::DATA_BOOL:
		out << (node.Bool() ? "true" : "false");
		break;

	case JsonNode::DATA_FLOAT:
		out << node.Float();
		break;

	case JsonNode::DATA_STRING:
		writeString(node.String());
		break;

	case JsonNode::DATA_VECTOR:
		out << "[" << "\n";
		writeContainer(node.Vector().begin(), node.Vector().end());
		out << prefix << "]";
		break;

	case JsonNode::DATA_STRUCT:
		out << "{" << "\n";
		writeContainer(node.Struct().begin(), node.Struct().end());
		out << prefix << "}";
		break;
	}

	// write metainf as comment
	if (!node.meta.empty())
		out << " //" << node.meta;
}
/// Constructing the writer immediately serializes the given node
/// into the output stream.
JsonWriter::JsonWriter(std::ostream &output, const JsonNode &node)
	: out(output)
{
	writeNode(node);
}
/// Streams a node as JSON text followed by a newline.
std::ostream & operator<<(std::ostream &out, const JsonNode &node)
{
	// all the work happens in the constructor of the temporary writer
	JsonWriter(out, node);

	out << "\n";
	return out;
}
////////////////////////////////////////////////////////////////////////////////
/// Copies stringSize characters from inputString into the parser and
/// positions it at the very beginning: line 1, offset 0.
JsonParser::JsonParser(const char * inputString, size_t stringSize)
	: input(inputString, stringSize)
	, lineCount(1)
	, lineStart(0)
	, pos(0)
{
}
/// Parses the whole input and returns the resulting root node.
/// Problems found on the way are collected in "errors" and, if there are
/// any, reported to the global log together with fileName.
/// The (possibly partial) root node is returned in either case.
JsonNode JsonParser::parse(std::string fileName)
{
	JsonNode root;

	const bool validUtf8 = Unicode::isValidString(&input[0], input.size());
	if (!validUtf8)
		error("Not a valid UTF-8 file", false);

	extractValue(root);
	extractWhitespace(false);

	//Warn if there are any non-whitespace symbols left
	const bool leftovers = pos < input.size();
	if (leftovers)
		error("Not all file was parsed!", true);

	if (errors.empty())
		return root;

	logGlobal->warnStream()<<"File " << fileName << " is not a valid JSON file!";
	logGlobal->warnStream()<<errors;
	return root;
}
/// Skips whitespace and consumes the ':' between a key and its value.
/// Returns false if the input ended or another symbol was found.
bool JsonParser::extractSeparator()
{
	if (extractWhitespace())
	{
		if (input[pos] == ':')
		{
			pos++;
			return true;
		}
		return error("Separator expected");
	}
	return false;
}
/// Skips whitespace and dispatches to the matching extractor, chosen by
/// the first symbol of the value.
bool JsonParser::extractValue(JsonNode &node)
{
	if (!extractWhitespace())
		return false;

	const char symbol = input[pos];

	if (symbol == '\"') return extractString(node);
	if (symbol == 'n')  return extractNull(node);
	if (symbol == 't')  return extractTrue(node);
	if (symbol == 'f')  return extractFalse(node);
	if (symbol == '{')  return extractStruct(node);
	if (symbol == '[')  return extractArray(node);

	// numbers start either with a minus sign or with a digit
	if (symbol == '-' || (symbol >= '0' && symbol <= '9'))
		return extractFloat(node);

	return error("Value expected!");
}
bool JsonParser::extractWhitespace(bool verbose)
{
while (true)
{
while (pos < input.size() && (ui8)input[pos] <= ' ')
{
if (input[pos] == '\n')
{
lineCount++;
lineStart = pos+1;
}
pos++;
}
if (pos >= input.size() || input[pos] != '/')
break;
pos++;
if (pos == input.size())
break;
if (input[pos] == '/')
pos++;
else
error("Comments must consist from two slashes!", true);
while (pos < input.size() && input[pos] != '\n')
pos++;
}
if (pos >= input.size() && verbose)
return error("Unexpected end of file!");
return true;
}
bool JsonParser::extractEscaping(std::string &str)
{
switch(input[pos])
{
break; case '\"': str += '\"';
break; case '\\': str += '\\';
break; case 'b': str += '\b';
break; case 'f': str += '\f';
break; case 'n': str += '\n';
break; case 'r': str += '\r';
break; case 't': str += '\t';
break; default: return error("Unknown escape sequence!", true);
};
return true;
}
/// Reads a quoted string starting at the current position and appends its
/// unescaped content to str.
///
/// Returns true once the closing quote was consumed, and also (as a warning)
/// when the string is cut off by an end-of-line. Returns false if there is no
/// opening quote or the input ends before the string is terminated.
///
/// Fix: a backslash followed by a newline or a control character used to
/// leave "first" one past "pos"; the following append then received
/// pos - first == (size_t)-1 as length. Pending text is now flushed only
/// when the range [first, pos) is non-empty.
bool JsonParser::extractString(std::string &str)
{
	if (input[pos] != '\"')
		return error("String expected!");
	pos++;

	size_t first = pos; // start of the chunk that was not copied to str yet

	// copies pending chunk [first, pos) to the result, if there is any
	auto flushPending = [&]()
	{
		if (first < pos)
			str.append(input, first, pos - first);
	};

	while (pos != input.size())
	{
		if (input[pos] == '\"') // Correct end of string
		{
			flushPending();
			pos++;
			return true;
		}
		if (input[pos] == '\\') // Escaping
		{
			flushPending();
			pos++;
			if (pos == input.size())
				break;
			extractEscaping(str);
			// escape symbol itself must not be copied to result
			first = pos + 1;
		}
		if (input[pos] == '\n') // end-of-line
		{
			flushPending();
			return error("Closing quote not found!", true);
		}
		if ((unsigned char)(input[pos]) < ' ') // control character
		{
			flushPending();
			first = pos+1;
			error("Illegal character in the string!", true);
		}
		pos++;
	}
	return error("Unterminated string!");
}
/// Reads a quoted string and stores it in node, turning node into a string
/// node. On failure node is left untouched.
bool JsonParser::extractString(JsonNode &node)
{
	std::string text;
	const bool extracted = extractString(text);

	if (extracted)
	{
		node.setType(JsonNode::DATA_STRING);
		node.String() = text;
	}
	return extracted;
}
/// Consumes the given literal ("null", "true", "false") at the current
/// position. If the input does not match, the whole unknown word is skipped
/// and a warning is recorded.
///
/// Fixes:
///  - the comparison is done with a bounded std::string::compare, so it can
///    no longer read past the end of input when fewer than literal.size()
///    characters are left
///  - letter ranges are inclusive now; previously 'a', 'z', 'A' and 'Z'
///    stopped the skipping in the middle of the word
bool JsonParser::extractLiteral(const std::string &literal)
{
	if (input.compare(pos, literal.size(), literal) != 0)
	{
		// skip the rest of the word so parsing can resume after it
		while (pos < input.size() && ((input[pos] >= 'a' && input[pos] <= 'z')
		                           || (input[pos] >= 'A' && input[pos] <= 'Z')))
			pos++;
		return error("Unknown literal found", true);
	}

	pos += literal.size();
	return true;
}
/// Consumes the "null" literal and clears the node.
bool JsonParser::extractNull(JsonNode &node)
{
	const bool matched = extractLiteral("null");
	if (matched)
		node.clear();
	return matched;
}
/// Consumes the "true" literal and stores boolean true in the node.
bool JsonParser::extractTrue(JsonNode &node)
{
	const bool matched = extractLiteral("true");
	if (matched)
		node.Bool() = true;
	return matched;
}
/// Consumes the "false" literal and stores boolean false in the node.
bool JsonParser::extractFalse(JsonNode &node)
{
	const bool matched = extractLiteral("false");
	if (matched)
		node.Bool() = false;
	return matched;
}
/// Parses a struct: '{' followed by "key" : value entries up to the
/// closing '}'. The current position must be at the opening brace.
/// A key that is already present is reported as a warning and its value
/// is then parsed into the existing entry.
bool JsonParser::extractStruct(JsonNode &node)
{
	node.setType(JsonNode::DATA_STRUCT);
	pos++; // opening brace

	if (!extractWhitespace())
		return false;

	//Empty struct found
	if (input[pos] == '}')
	{
		pos++;
		return true;
	}

	do
	{
		std::string key;
		if (!extractWhitespace() || !extractString(key))
			return false;

		if (node.Struct().find(key) != node.Struct().end())
			error("Dublicated element encountered!", true);

		if (!extractSeparator())
			return false;

		if (!extractElement(node.Struct()[key], '}'))
			return false;
	}
	while (input[pos] != '}');

	pos++; // closing brace
	return true;
}
/// Parses an array: '[' followed by values up to the closing ']'.
/// The current position must be at the opening bracket.
bool JsonParser::extractArray(JsonNode &node)
{
	pos++; // opening bracket
	node.setType(JsonNode::DATA_VECTOR);

	if (!extractWhitespace())
		return false;

	//Empty array found
	if (input[pos] == ']')
	{
		pos++;
		return true;
	}

	do
	{
		//NOTE: currently 50% of time is this vector resizing.
		//May be useful to use list during parsing and then swap() all items to vector
		const size_t oldSize = node.Vector().size();
		node.Vector().resize(oldSize + 1);

		if (!extractElement(node.Vector().back(), ']'))
			return false;
	}
	while (input[pos] != ']');

	pos++; // closing bracket
	return true;
}
/// Parses one value of a container plus the optional comma after it.
/// On success the position is either at the terminator of the container or
/// at the start of the next element. A missing comma between two elements
/// is reported as a warning only.
bool JsonParser::extractElement(JsonNode &node, char terminator)
{
	if (!extractValue(node) || !extractWhitespace())
		return false;

	const bool hadComma = (input[pos] == ',');
	if (hadComma)
	{
		pos++;
		if (!extractWhitespace())
			return false;
	}

	//FIXME: MOD COMPATIBILITY: Too many of these right now, re-enable later
	//if (hadComma && input[pos] == terminator)
	//error("Extra comma found!", true);

	if (input[pos] != terminator && !hadComma)
		error("Comma expected!", true);

	return true;
}
/// Parses a number in the form [-]digits[.digits] and stores it in the node
/// as float. Exponential notation is not supported.
bool JsonParser::extractFloat(JsonNode &node)
{
	auto isDigit = [](char symbol)
	{
		return symbol >= '0' && symbol <= '9';
	};

	assert(input[pos] == '-' || isDigit(input[pos]));

	const bool negative = (input[pos] == '-');
	if (negative)
		pos++;

	if (!isDigit(input[pos]))
		return error("Number expected!");

	//Extract integer part
	double result = 0;
	for (; isDigit(input[pos]); pos++)
		result = result*10+(input[pos]-'0');

	if (input[pos] == '.')
	{
		//extract fractional part
		pos++;
		if (!isDigit(input[pos]))
			return error("Decimal part expected!");

		for (double fractMult = 0.1; isDigit(input[pos]); pos++)
		{
			result = result + fractMult*(input[pos]-'0');
			fractMult /= 10;
		}
	}
	//TODO: exponential part

	node.setType(JsonNode::DATA_FLOAT);
	node.Float() = negative ? -result : result;
	return true;
}
/// Records a message together with current line and position within line.
/// Returns the "warning" flag itself, so callers can "return error(...)":
/// warnings yield true (keep going), errors yield false (give up).
bool JsonParser::error(const std::string &message, bool warning)
{
	const char * severity = warning ? " warning: " : " error: ";

	std::ostringstream report;
	report << "At line " << lineCount << ", position " << pos-lineStart
	       << severity << message << "\n";

	errors += report.str();
	return warning;
}
// Maps the name used in the "type" field of a schema to the node type that
// data must have. Names that are not listed here are not present in the map,
// so lookups must be prepared for find() to return end().
static const std::map<std::string, JsonNode::JsonType> stringToType =
boost::assign::map_list_of
("null", JsonNode::DATA_NULL) ("boolean", JsonNode::DATA_BOOL)
("number", JsonNode::DATA_FLOAT) ("string", JsonNode::DATA_STRING)
("array", JsonNode::DATA_VECTOR) ("object", JsonNode::DATA_STRUCT);
std::string JsonValidator::validateEnum(const JsonNode &node, const JsonVector &enumeration)
{
for(auto & enumEntry : enumeration)
{
if (node == enumEntry)
return "";
}
return fail("Key must have one of predefined values");
}
/// Validates node against every schema from the list and asks isValid
/// whether the number of schemas that passed is acceptable.
/// Returns an empty string if the list is absent or isValid accepted the
/// count; otherwise errorMsg followed by errors of all schemas that failed.
std::string JsonValidator::validatesSchemaList(const JsonNode &node, const JsonNode &schemas, std::string errorMsg, std::function<bool(size_t)> isValid)
{
	// nothing to test against
	if (schemas.isNull())
		return "";

	std::string report = "<tested schemas>\n";
	size_t passed = 0;

	for(const auto & candidate : schemas.Vector())
	{
		const std::string problems = validateNode(node, candidate);
		if (problems.empty())
		{
			passed++;
			continue;
		}
		report += problems;
		report += "<end of schema>\n";
	}

	if (isValid(passed))
		return "";

	return fail(errorMsg) + report;
}
std::string JsonValidator::validateNodeType(const JsonNode &node, const JsonNode &schema)
{
std::string errors;
// data must be valid against all schemas in the list
errors += validatesSchemaList(node, schema["allOf"], "Failed to pass all schemas", [&](size_t count)
{
return count == schema["allOf"].Vector().size();
});
// data must be valid against any non-zero number of schemas in the list
errors += validatesSchemaList(node, schema["anyOf"], "Failed to pass any schema", [&](size_t count)
{
return count > 0;
});
// data must be valid against one and only one schema
errors += validatesSchemaList(node, schema["oneOf"], "Failed to pass one and only one schema", [&](size_t count)
{
return count == 1;
});
// data must NOT be valid against schema
if (!schema["not"].isNull())
{
if (validateNode(node, schema["not"]).empty())
errors += fail("Successful validation against negative check");
}
return errors;
}
// Basic checks common for any nodes
std::string JsonValidator::validateNode(const JsonNode &node, const JsonNode &schema)
{
std::string errors;
assert(!schema.isNull()); // can this error be triggered?
if (node.isNull())
return ""; // node not present. consider to be "valid"
if (!schema["$ref"].isNull())
{
std::string URI = schema["$ref"].String();
//node must be validated using schema pointed by this reference and not by data here
//Local reference. Turn it into more easy to handle remote ref
if (boost::algorithm::starts_with(URI, "#"))
URI = usedSchemas.back() + URI;
return validateRoot(node, URI);
}
// basic schema check
auto & typeNode = schema["type"];
if ( !typeNode.isNull())
{
JsonNode::JsonType type = stringToType.find(typeNode.String())->second;
if(type != node.getType())
return errors + fail("Type mismatch!"); // different type. Any other checks are useless
}
errors += validateNodeType(node, schema);
// enumeration - data must be equeal to one of items in list
if (!schema["enum"].isNull())
errors += validateEnum(node, schema["enum"].Vector());
// try to run any type-specific checks
if (node.getType() == JsonNode::DATA_VECTOR) errors += validateVector(node, schema);
if (node.getType() == JsonNode::DATA_STRUCT) errors += validateStruct(node, schema);
if (node.getType() == JsonNode::DATA_STRING) errors += validateString(node, schema);
if (node.getType() == JsonNode::DATA_FLOAT) errors += validateNumber(node, schema);
return errors;
}
std::string JsonValidator::validateVectorItem(const JsonVector items, const JsonNode & schema, const JsonNode & additional, size_t index)
{
currentPath.push_back(JsonNode());
currentPath.back().Float() = index;
auto onExit = vstd::makeScopeGuard([&]
{
currentPath.pop_back();
});
if (!schema.isNull())
{
// case 1: schema is vector. Validate items agaist corresponding items in vector
if (schema.getType() == JsonNode::DATA_VECTOR)
{
if (schema.Vector().size() > index)
return validateNode(items[index], schema.Vector()[index]);
}
else // case 2: schema has to be struct. Apply it to all items, completely ignore additionalItems
{
return validateNode(items[index], schema);
}
}
// othervice check against schema in additional items field
if (additional.getType() == JsonNode::DATA_STRUCT)
return validateNode(items[index], additional);
// or, additionalItems field can be bool which indicates if such items are allowed
if (!additional.isNull() && additional.Bool() == false) // present and set to false - error
return fail("Unknown entry found");
// by default - additional items are allowed
return "";
}
//Checks "items" entry from schema (type-specific check for Vector)
//Also checks "maxItems", "minItems" and "uniqueItems" restrictions.
std::string JsonValidator::validateVector(const JsonNode &node, const JsonNode &schema)
{
	std::string errors;
	const auto & elements = node.Vector();
	const size_t count = elements.size();

	// every item on its own
	const auto & itemsSchema = schema["items"];
	const auto & additionalSchema = schema["additionalItems"];
	for (size_t index = 0; index < count; index++)
		errors += validateVectorItem(elements, itemsSchema, additionalSchema, index);

	// size restrictions
	if (vstd::contains(schema.Struct(), "maxItems") && count > schema["maxItems"].Float())
		errors += fail("Too many items in the list!");

	if (vstd::contains(schema.Struct(), "minItems") && count < schema["minItems"].Float())
		errors += fail("Too few items in the list");

	// compare every pair of items, one error per pair of equal items
	if (schema["uniqueItems"].Bool())
	{
		for (size_t a = 0; a < count; a++)
		{
			for (size_t b = a + 1; b < count; b++)
			{
				if (elements[a] == elements[b])
					errors += fail("List must consist from unique items");
			}
		}
	}
	return errors;
}
std::string JsonValidator::validateStructItem(const JsonNode &node, const JsonNode & schema, const JsonNode & additional, std::string nodeName)
{
currentPath.push_back(JsonNode());
currentPath.back().String() = nodeName;
auto onExit = vstd::makeScopeGuard([&]
{
currentPath.pop_back();
});
// there is schema specifically for this item
if (!schema[nodeName].isNull())
return validateNode(node, schema[nodeName]);
// try generic additionalItems schema
if (additional.getType() == JsonNode::DATA_STRUCT)
return validateNode(node, additional);
// or, additionalItems field can be bool which indicates if such items are allowed
if (!additional.isNull() && additional.Bool() == false) // present and set to false - error
return fail("Unknown entry found: " + nodeName);
// by default - additional items are allowed
return "";
}
//Checks "properties" entry from schema (type-specific check for Struct)
std::string JsonValidator::validateStruct(const JsonNode &node, const JsonNode &schema)
{
std::string errors;
auto & map = node.Struct();
{
auto & properties = schema["properties"];
auto & additional = schema["additionalProperties"];
for(auto & entry : map)
errors += validateStructItem(entry.second, properties, additional, entry.first);
}
for(auto & required : schema["required"].Vector())
{
if (node[required.String()].isNull())
errors += fail("Required entry " + required.String() + " is missing");
}
//Copy-paste from vector code. yay!
if (vstd::contains(schema.Struct(), "maxProperties") && map.size() > schema["maxProperties"].Float())
errors += fail("Too many items in the list!");
if (vstd::contains(schema.Struct(), "minItems") && map.size() < schema["minItems"].Float())
errors += fail("Too few items in the list");
if (schema["uniqueItems"].Bool())
{
for (auto itA = map.begin(); itA != map.end(); itA++)
{
auto itB = itA;
while (++itB != map.end())
{
if (itA->second == itB->second)
errors += fail("List must consist from unique items");
}
}
}
// dependencies. Format is object/struct where key is the name of key in data
// and value is either:
// a) array of fields that must be present
// b) struct with schema against which data should be valid
// These checks are triggered only if key is present
for(auto & deps : schema["dependencies"].Struct())
{
if (vstd::contains(map, deps.first))
{
if (deps.second.getType() == JsonNode::DATA_VECTOR)
{
JsonVector depList = deps.second.Vector();
for(auto & depEntry : depList)
{
if (!vstd::contains(map, depEntry.String()))
errors += fail("Property " + depEntry.String() + " required for " + deps.first + " is missing");
}
}
else
{
if (!validateNode(node, deps.second).empty())
errors += fail("Requirements for " + deps.first + " are not fulfilled");
}
}
}
// TODO: missing fields from draft v4
// patternProperties
return errors;
}
/// Type-specific checks for strings: "maxLength" and "minLength".
/// Length is measured as std::string::size() of the value.
std::string JsonValidator::validateString(const JsonNode &node, const JsonNode &schema)
{
	std::string errors;
	const size_t length = node.String().size();

	const bool tooLong = vstd::contains(schema.Struct(), "maxLength") && length > schema["maxLength"].Float();
	if (tooLong)
		errors += fail("String too long");

	const bool tooShort = vstd::contains(schema.Struct(), "minLength") && length < schema["minLength"].Float();
	if (tooShort)
		errors += fail("String too short");

	// TODO: missing fields from draft v4
	// pattern
	return errors;
}
/// Type-specific checks for numbers: "maximum" / "exclusiveMaximum",
/// "minimum" / "exclusiveMinimum" and "multipleOf".
///
/// Fix: failure of the "multipleOf" check appended a bare string without
/// path and line break. It goes through fail() now, like all other checks.
std::string JsonValidator::validateNumber(const JsonNode &node, const JsonNode &schema)
{
	std::string errors;
	auto & value = node.Float();

	if (vstd::contains(schema.Struct(), "maximum"))
	{
		// with exclusive limit the value can't be equal to the limit itself
		if (schema["exclusiveMaximum"].Bool())
		{
			if (value >= schema["maximum"].Float())
				errors += fail("Value is too large");
		}
		else
		{
			if (value > schema["maximum"].Float())
				errors += fail("Value is too large");
		}
	}

	if (vstd::contains(schema.Struct(), "minimum"))
	{
		if (schema["exclusiveMinimum"].Bool())
		{
			if (value <= schema["minimum"].Float())
				errors += fail("Value is too small");
		}
		else
		{
			if (value < schema["minimum"].Float())
				errors += fail("Value is too small");
		}
	}

	if (vstd::contains(schema.Struct(), "multipleOf"))
	{
		// value is a multiple if division leaves no fractional part
		double result = value / schema["multipleOf"].Float();
		if (floor(result) != result)
			errors += fail("Value is not divisible");
	}
	return errors;
}
//basic schema validation (like checking $schema entry).
//Loads schema by name and validates node against it. Name of the schema
//without its "#..." part stays on top of usedSchemas during validation.
std::string JsonValidator::validateRoot(const JsonNode &node, std::string schemaName)
{
	const JsonNode & schema = JsonUtils::getSchema(schemaName);

	const size_t fragmentStart = schemaName.find('#');
	usedSchemas.push_back(schemaName.substr(0, fragmentStart));
	auto onExit = vstd::makeScopeGuard([&]
	{
		usedSchemas.pop_back();
	});

	if (schema.isNull())
		return fail("Schema not found!");

	return validateNode(node, schema);
}
/// Builds an error description: location of the node that is being checked
/// (taken from currentPath, or "<root>" if the path is empty) followed by
/// the message on a separate line.
std::string JsonValidator::fail(const std::string &message)
{
	std::string report = "At ";

	if (currentPath.empty())
	{
		report += "<root>";
	}
	else
	{
		for(const JsonNode &step : currentPath)
		{
			report += "/";
			// steps are either names of struct members or indexes in vectors
			if (path_isName(step))
				report += step.String();
			else
				report += boost::lexical_cast<std::string>(static_cast<unsigned>(step.Float()));
		}
	}

	report += "\n\t Error: " + message + "\n";
	return report;
}
/// Validates root against the schema with given name. Problems are written
/// to the global log as warnings, "name" identifies the data in the log.
/// Returns true if no problems were found.
bool JsonValidator::validate(const JsonNode &root, std::string schemaName, std::string name)
{
	const std::string errors = validateRoot(root, schemaName);
	const bool valid = errors.empty();

	if (!valid)
	{
		logGlobal->warnStream() << "Data in " << name << " is invalid!";
		logGlobal->warnStream() << errors;
	}
	return valid;
}
///JsonUtils
void JsonUtils::parseTypedBonusShort(const JsonVector& source, Bonus *dest)
@ -1499,8 +633,13 @@ void JsonUtils::maximize(JsonNode & node, std::string schemaName)
// Validates node against the schema named schemaName; dataName identifies
// the data in log messages. Returns true if the data is valid.
//
// NOTE(review): as written, the function returns the result of
// JsonValidator::validate unconditionally, so everything after that return
// (the Validation::check call and its error logging) is unreachable.
// This looks like old and new implementation merged together - presumably
// only one of them is meant to stay; confirm against the real file.
bool JsonUtils::validate(const JsonNode &node, std::string schemaName, std::string dataName)
{
JsonValidator validator;
return validator.validate(node, schemaName, dataName);
// unreachable from here on, see note above
std::string log = Validation::check(schemaName, node);
if (!log.empty())
{
logGlobal->errorStream() << "Data in " << dataName << " is invalid!";
logGlobal->errorStream() << log;
}
return log.empty();
}
const JsonNode & getSchemaByName(std::string name)