/*
 * ERMParser.cpp, part of VCMI engine
 *
 * Authors: listed in file AUTHORS in main folder
 *
 * License: GNU General Public License v2.0 or later
 * Full text of license available in license.txt file, in main folder
 *
 */
#include "StdInc.h"
#include "ERMParser.h"

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = spirit::ascii;
namespace phoenix = boost::phoenix;

//Greenspun's Tenth Rule of Programming:
//Any sufficiently complicated C or Fortran program contains an ad hoc, informally-specified,
//bug-ridden, slow implementation of half of Common Lisp.
//actually these macros help in dealing with boost::variant

/// Builds a preprocessor over the given script text and consumes its first line as the header.
/// The header selects the script version: "ZVSE" -> ERM, "VERM" -> VERM; anything else is
/// logged as an error and the version stays Version::INVALID.
CERMPreprocessor::CERMPreprocessor(const std::string & source)
	: sourceStream(source),
	lineNo(0),
	version(Version::INVALID)
{
	//check header
	std::string header;
	getline(header);

	if(header == "ZVSE")
		version = Version::ERM;
	else if(header == "VERM")
		version = Version::VERM;
	else
		logGlobal->error("File %s has wrong header", fname); // NOTE(review): fname is not set by this constructor - confirm it is initialised elsewhere
}

/// Thrown when the script text cannot be assembled into commands or a command cannot be parsed.
class ParseErrorException : public std::exception
{
public:
	/// Gives log messages that print what() a meaningful text instead of the
	/// implementation-defined default of std::exception.
	const char * what() const noexcept override
	{
		return "ERM parse error";
	}
};

/// Assembles and returns the next complete command from the source stream.
///
/// A command starts on a line whose first character is '!'; other lines (and lines shorter
/// than two characters) are skipped as comments while no command is in progress. A command
/// may continue over several lines while a '[' bracket or a '^' string marker is still open.
///
/// @return the command text, or an empty string when the input is exhausted
/// @throws ParseErrorException when the input ends with an unclosed bracket or string
std::string CERMPreprocessor::retrieveCommandLine()
{
	std::string wholeCommand;

	//parse file
	bool verm = false;
	bool openedString = false;
	int openedBraces = 0;

	while(sourceStream.good())
	{
		std::string line;
		getline(line); //reading line

		const bool inTheMiddle = openedBraces || openedString;

		if(!inTheMiddle)
		{
			if(line.size() < 2)
				continue;
			if(line[0] != '!') //command lines must begin with ! -> otherwise treat as comment
				continue;
			verm = line[1] == '[';
		}

		if(openedString)
		{
			//a string constant continues from the previous line -> keep the line break inside it
			wholeCommand += "\n";

			const size_t closingMarker = line.find('^');
			if(closingMarker != std::string::npos)
			{
				//move everything up to (and including) the closing marker into the command
				wholeCommand += line.substr(0, closingMarker + 1);
				line.erase(0, closingMarker + 1);
				openedString = false;
			}
			else //no closing marker -> the whole line is further part of string
			{
				wholeCommand += line;
				continue;
			}
		}

		//index of the first character that is no longer part of the command (used for VERM lines below)
		size_t i = 0;
		for(; i < line.length(); i++)
		{
			const char c = line[i];
			if(!openedString)
			{
				if(c == '[')
					openedBraces++;
				else if(c == ']')
				{
					openedBraces--;
					if(!openedBraces) //the last brace has been matched -> stop "parsing", everything else in the line is comment
					{
						i++;
						break;
					}
				}
				else if(c == '^')
					openedString = true;
				else if(c == ';' && !verm) //do not allow comments inside VExp for now
				{
					line.erase(i + 1); //leave ';' at the end only at ERM commands
					break;
				}
//				else if(c == ';') // a ';' that is in command line (and not in string) ends the command -> throw away rest
//				{
//					line.erase(i+!verm, line.length() - i - !verm); //leave ';' at the end only at ERM commands
//					break;
//				}
			}
			else if(c == '^')
				openedString = false;
		}

		//for VERM lines drop whatever follows the closing bracket
		if(verm && !openedBraces && i < line.length())
		{
			line.erase(i);
		}

		if(wholeCommand.size()) //separate lines with a space
			wholeCommand += " ";

		wholeCommand += line;
		if(!openedBraces && !openedString)
			return wholeCommand;

		//loop end
	}

	if(openedBraces || openedString)
	{
		logGlobal->error("Ill-formed file: %s", fname);
		throw ParseErrorException();
	}
	return "";
}

/// Reads the next line from the source stream into ret, trims surrounding whitespace
/// and advances the current line counter.
void CERMPreprocessor::getline(std::string &ret)
{
	lineNo++;
	std::getline(sourceStream, ret);
	boost::trim(ret); //get rid of wspace
}

/// Creates the parser together with its grammar instance.
ERMParser::ERMParser()
{
	ERMgrammar = std::make_shared<ERM::ERM_grammar<std::string::const_iterator>>();
}

ERMParser::~ERMParser() = default;

/// Parses every command delivered by the preprocessor.
///
/// @param preproc source of command lines; read until it returns an empty command
/// @return one LineInfo (line number + parsed line) per command
/// @throws ParseErrorException rethrown after logging the current file name and line number
std::vector<LineInfo> ERMParser::parseFile(CERMPreprocessor & preproc)
{
	std::vector<LineInfo> ret;
	try
	{
		while(true)
		{
			std::string command = preproc.retrieveCommandLine();
			if(command.empty())
				break;

			repairEncoding(command);
			LineInfo li;
			li.realLineNum = preproc.getCurLineNo();
			li.tl = parseLine(command, li.realLineNum);
			ret.push_back(li);
		}
	}
	catch(const ParseErrorException & e)
	{
		logGlobal->error("ERM Parser Error. File: '%s' Line: %d Exception: '%s'"
			, preproc.getCurFileName(), preproc.getCurLineNo(), e.what());
		throw;
	}
	return ret;
}

//Fusion adaptations: expose the AST structures as sequences so that the grammar rules
//below can fill them in directly from their attributes.

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TStringConstant,
	(std::string, str)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TMacroUsage,
	(std::string, macro)
	)

// BOOST_FUSION_ADAPT_STRUCT(
// 	ERM::TQMacroUsage,
// 	(std::string, qmacro)
// 	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TMacroDef,
	(std::string, macro)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarExpNotMacro,
	(boost::optional<char>, questionMark)
	(std::string, varsym)
	(ERM::TVarExpNotMacro::Tval, val)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TArithmeticOp,
	(ERM::TIexp, lhs)
	(char, opcode)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarpExp,
	(ERM::TVarExp, var)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVRLogic,
	(char, opcode)
	(ERM::TIexp, var)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVRArithmetic,
	(char, opcode)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TNormalBodyOption,
	(char, optionCode)
	(boost::optional<ERM::TNormalBodyOptionList>, params)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Ttrigger,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TComparison,
	(ERM::TIexp, lhs)
	(std::string, compSign)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TSemiCompare,
	(std::string, compSign)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TCurriedString,
	(ERM::TIexp, iexp)
	(ERM::TStringConstant, string)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarConcatString,
	(ERM::TVarExp, var)
	(ERM::TStringConstant, string)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tcondition,
	(char, ctype)
	(ERM::Tcondition::Tcond, cond)
	(ERM::TconditionNode, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tinstruction,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	(ERM::Tbody, body)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Treceiver,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	(boost::optional<ERM::Tbody>, body)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TPostTrigger,
	(ERM::TCmdName, name)
	(boost::optional<ERM::Tidentifier>, identifier)
	(boost::optional<ERM::Tcondition>, condition)
	)

//BOOST_FUSION_ADAPT_STRUCT(
//	ERM::Tcommand,
//	(ERM::Tcommand::Tcmd, cmd)
//	(std::string, comment)
//	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tcommand,
	(ERM::Tcommand::Tcmd, cmd)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVExp,
	(std::vector<ERM::TVModifier>, modifier)
	(std::vector<ERM::TVOption>, children)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TSymbol,
	(std::vector<ERM::TVModifier>, symModifier)
	(std::string, sym)
	)

namespace ERM
{
	/// Spirit Qi grammar for a single preprocessed script line.
	/// The start rule is vline; whitespace between tokens is skipped (ascii::space).
	template<typename Iterator>
	struct ERM_grammar : qi::grammar<Iterator, TLine(), ascii::space_type>
	{
		ERM_grammar() : ERM_grammar::base_type(vline, "VERM script line")
		{
			//do not build too complicated expressions, e.g. (a >> b) | c, qi has problems with them

			//macros: $name$ is a usage, @name@ is a definition
			ERMmacroUsage %= qi::lexeme[qi::lit('$') >> *(qi::char_ - '$') >> qi::lit('$')];
			ERMmacroDef %= qi::lexeme[qi::lit('@') >> *(qi::char_ - '@') >> qi::lit('@')];
			//variable: optional '?', lowercase letters (except 'u') and an optional integer
			varExpNotMacro %= -qi::char_("?") >> (+(qi::char_("a-z") - 'u')) >> -qi::int_;
			//TODO: mixed var/macro expressions like in !!HE-1&407:Id$cost$; [script 13]
			/*qERMMacroUsage %= qi::lexeme[qi::lit("?$") >> *(qi::char_ - '$') >> qi::lit('$')];*/
			varExp %= varExpNotMacro | ERMmacroUsage;
			iexp %= varExp | qi::int_;
			varp %= qi::lit("?") >> varExp;
			comment %= *qi::char_;
			//a comment line either does not start with '!' or has '!' not followed by a command marker
			commentLine %= (~qi::char_("!") >> comment | (qi::char_('!') >> (~qi::char_("?!$#[")) >> comment ));
			//command names are exactly two characters
			cmdName %= qi::lexeme[qi::repeat(2)[qi::char_]];
			arithmeticOp %= iexp >> qi::char_ >> iexp;
			//???
			//identifier is usually a vector of i-expressions but VR receiver performs arithmetic operations on it
			//identifier %= (iexp | arithmeticOp) % qi::lit('/');
			identifier %= iexp % qi::lit('/');
			comparison %= iexp >> (*qi::char_("<=>")) >> iexp;
			condition %= qi::char_("&|/") >> (comparison | qi::int_) >> -condition;

			trigger %= cmdName >> -identifier >> -condition > qi::lit(";"); /////
			//string constants are delimited by '^'
			string %= qi::lexeme['^' >> *(qi::char_ - '^') >> '^'];

			VRLogic %= qi::char_("&|") >> iexp;
			VRarithmetic %= qi::char_("+*:/%-") >> iexp;
			semiCompare %= +qi::char_("<=>") >> iexp;
			curStr %= iexp >> string;
			varConcatString %= varExp >> qi::lit("+") >> string;
			bodyOptionItem %= varConcatString | curStr | string | semiCompare | ERMmacroDef | varp | iexp ;
			exactBodyOptionList %= (bodyOptionItem % qi::lit("/"));
			normalBodyOption = qi::char_("A-Z") > -(exactBodyOptionList);
			bodyOption %= VRLogic | VRarithmetic | normalBodyOption;
			body %= qi::lit(":") >> *(bodyOption) > qi::lit(";");

			instruction %= cmdName >> -identifier >> -condition >> body;
			receiver %= cmdName >> -identifier >> -condition >> body;
			postTrigger %= cmdName >> -identifier >> -condition > qi::lit(";");

			//the character after the leading '!' selects the kind of command
			command %= (qi::lit("!") >>
					(
						(qi::lit("?") >> trigger) |
						(qi::lit("!") >> receiver) |
						(qi::lit("#") >> instruction) |
						(qi::lit("$") >> postTrigger)
					) //>> comment
				);

			rline %=
				(
					command | commentLine | spirit::eps
				);

			vmod %= qi::string("`") | qi::string(",!") | qi::string(",") | qi::string("#'") | qi::string("'");
			vsym %= *vmod >> qi::lexeme[+qi::char_("+*/$%&_=<>~a-zA-Z0-9-")];

			//strict policy: a number is parsed as double only if it has a fractional part or exponent,
			//so plain integers fall through to the qi::int_ alternative below
			qi::real_parser<double, qi::strict_real_policies<double> > strict_double;

			vopt %= qi::lexeme[(qi::lit("!") >> qi::char_ >> qi::lit("!"))] | qi::lexeme[strict_double] | qi::lexeme[qi::int_] | command | vexp | string | vsym;
			vexp %= *vmod >> qi::lit("[") >> *(vopt) >> qi::lit("]");

			//a line is either a V expression introduced by '!' or a regular ERM line; all input must be consumed
			vline %= (( qi::lit("!") >>vexp) | rline ) > spirit::eoi;

			//error handling

			string.name("string constant");
			ERMmacroUsage.name("macro usage");
			/*qERMMacroUsage.name("macro usage with ?");*/
			ERMmacroDef.name("macro definition");
			varExpNotMacro.name("variable expression (not macro)");
			varExp.name("variable expression");
			iexp.name("i-expression");
			comment.name("comment");
			commentLine.name("comment line");
			cmdName.name("name of a command");
			identifier.name("identifier");
			condition.name("condition");
			trigger.name("trigger");
			body.name("body");
			instruction.name("instruction");
			receiver.name("receiver");
			postTrigger.name("post trigger");
			command.name("command");
			rline.name("ERM script line");
			vsym.name("V symbol");
			vopt.name("V option");
			vexp.name("V expression");
			vline.name("VERM line");

			//on an expectation failure print what was expected and the unparsed remainder, then fail the parse
			qi::on_error<qi::fail>
				(
				vline
				, std::cout //or phoenix::ref(std::cout), is there any difference?
				<< phoenix::val("Error! Expecting ")
				<< qi::_4 // what failed?
				<< phoenix::val(" here: \"")
				<< phoenix::construct<std::string>(qi::_3, qi::_2) // iterators to error-pos, end
				<< phoenix::val("\"")
				);
		}

		qi::rule<Iterator, TStringConstant(), ascii::space_type> string;

		qi::rule<Iterator, TMacroUsage(), ascii::space_type> ERMmacroUsage;
		/*qi::rule<Iterator, TQMacroUsage(), ascii::space_type> qERMMacroUsage;*/
		qi::rule<Iterator, TMacroDef(), ascii::space_type> ERMmacroDef;
		qi::rule<Iterator, TVarExpNotMacro(), ascii::space_type> varExpNotMacro;
		qi::rule<Iterator, TVarExp(), ascii::space_type> varExp;
		qi::rule<Iterator, TIexp(), ascii::space_type> iexp;
		qi::rule<Iterator, TVarpExp(), ascii::space_type> varp;
		qi::rule<Iterator, TArithmeticOp(), ascii::space_type> arithmeticOp;
		qi::rule<Iterator, std::string(), ascii::space_type> comment;
		qi::rule<Iterator, std::string(), ascii::space_type> commentLine;
		qi::rule<Iterator, TCmdName(), ascii::space_type> cmdName;
		qi::rule<Iterator, Tidentifier(), ascii::space_type> identifier;
		qi::rule<Iterator, TComparison(), ascii::space_type> comparison;
		qi::rule<Iterator, Tcondition(), ascii::space_type> condition;
		qi::rule<Iterator, TVRLogic(), ascii::space_type> VRLogic;
		qi::rule<Iterator, TVRArithmetic(), ascii::space_type> VRarithmetic;
		qi::rule<Iterator, TSemiCompare(), ascii::space_type> semiCompare;
		qi::rule<Iterator, TCurriedString(), ascii::space_type> curStr;
		qi::rule<Iterator, TVarConcatString(), ascii::space_type> varConcatString;
		qi::rule<Iterator, TBodyOptionItem(), ascii::space_type> bodyOptionItem;
		qi::rule<Iterator, TNormalBodyOptionList(), ascii::space_type> exactBodyOptionList;
		qi::rule<Iterator, TNormalBodyOption(), ascii::space_type> normalBodyOption;
		qi::rule<Iterator, TBodyOption(), ascii::space_type> bodyOption;
		qi::rule<Iterator, Ttrigger(), ascii::space_type> trigger;
		qi::rule<Iterator, Tbody(), ascii::space_type> body;
		qi::rule<Iterator, Tinstruction(), ascii::space_type> instruction;
		qi::rule<Iterator, Treceiver(), ascii::space_type> receiver;
		qi::rule<Iterator, TPostTrigger(), ascii::space_type> postTrigger;
		qi::rule<Iterator, Tcommand(), ascii::space_type> command;
		qi::rule<Iterator, TERMline(), ascii::space_type> rline;
		qi::rule<Iterator, TSymbol(), ascii::space_type> vsym;
		qi::rule<Iterator, TVModifier(), ascii::space_type> vmod;
		qi::rule<Iterator, TVOption(), ascii::space_type> vopt;
		qi::rule<Iterator, TVExp(), ascii::space_type> vexp;
		qi::rule<Iterator, TLine(), ascii::space_type> vline;
	};
}

/// Parses a single command line. The line number is accepted for the caller's convenience
/// but is not used here; any exception from the underlying parse propagates unchanged.
ERM::TLine ERMParser::parseLine(const std::string & line, int realLineNo)
{
	//logGlobal->error("Parse error occurred in file %s (line %d): %s", fname, realLineNo, line);
	return parseLine(line);
}

/// Parses a single command line into its AST.
///
/// @throws ParseErrorException when the grammar rejects the line or leaves input unconsumed;
///         the unparsed remainder is logged first
ERM::TLine ERMParser::parseLine(const std::string & line)
{
	auto beg = line.begin();
	auto end = line.end();

	ERM::TLine AST;

	//phrase_parse advances beg to the first character it could not consume
	const bool r = qi::phrase_parse(beg, end, *ERMgrammar, ascii::space, AST);
	if(!r || beg != end)
	{
		logGlobal->error("Parse error: cannot parse: %s", std::string(beg, end));
		throw ParseErrorException();
	}
	return AST;
}

/// Replaces every character that has its highest bit (0x80) set with '|'.
void ERMParser::repairEncoding(std::string & str) const
{
	for(char & c : str)
		if(c & 0x80)
			c = '|';
}

/// Same as the std::string overload, for a raw buffer of len characters.
void ERMParser::repairEncoding(char * str, int len) const
{
	for(int g = 0; g < len; ++g)
		if(str[g] & 0x80)
			str[g] = '|';
}