#include "StdInc.h"
#include "ERMParser.h"

/*
 * ERMParser.cpp, part of VCMI engine
 *
 * Authors: listed in file AUTHORS in main folder
 *
 * License: GNU General Public License v2.0 or later
 * Full text of license available in license.txt file, in main folder
 *
 */

//Greenspun's Tenth Rule of Programming:
//Any sufficiently complicated C or Fortran program contains an ad hoc, informally-specified,
//bug-ridden, slow implementation of half of Common Lisp.
//actually these macros help in dealing with boost::variant

/// Opens the script file and determines its dialect from the first line.
///
/// A first line of "ZVSE" selects ERM, "VERM" selects VERM. If the file cannot
/// be opened, or the header is anything else, an error is logged and `version`
/// keeps its initial value INVALID.
///
/// @param Fname path of the script file to read
CERMPreprocessor::CERMPreprocessor(const std::string &Fname)
	: fname(Fname), file(Fname.c_str()), lineNo(0), version(INVALID)
{
	if(!file.is_open())
	{
		logGlobal->errorStream() << "File " << Fname << " not found or unable to open";
		return;
	}

	//check header
	std::string header;
	getline(header);

	if(header == "ZVSE")
		version = ERM;
	else if(header == "VERM")
		version = VERM;
	else
		logGlobal->errorStream() << "File " << fname << " has wrong header";
}

/// Thrown by the parser when a line cannot be parsed; caught in ERMParser::parseFile.
class ParseErrorException : public std::exception
{

};

/// Assembles the next complete command from the file.
///
/// Physical lines are read until neither a '^'-delimited string nor a
/// '['...']' expression is left open. Outside of an unfinished command, lines
/// shorter than two characters or not starting with '!' are skipped as
/// comments. Continuation lines are joined with a single space; a line that
/// continues an open string is additionally preceded by the two characters
/// backslash and 'n'.
///
/// @return the assembled command, or an empty string once the file is exhausted
///         (an error is logged if a string or bracket was still open).
std::string CERMPreprocessor::retreiveCommandLine()
{
	std::string wholeCommand;

	//parse file
	bool verm = false;
	bool openedString = false;
	int openedBraces = 0;

	while(file.good())
	{
		std::string line;
		getline(line); //reading line

		const size_t dash = line.find_first_of('^');
		const bool inTheMiddle = openedBraces || openedString;

		if(!inTheMiddle)
		{
			if(line.size() < 2)
				continue;
			if(line[0] != '!') //command lines must begin with ! -> otherwise treat as comment
				continue;
			verm = line[1] == '[';
		}

		if(openedString)
		{
			wholeCommand += "\\n";
			if(dash != std::string::npos)
			{
				//move the part before the closing marker to the command; the marker itself stays in `line`
				wholeCommand += line.substr(0, dash);
				line.erase(0, dash);
			}
			else //no closing marker -> the whole line is further part of string
			{
				wholeCommand += line;
				continue;
			}
		}

		//size_t (not int) so the index has the same type as line.length()
		size_t i = 0;
		for(; i < line.length(); i++)
		{
			const char c = line[i];
			if(!openedString)
			{
				if(c == '[')
					openedBraces++;
				else if(c == ']')
				{
					openedBraces--;
					if(!openedBraces) //the last brace has been matched -> stop "parsing", everything else in the line is comment
					{
						i++;
						break;
					}
				}
				else if(c == '^')
					openedString = true;
				else if(c == ';') // a ';' that is in command line (and not in string) ends the command -> throw away rest
				{
					line.erase(i + !verm, line.length() - i - !verm); //leave ';' at the end only at ERM commands
					break;
				}
			}
			else if(c == '^')
				openedString = false;
		}

		//for VERM, drop whatever follows the point where scanning stopped
		if(verm && !openedBraces && i < line.length())
		{
			line.erase(i, line.length() - i);
		}

		if(wholeCommand.size()) //separate lines with a space
			wholeCommand += " ";

		wholeCommand += line;
		if(!openedBraces && !openedString)
			return wholeCommand;

		//loop end
	}

	if(openedBraces || openedString)
		logGlobal->errorStream() << "Ill-formed file: " << fname;
	return "";
}

/// Reads one line from the file into `ret`, trims it, and increments the line counter.
void CERMPreprocessor::getline(std::string &ret)
{
	lineNo++;
	std::getline(file, ret);
	boost::trim(ret); //get rid of wspace
}

/// Stores the path of the script file; nothing is read until parseFile() is called.
ERMParser::ERMParser(std::string file)
	:srcFile(file)
{}

/// Parses every command of the source file.
///
/// Commands are fetched from a CERMPreprocessor until it returns an empty
/// string. Each command is passed through repairEncoding() and parseLine().
/// If parsing throws ParseErrorException, a message is logged and the lines
/// parsed so far are returned.
///
/// @return one LineInfo (line number and parsed line) per successfully parsed command
std::vector<LineInfo> ERMParser::parseFile()
{
	CERMPreprocessor preproc(srcFile);
	std::vector<LineInfo> ret;
	try
	{
		while(true)
		{
			std::string command = preproc.retreiveCommandLine();
			if(command.empty())
				break;

			repairEncoding(command);
			LineInfo li;
			li.realLineNum = preproc.getCurLineNo();
			li.tl = parseLine(command, li.realLineNum);
			ret.push_back(li);
		}
	}
	catch(const ParseErrorException &)
	{
		logGlobal->errorStream() << "stopped parsing file";
	}
	return ret;
}

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TStringConstant,
	(std::string, str)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TMacroUsage,
	(std::string, macro)
	)

//
// BOOST_FUSION_ADAPT_STRUCT(
// 	ERM::TQMacroUsage,
// 	(std::string, qmacro)
// 	)

// Fusion adaptations of the ERM AST structs: each lists the members of a struct
// as (type, name) pairs, in order.
// NOTE(review): several member types below appear to have lost their template
// arguments (e.g. `boost::optional`, `std::vector`) — confirm against the original file.

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TMacroDef,
	(std::string, macro)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarExpNotMacro,
	(boost::optional, questionMark)
	(std::string, varsym)
	(ERM::TVarExpNotMacro::Tval, val)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TArithmeticOp,
	(ERM::TIexp, lhs)
	(char, opcode)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarpExp,
	(ERM::TVarExp, var)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVRLogic,
	(char, opcode)
	(ERM::TIexp, var)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVRArithmetic,
	(char, opcode)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TNormalBodyOption,
	(char, optionCode)
	(ERM::TNormalBodyOptionList, params)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Ttrigger,
	(ERM::TCmdName, name)
	(boost::optional, identifier)
	(boost::optional, condition)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TComparison,
	(ERM::TIexp, lhs)
	(std::string, compSign)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TSemiCompare,
	(std::string, compSign)
	(ERM::TIexp, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TCurriedString,
	(ERM::TIexp, iexp)
	(ERM::TStringConstant, string)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVarConcatString,
	(ERM::TVarExp, var)
	(ERM::TStringConstant, string)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tcondition,
	(char, ctype)
	(ERM::Tcondition::Tcond, cond)
	(ERM::TconditionNode, rhs)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tinstruction,
	(ERM::TCmdName, name)
	(boost::optional, identifier)
	(boost::optional, condition)
	(ERM::Tbody, body)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Treceiver,
	(ERM::TCmdName, name)
	(boost::optional, identifier)
	(boost::optional, condition)
	(boost::optional, body)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TPostTrigger,
	(ERM::TCmdName, name)
	(boost::optional, identifier)
	(boost::optional, condition)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::Tcommand,
	(ERM::Tcommand::Tcmd, cmd)
	(std::string, comment)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TVExp,
	(std::vector, modifier)
	(std::vector, children)
	)

BOOST_FUSION_ADAPT_STRUCT(
	ERM::TSymbol,
	(std::vector, symModifier)
	(std::string, sym)
	)

namespace ERM
{
	// Spirit Qi grammar for a single script line; the start rule is `vline`.
	// NOTE(review): the template parameter list of this struct and the template
	// arguments of qi::grammar / qi::rule / qi::real_parser / qi::on_error /
	// phoenix::construct appear to be missing in this copy — confirm against the
	// original file before building.
	//
	// Operator reminder (Spirit Qi): `>>` sequence, `>` expectation (failure after
	// this point is an error), unary `-` optional, unary `*` zero or more,
	// unary `+` one or more, `a % b` list of a separated by b, `|` alternative,
	// `a - b` a but not b, `~` negated character parser.
	template struct ERM_grammar : qi::grammar
	{
		ERM_grammar() : ERM_grammar::base_type(vline, "VERM script line")
		{
			//do not build too complicated expressions, e.g. (a >> b) | c, qi has problems with them
			// $name$ : macro usage, @name@ : macro definition
			ERMmacroUsage %= qi::lexeme[qi::lit('$') >> *(qi::char_ - '$') >> qi::lit('$')];
			ERMmacroDef %= qi::lexeme[qi::lit('@') >> *(qi::char_ - '@') >> qi::lit('@')];
			// optional '?', one or more lowercase letters other than 'u', optional integer
			varExpNotMacro %= -qi::char_("?") >> (+(qi::char_("a-z") - 'u')) >> -qi::int_;
			//TODO: mixed var/macro expressions like in !!HE-1&407:Id$cost$; [script 13]
			/*qERMMacroUsage %= qi::lexeme[qi::lit("?$") >> *(qi::char_ - '$') >> qi::lit('$')];*/
			varExp %= varExpNotMacro | ERMmacroUsage;
			iexp %= varExp | qi::int_;
			varp %= qi::lit("?") >> varExp;
			// a comment consumes every remaining character
			comment %= *qi::char_;
			// a comment line either does not start with '!', or starts with '!' followed
			// by a character that is none of ? ! $ # [
			commentLine %= (~qi::char_("!") >> comment | (qi::char_('!') >> (~qi::char_("?!$#[")) >> comment ));
			// command names are exactly two characters
			cmdName %= qi::lexeme[qi::repeat(2)[qi::char_]];
			arithmeticOp %= iexp >> qi::char_ >> iexp;
			//identifier is usually a vector of i-expressions but VR receiver performs arithmetic operations on it
			identifier %= (iexp | arithmeticOp) % qi::lit('/');
			comparison %= iexp >> (*qi::char_("<=>")) >> iexp;
			// conditions chain recursively: connective, then comparison or integer, then optionally another condition
			condition %= qi::char_("&|X/") >> (comparison | qi::int_) >> -condition;

			trigger %= cmdName >> -identifier >> -condition > qi::lit(";");
			/////
			// ^text^ : string constant
			string %= qi::lexeme['^' >> *(qi::char_ - '^') >> '^'];

			VRLogic %= qi::char_("&|X") >> iexp;
			VRarithmetic %= qi::char_("+*:/%-") >> iexp;
			semiCompare %= +qi::char_("<=>") >> iexp;
			curStr %= iexp >> string;
			varConcatString %= varExp >> qi::lit("+") >> string;
			// alternatives are tried in the order written; qi::eps as last alternative allows an empty item
			bodyOptionItem %= varConcatString | curStr | string | semiCompare | ERMmacroDef | varp | iexp | qi::eps;
			exactBodyOptionList %= (bodyOptionItem % qi::lit("/"));
			// note: plain `=` here, unlike the `%=` used by the other rules
			normalBodyOption = qi::char_("A-Z+") > exactBodyOptionList;
			bodyOption %= VRLogic | VRarithmetic | normalBodyOption;
			body %= qi::lit(":") >> +(bodyOption) > qi::lit(";");

			instruction %= cmdName >> -identifier >> -condition >> body;
			receiver %= cmdName >> -identifier >> -condition >> -body; //receiver without body exists... change needed
			postTrigger %= cmdName >> -identifier >> -condition > qi::lit(";");

			// a command is '!' followed by a kind marker: ?=trigger, !=receiver, #=instruction, $=post trigger;
			// whatever follows the command is taken as a comment
			command %= (qi::lit("!") >>
					(
						(qi::lit("?") >> trigger) |
						(qi::lit("!") >> receiver) |
						(qi::lit("#") >> instruction) |
						(qi::lit("$") >> postTrigger)
					) >> comment
				);

			rline %=
				(
					command | commentLine | spirit::eps
				);

			// modifiers that may prefix a V symbol or V expression
			vmod %= qi::string("`") | qi::string(",!") | qi::string(",") | qi::string("#'") | qi::string("'");
			vsym %= *vmod >> qi::lexeme[+qi::char_("+*/$%&_=<>~a-zA-Z0-9-")];

			// NOTE(review): presumably a declaration of a real-number parser named
			// strict_double whose template arguments were lost — confirm.
			qi::real_parser > strict_double;
			vopt %= qi::lexeme[(qi::lit("!") >> qi::char_ >> qi::lit("!"))] | qi::lexeme[strict_double] | qi::lexeme[qi::int_] | command | vexp | string | vsym;
			// a V expression is a bracketed sequence of options, optionally prefixed by modifiers
			vexp %= *vmod >> qi::lit("[") >> *(vopt) >> qi::lit("]");

			// start rule: "![...]" V expression or a regular line, and the whole input must be consumed
			vline %= (( qi::lit("!") >>vexp) | rline ) > spirit::eoi;

			//error handling
			// human-readable rule names, used when reporting what was expected

			string.name("string constant");
			ERMmacroUsage.name("macro usage");
			/*qERMMacroUsage.name("macro usage with ?");*/
			ERMmacroDef.name("macro definition");
			varExpNotMacro.name("variable expression (not macro)");
			varExp.name("variable expression");
			iexp.name("i-expression");
			comment.name("comment");
			commentLine.name("comment line");
			cmdName.name("name of a command");
			identifier.name("identifier");
			condition.name("condition");
			trigger.name("trigger");
			body.name("body");
			instruction.name("instruction");
			receiver.name("receiver");
			postTrigger.name("post trigger");
			command.name("command");
			rline.name("ERM script line");
			vsym.name("V symbol");
			vopt.name("V option");
			vexp.name("V expression");
			vline.name("VERM line");

			// error handler attached to vline: writes the expected element and the
			// offending text to std::cout
			qi::on_error
				(
				vline
				, std::cout //or phoenix::ref(std::count), is there any difference?
				<< phoenix::val("Error! Expecting ")
				<< qi::_4                               // what failed?
				<< phoenix::val(" here: \"")
				<< phoenix::construct(qi::_3, qi::_2)   // iterators to error-pos, end
				<< phoenix::val("\"")
				);

		}

		// rule objects, one per grammar element defined in the constructor
		qi::rule string;

		qi::rule ERMmacroUsage;
		/*qi::rule qERMMacroUsage;*/
		qi::rule ERMmacroDef;
		qi::rule varExpNotMacro;
		qi::rule varExp;
		qi::rule iexp;
		qi::rule varp;
		qi::rule arithmeticOp;
		qi::rule comment;
		qi::rule commentLine;
		qi::rule cmdName;
		qi::rule identifier;
		qi::rule comparison;
		qi::rule condition;
		qi::rule VRLogic;
		qi::rule VRarithmetic;
		qi::rule semiCompare;
		qi::rule curStr;
		qi::rule varConcatString;
		qi::rule bodyOptionItem;
		qi::rule exactBodyOptionList;
		qi::rule normalBodyOption;
		qi::rule bodyOption;
		qi::rule trigger;
		qi::rule body;
		qi::rule instruction;
		qi::rule receiver;
		qi::rule postTrigger;
		qi::rule command;
		qi::rule rline;
		qi::rule vsym;
		qi::rule vmod;
		qi::rule vopt;
		qi::rule vexp;
		qi::rule vline;
	};
};

// Parses one command and, if anything is thrown, logs the file name, the given
// line number and the command text before rethrowing the same exception.
ERM::TLine ERMParser::parseLine( const std::string & line, int realLineNo )
{
	try
	{
		return parseLine(line);
	}
	catch(...)
	{
		logGlobal->errorStream() << "Parse error occurred in file " << srcFile << " (line " << realLineNo << ") :" << line;
		throw;
	}
}

// Runs the ERM grammar over `line`, skipping ascii whitespace between tokens.
// Throws ParseErrorException when the parse fails or does not consume the whole
// input; the unconsumed remainder is logged.
ERM::TLine ERMParser::parseLine(const std::string & line)
{
	std::string::const_iterator beg = line.begin(),
		end = line.end();

	ERM::ERM_grammar ERMgrammar;
	ERM::TLine AST;

	bool r = qi::phrase_parse(beg, end, ERMgrammar, ascii::space, AST);
	if(!r || beg != end)
	{
		// beg has been advanced by phrase_parse, so [beg, end) is the part that was not consumed
		logGlobal->errorStream() << "Parse error: cannot parse: " << std::string(beg, end);
		throw ParseErrorException();
	}
	return AST;
}

// Classifies a raw line by its first character and the parity of '^' markers
// found before the first ';':
//  - starts with '!': odd count -> UNFINISHED, even count -> COMMAND_FULL
//  - otherwise, when inString: odd count -> END_OF, even count -> UNFINISHED
//  - otherwise: COMMENT
ERMParser::ELineType ERMParser::classifyLine( const std::string & line, bool inString ) const
{
	ERMParser::ELineType ret;
	if(line[0] == '!')
	{
		if(countHatsBeforeSemicolon(line) % 2 == 1)
			ret = ERMParser::UNFINISHED;
		else
			ret = ERMParser::COMMAND_FULL;
	}
	else
	{
		if(inString)
		{
			if(countHatsBeforeSemicolon(line) % 2 == 1)
				ret = ERMParser::END_OF;
			else
				ret = ERMParser::UNFINISHED;
		}
		else
		{
			ret = ERMParser::COMMENT;
		}
	}

	return ret;
}

// Counts the '^' characters that occur before the first ';' in `line`
// (or in the whole line when it contains no ';').
int ERMParser::countHatsBeforeSemicolon( const std::string & line ) const
{
	//CHECK: omit macros? or anything else?
	int numOfHats = 0; //num of '^' before ';'
	//check for unmatched ^
	for (char c : line)
	{
		if(c == ';')
			break;
		if(c == '^')
			++numOfHats;
	}
	return numOfHats;
}

// NOTE(review): the rest of this definition lies outside the reviewed span; the
// visible fragment is reproduced unchanged.
void ERMParser::repairEncoding( std::string & str ) const
{
	for(int g=0; g