From 6a3320f1b7dd5c78d2f910e49ac35a43b20e9c82 Mon Sep 17 00:00:00 2001 From: Jonathan Poelen Date: Sat, 31 Aug 2019 17:38:43 +0200 Subject: [PATCH] Fix several errors with c++ - add attribute ([[...]]) - add c++20 keywords - add hexadecimal floating point - add digit separator - fix unnamed class/union/struct/enum (fix #272) - fix function name detection --- lexers/c/cpp.go | 28 ++++---- lexers/testdata/cpp.actual | 25 ++++++- lexers/testdata/cpp.expected | 123 +++++++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 14 deletions(-) diff --git a/lexers/c/cpp.go b/lexers/c/cpp.go index cb53ee5..bb83492 100644 --- a/lexers/c/cpp.go +++ b/lexers/c/cpp.go @@ -16,26 +16,27 @@ var CPP = internal.Register(MustNewLexer( }, Rules{ "statements": { - {Words(``, `\b`, `catch`, `const_cast`, `delete`, `dynamic_cast`, `explicit`, `export`, `friend`, `mutable`, `namespace`, `new`, `operator`, `private`, `protected`, `public`, `reinterpret_cast`, `restrict`, `static_cast`, `template`, `this`, `throw`, `throws`, `try`, `typeid`, `typename`, `using`, `virtual`, `constexpr`, `nullptr`, `decltype`, `thread_local`, `alignas`, `alignof`, `static_assert`, `noexcept`, `override`, `final`), Keyword, nil}, - {`char(16_t|32_t)\b`, KeywordType, nil}, - {`(class)\b`, ByGroups(Keyword, Text), Push("classname")}, + {Words(``, `\b`, `catch`, `const_cast`, `delete`, `dynamic_cast`, `explicit`, `export`, `friend`, `mutable`, `namespace`, `new`, `operator`, `private`, `protected`, `public`, `reinterpret_cast`, `restrict`, `static_cast`, `template`, `this`, `throw`, `throws`, `try`, `typeid`, `typename`, `using`, `virtual`, `constexpr`, `nullptr`, `decltype`, `thread_local`, `alignas`, `alignof`, `static_assert`, `noexcept`, `override`, `final`, `concept`, `requires`, `consteval`, `co_await`, `co_return`, `co_yield`), Keyword, nil}, + {`(enum)\b(\s+)(class)\b(\s*)`, ByGroups(Keyword, Text, Keyword, Text), Push("classname")}, + {`(class|struct|enum|union)\b(\s*)`, ByGroups(Keyword, 
Text), Push("classname")}, + {`\[\[.+\]\]`, NameAttribute, nil}, {`(R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")`, ByGroups(LiteralStringAffix, LiteralString, LiteralStringDelimiter, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, LiteralString), nil}, {`(u8|u|U)(")`, ByGroups(LiteralStringAffix, LiteralString), Push("string")}, {`(L?)(")`, ByGroups(LiteralStringAffix, LiteralString), Push("string")}, {`(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')`, ByGroups(LiteralStringAffix, LiteralStringChar, LiteralStringChar, LiteralStringChar), nil}, - {`(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*`, LiteralNumberFloat, nil}, - {`(\d+\.\d*|\.\d+|\d+[fF])[fF]?`, LiteralNumberFloat, nil}, - {`0x[0-9a-fA-F]+[LlUu]*`, LiteralNumberHex, nil}, - {`0[0-7]+[LlUu]*`, LiteralNumberOct, nil}, - {`\d+[LlUu]*`, LiteralNumberInteger, nil}, + {`(\.([0-9](?:'?[0-9]+)*)([eE][+-]?([0-9]('?[0-9]+)*))?|([0-9]('?[0-9]+)*)(([eE][+-]?([0-9]('?[0-9]+)*))|\.([0-9]('?[0-9]+)*)?([eE][+-]?([0-9]('?[0-9]+)*))?)|0[xX](\.([0-9A-Fa-f]('?[0-9A-Fa-f]+)*)([pP][-+]?([0-9]('?[0-9]+)*))?|([0-9A-Fa-f]('?[0-9A-Fa-f]+)*)(([pP][-+]?([0-9]('?[0-9]+)*))|\.([0-9A-Fa-f]('?[0-9A-Fa-f]+)*)?([pP][-+]?([0-9]('?[0-9]+)*))?)))[fFLlUu]*`, LiteralNumberFloat, nil}, + {`0[xX]([0-9A-Fa-f]('?[0-9A-Fa-f]+)*)[LlUu]*`, LiteralNumberHex, nil}, + {`0('?[0-7]+)+[LlUu]*`, LiteralNumberOct, nil}, + {`0[Bb][01]('?[01]+)*[LlUu]*`, LiteralNumberBin, nil}, + {`[0-9]('?[0-9]+)*[LlUu]*`, LiteralNumberInteger, nil}, {`\*/`, Error, nil}, {`[~!%^&*+=|?:<>/-]`, Operator, nil}, {`[()\[\],.]`, Punctuation, nil}, {Words(``, `\b`, `asm`, `auto`, `break`, `case`, `const`, `continue`, `default`, `do`, `else`, `enum`, `extern`, `for`, `goto`, `if`, `register`, `restricted`, `return`, `sizeof`, `static`, `struct`, `switch`, `typedef`, `union`, `volatile`, `while`), Keyword, nil}, - {`(bool|int|long|float|short|double|char|unsigned|signed|void)\b`, KeywordType, nil}, + 
{`(bool|int|long|float|short|double|char((8|16|32)_t)?|wchar_t|unsigned|signed|void|u?int(_fast|_least|)(8|16|32|64)_t)\b`, KeywordType, nil}, {Words(``, `\b`, `inline`, `_inline`, `__inline`, `naked`, `restrict`, `thread`, `typename`), KeywordReserved, nil}, {`(__m(128i|128d|128|64))\b`, KeywordReserved, nil}, - {Words(`__`, `\b`, `asm`, `int8`, `based`, `except`, `int16`, `stdcall`, `cdecl`, `fastcall`, `int32`, `declspec`, `finally`, `int64`, `try`, `leave`, `wchar_t`, `w64`, `unaligned`, `raise`, `noop`, `identifier`, `forceinline`, `assume`), KeywordReserved, nil}, + {Words(`__`, `\b`, `asm`, `int8`, `based`, `except`, `int16`, `stdcall`, `cdecl`, `fastcall`, `int32`, `declspec`, `finally`, `int64`, `try`, `leave`, `w64`, `unaligned`, `raise`, `noop`, `identifier`, `forceinline`, `assume`), KeywordReserved, nil}, {`(true|false|NULL)\b`, NameBuiltin, nil}, {`([a-zA-Z_]\w*)(\s*)(:)(?!:)`, ByGroups(NameLabel, Text, Punctuation), nil}, {`[a-zA-Z_]\w*`, Name, nil}, @@ -49,8 +50,9 @@ var CPP = internal.Register(MustNewLexer( {`__(offload|blockingoffload|outer)\b`, KeywordPseudo, nil}, }, "classname": { + {`(\[\[.+\]\])(\s*)`, ByGroups(NameAttribute, Text), nil}, {`[a-zA-Z_]\w*`, NameClass, Pop(1)}, - {`\s*(?=>)`, Text, Pop(1)}, + {`\s*(?=[>{])`, Text, Pop(1)}, }, "whitespace": { {`^#if\s+0`, CommentPreproc, Push("if0")}, @@ -67,8 +69,8 @@ var CPP = internal.Register(MustNewLexer( "statement": { Include("whitespace"), Include("statements"), - {`[{}]`, Punctuation, nil}, - {`;`, Punctuation, Pop(1)}, + {`[{]`, Punctuation, Push("root")}, + {`[;}]`, Punctuation, Pop(1)}, }, "function": { Include("whitespace"), diff --git a/lexers/testdata/cpp.actual b/lexers/testdata/cpp.actual index 33c14ce..2e891b0 100644 --- a/lexers/testdata/cpp.actual +++ b/lexers/testdata/cpp.actual @@ -1,3 +1,26 @@ +#include "a" +#include + +[[nodiscard]] void foo() noexcept; +void foo(); + +constexpr class {} a; + +enum class E { A, B }; +enum class [[nodiscard]] E { A, B }; + +enum class { + 
a, b, + c, +} e; + +enum E { A, B }; + +class A { + void foo(); + void bar(); +}; + int main() { - return 0; + return 0 + 1'3 + 1.4; } diff --git a/lexers/testdata/cpp.expected b/lexers/testdata/cpp.expected index 8498a49..891a22d 100644 --- a/lexers/testdata/cpp.expected +++ b/lexers/testdata/cpp.expected @@ -1,4 +1,119 @@ [ + {"type":"CommentPreproc","value":"#include"}, + {"type":"Text","value":" "}, + {"type":"CommentPreprocFile","value":"\"a\""}, + {"type":"CommentPreproc","value":"\n#include"}, + {"type":"Text","value":" "}, + {"type":"CommentPreprocFile","value":"\u003cb\u003e"}, + {"type":"CommentPreproc","value":"\n"}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"[[nodiscard]]"}, + {"type":"Text","value":" "}, + {"type":"KeywordType","value":"void"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"foo"}, + {"type":"Punctuation","value":"()"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"noexcept"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"KeywordType","value":"void"}, + {"type":"Text","value":" "}, + {"type":"NameFunction","value":"foo"}, + {"type":"Punctuation","value":"();"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"constexpr"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"class"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{}"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"a"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"enum"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"class"}, + {"type":"Text","value":" "}, + {"type":"NameClass","value":"E"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"A"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"Name","value":"B"}, + {"type":"Text","value":" "}, + 
{"type":"Punctuation","value":"};"}, + {"type":"Text","value":"\n"}, + {"type":"Keyword","value":"enum"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"class"}, + {"type":"Text","value":" "}, + {"type":"NameAttribute","value":"[[nodiscard]]"}, + {"type":"Text","value":" "}, + {"type":"NameClass","value":"E"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"A"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"Name","value":"B"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"};"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"enum"}, + {"type":"Text","value":" "}, + {"type":"Keyword","value":"class"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{"}, + {"type":"Text","value":"\n "}, + {"type":"Name","value":"a"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"Name","value":"b"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":"\n "}, + {"type":"Name","value":"c"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"}"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"e"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"enum"}, + {"type":"Text","value":" "}, + {"type":"NameClass","value":"E"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{"}, + {"type":"Text","value":" "}, + {"type":"Name","value":"A"}, + {"type":"Punctuation","value":","}, + {"type":"Text","value":" "}, + {"type":"Name","value":"B"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"};"}, + {"type":"Text","value":"\n\n"}, + {"type":"Keyword","value":"class"}, + {"type":"Text","value":" "}, + {"type":"NameClass","value":"A"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{"}, + {"type":"Text","value":"\n "}, + 
{"type":"KeywordType","value":"void"}, + {"type":"Text","value":" "}, + {"type":"NameFunction","value":"foo"}, + {"type":"Punctuation","value":"();"}, + {"type":"Text","value":"\n "}, + {"type":"KeywordType","value":"void"}, + {"type":"Text","value":" "}, + {"type":"NameFunction","value":"bar"}, + {"type":"Punctuation","value":"();"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"};"}, + {"type":"Text","value":"\n\n"}, {"type":"KeywordType","value":"int"}, {"type":"Text","value":" "}, {"type":"NameFunction","value":"main"}, @@ -9,6 +124,14 @@ {"type":"Keyword","value":"return"}, {"type":"Text","value":" "}, {"type":"LiteralNumberInteger","value":"0"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"+"}, + {"type":"Text","value":" "}, + {"type":"LiteralNumberInteger","value":"1'3"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"+"}, + {"type":"Text","value":" "}, + {"type":"LiteralNumberFloat","value":"1.4"}, {"type":"Punctuation","value":";"}, {"type":"Text","value":"\n"}, {"type":"Punctuation","value":"}"},