From 26fa6cd3c32f04e67d529f4848ae173ea0894c42 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Wed, 30 Jul 2025 20:41:41 -0400 Subject: [PATCH] Refactor error handling: replace `NewSyntaxError` with a more comprehensive `parseError` method, improve syntax error parsing, introduce token tracking via `TrackingTokenStream` and `TokenHistory`, add `SkipWhitespaceForward` helper, update parser to support token stream transformations, and enhance error messaging across the compiler and test cases. --- pkg/compiler/compiler.go | 9 +- pkg/compiler/error_listener.go | 110 +++++++++++++++++- pkg/compiler/internal/core/error_helpers.go | 29 +++++ .../internal/core/error_recognizer.go | 65 ----------- pkg/file/helpers.go | 15 +++ pkg/parser/parser.go | 10 +- ...anging_stream.go => stream_change_case.go} | 0 pkg/parser/stream_track_tokens.go | 29 +++++ pkg/parser/token_history.go | 75 ++++++++++++ pkg/parser/transformer.go | 5 + .../compiler/compiler_errors_test.go | 22 +++- 11 files changed, 288 insertions(+), 81 deletions(-) delete mode 100644 pkg/compiler/internal/core/error_recognizer.go create mode 100644 pkg/file/helpers.go rename pkg/parser/{case_changing_stream.go => stream_change_case.go} (100%) create mode 100644 pkg/parser/stream_track_tokens.go create mode 100644 pkg/parser/token_history.go create mode 100644 pkg/parser/transformer.go diff --git a/pkg/compiler/compiler.go b/pkg/compiler/compiler.go index 922ea0a7..9991614f 100644 --- a/pkg/compiler/compiler.go +++ b/pkg/compiler/compiler.go @@ -3,6 +3,8 @@ package compiler import ( goruntime "runtime" + "github.com/antlr4-go/antlr/v4" + "github.com/MontFerret/ferret/pkg/file" "github.com/MontFerret/ferret/pkg/compiler/internal/core" @@ -58,8 +60,11 @@ func (c *Compiler) Compile(src *file.Source) (program *vm.Program, err error) { }() l := NewVisitor(src, errorHandler) - p := parser.New(src.Content()) - p.AddErrorListener(newErrorListener(l.Ctx.Errors)) + tokenHistory := parser.NewTokenHistory(10) + p := parser.New(src.Content(), func(stream antlr.TokenStream) antlr.TokenStream { + return parser.NewTrackingTokenStream(stream, tokenHistory) + }) + p.AddErrorListener(newErrorListener(src, l.Ctx.Errors, tokenHistory)) p.Visit(l) if l.Ctx.Errors.HasErrors() { diff --git a/pkg/compiler/error_listener.go b/pkg/compiler/error_listener.go index f4898fbe..47ebfcb2 100644 --- a/pkg/compiler/error_listener.go +++ b/pkg/compiler/error_listener.go @@ -1,21 +1,39 @@ package compiler import ( + "fmt" + "regexp" + "strings" + "github.com/antlr4-go/antlr/v4" + "github.com/MontFerret/ferret/pkg/file" + "github.com/MontFerret/ferret/pkg/parser" + "github.com/MontFerret/ferret/pkg/compiler/internal/core" ) -type errorListener struct { - *antlr.DiagnosticErrorListener +type ( + errorListener struct { + *antlr.DiagnosticErrorListener + src *file.Source + handler *core.ErrorHandler + lastTokens *parser.TokenHistory + } - handler *core.ErrorHandler -} + errorPattern struct { + Name string + MatchFn func(tokens []antlr.Token) (matched bool, info map[string]string) + Explain func(info map[string]string) (msg, hint string, span file.Span) + } +) -func newErrorListener(handler *core.ErrorHandler) antlr.ErrorListener { +func newErrorListener(src *file.Source, handler *core.ErrorHandler, lastTokens *parser.TokenHistory) antlr.ErrorListener { return &errorListener{ DiagnosticErrorListener: antlr.NewDiagnosticErrorListener(false), + src: src, handler: handler, + lastTokens: lastTokens, } } @@ -26,5 +44,85 @@ func (d *errorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *a } func (d *errorListener) SyntaxError(_ antlr.Recognizer, offendingSymbol interface{}, line, column int, msg string, e antlr.RecognitionException) { - d.handler.Add(core.NewSyntaxError(msg, line, column, offendingSymbol)) + var offending antlr.Token + + // Get offending token + if tok, ok := offendingSymbol.(antlr.Token); ok { + offending = tok + } + + d.handler.Add(d.parseError(msg, offending)) +} + +func (d *errorListener) parseError(msg string, offending antlr.Token) *CompilationError { + span := core.SpanFromTokenSafe(offending, d.src) + + err := &CompilationError{ + Kind: SyntaxError, + Message: "Syntax error: " + msg, + Hint: "Check your syntax. Did you forget to write something?", + Spans: []core.ErrorSpan{ + {Span: span, Main: true}, + }, + } + + for _, handler := range []func(*CompilationError, antlr.Token) bool{ + d.extraneousError, + d.noViableAltError, + } { + if handler(err, offending) { + break + } + } + + return err +} + +func (d *errorListener) extraneousError(err *CompilationError, offending antlr.Token) (matched bool) { + if !strings.Contains(err.Message, "extraneous input") { + return false + } + + span := core.SpanFromTokenSafe(offending, d.src) + err.Spans = []core.ErrorSpan{ + core.NewMainErrorSpan(span, "query must end with a value"), + } + + err.Message = "Expected a RETURN or FOR clause at end of query" + err.Hint = "All queries must return a value. Add a RETURN statement to complete the query." + + return true +} + +func (d *errorListener) noViableAltError(err *CompilationError, offending antlr.Token) bool { + recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'") + + matches := recognizer.FindAllStringSubmatch(err.Message, -1) + + if len(matches) == 0 { + return false + } + + last := d.lastTokens.Last() + keyword := matches[0][1] + start := file.SkipWhitespaceForward(d.src.Content(), last.GetStop()+1) + span := file.Span{ + Start: start, + End: start + len(keyword), + } + + switch strings.ToLower(keyword) { + case "return": + err.Message = fmt.Sprintf("Expected expression after '%s'", keyword) + err.Hint = fmt.Sprintf("Did you forget to provide a value after '%s'?", keyword) + + // Replace span with RETURN token’s span + err.Spans = []core.ErrorSpan{ + core.NewMainErrorSpan(span, "missing return value"), + } + + return true + } + + return false } diff --git a/pkg/compiler/internal/core/error_helpers.go b/pkg/compiler/internal/core/error_helpers.go index d34379ad..6c3bf939 100644 --- a/pkg/compiler/internal/core/error_helpers.go +++ b/pkg/compiler/internal/core/error_helpers.go @@ -24,3 +24,32 @@ func SpanFromToken(tok antlr.Token) file.Span { return file.Span{Start: tok.GetStart(), End: tok.GetStop() + 1} } + +func SpanFromTokenSafe(tok antlr.Token, src *file.Source) file.Span { + if tok == nil { + return file.Span{Start: 0, End: 1} + } + + start := tok.GetStart() + end := tok.GetStop() + 1 // exclusive end + + if start < 0 { + start = 0 + } + + if end <= start { + end = start + 1 + } + + // clamp to source length + maxLen := len(src.Content()) + + if end > maxLen { + end = maxLen + } + if start > maxLen { + start = maxLen - 1 + } + + return file.Span{Start: start, End: end} +} diff --git a/pkg/compiler/internal/core/error_recognizer.go b/pkg/compiler/internal/core/error_recognizer.go deleted file mode 100644 index f49088ce..00000000 --- a/pkg/compiler/internal/core/error_recognizer.go +++ /dev/null @@ -1,65 +0,0 @@ -package core - -import ( - "regexp" - "strings" -) - -func NewSyntaxError(msg string, line, column int, offendingSymbol any) *CompilationError { - err := &CompilationError{ - Message: msg, - Hint: "Check the syntax of your code. It may be missing a keyword, operator, or punctuation.", - } - - var matched bool - parsers := []func(*CompilationError, int, int, any) bool{ - parseNoViableAltError, - parseExtraneousError, - } - - for _, parser := range parsers { - matched = parser(err, line, column, offendingSymbol) - - if matched { - break - } - } - - return err -} - -func parseExtraneousError(err *CompilationError, line, column int, offendingSymbol any) (matched bool) { - recognizer := regexp.MustCompile("extraneous input '' expecting") - - if !recognizer.MatchString(err.Message) { - return false - } - - err.Message = "Extraneous input at end of file" - - return true -} - -func parseNoViableAltError(err *CompilationError, line, column int, offendingSymbol any) (matched bool) { - recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'") - - matches := recognizer.FindAllStringSubmatch(err.Message, -1) - - if len(matches) == 0 { - return false - } - - var msg, hint string - keyword := matches[0][1] - - switch strings.ToLower(keyword) { - case "return": - msg = "Unexpected 'return' keyword" - hint = "Did you mean to return a value?" - } - - err.Message = msg - err.Hint = hint - - return true -} diff --git a/pkg/file/helpers.go b/pkg/file/helpers.go new file mode 100644 index 00000000..d64c4687 --- /dev/null +++ b/pkg/file/helpers.go @@ -0,0 +1,15 @@ +package file + +func SkipWhitespaceForward(content string, offset int) int { + for offset < len(content) { + ch := content[offset] + + if ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' { + break + } + + offset++ + } + + return offset +} diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go index 8320955d..95766395 100644 --- a/pkg/parser/parser.go +++ b/pkg/parser/parser.go @@ -11,14 +11,20 @@ type Parser struct { tree *fql.FqlParser } -func New(query string) *Parser { +func New(query string, tr ...TokenStreamTransformer) *Parser { input := antlr.NewInputStream(query) // converts tokens to upper case, so now it doesn't matter // in which case the tokens were entered upper := newCaseChangingStream(input, true) lexer := fql.NewFqlLexer(upper) - stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel) + var stream antlr.TokenStream + stream = antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel) + + // Apply all transformations to the token stream + for _, transform := range tr { + stream = transform(stream) + } p := fql.NewFqlParser(stream) p.BuildParseTrees = true diff --git a/pkg/parser/case_changing_stream.go b/pkg/parser/stream_change_case.go similarity index 100% rename from pkg/parser/case_changing_stream.go rename to pkg/parser/stream_change_case.go diff --git a/pkg/parser/stream_track_tokens.go b/pkg/parser/stream_track_tokens.go new file mode 100644 index 00000000..e6abccd6 --- /dev/null +++ b/pkg/parser/stream_track_tokens.go @@ -0,0 +1,29 @@ +package parser + +import "github.com/antlr4-go/antlr/v4" + +type TrackingTokenStream struct { + antlr.TokenStream + tokens *TokenHistory +} + +func NewTrackingTokenStream(stream antlr.TokenStream, history *TokenHistory) antlr.TokenStream { + return &TrackingTokenStream{ + TokenStream: stream, + tokens: history, + } +} + +func (ts *TrackingTokenStream) Tokens() *TokenHistory { + return ts.tokens +} + +func (ts *TrackingTokenStream) LT(i int) antlr.Token { + tok := ts.TokenStream.LT(i) + + if i == 1 && tok != nil && tok.GetTokenType() != antlr.TokenEOF { + ts.tokens.Add(tok) + } + + return tok +} diff --git a/pkg/parser/token_history.go b/pkg/parser/token_history.go new file mode 100644 index 00000000..9597a56c --- /dev/null +++ b/pkg/parser/token_history.go @@ -0,0 +1,75 @@ +package parser + +import "github.com/antlr4-go/antlr/v4" + +type ( + TokenNode struct { + Token antlr.Token + Prev *TokenNode + Next *TokenNode + } + + TokenHistory struct { + head *TokenNode + tail *TokenNode + size int + cap int + } +) + +func NewTokenHistory(cap int) *TokenHistory { + return &TokenHistory{cap: cap} +} + +func (h *TokenHistory) Add(token antlr.Token) { + if token == nil { + return + } + + node := &TokenNode{Token: token} + + if h.head != nil { + node.Next = h.head + h.head.Prev = node + } + + h.head = node + + if h.tail == nil { + h.tail = node + } + + h.size++ + + if h.size > h.cap { + // Remove oldest + h.tail = h.tail.Prev + + if h.tail != nil { + h.tail.Next = nil + } + + h.size-- + } +} + +func (h *TokenHistory) LastN(n int) []antlr.Token { + result := make([]antlr.Token, 0, n) + curr := h.head + + for curr != nil && n > 0 { + result = append(result, curr.Token) + curr = curr.Next + n-- + } + + return result +} + +func (h *TokenHistory) Last() antlr.Token { + if h.tail == nil { + return nil + } + + return h.tail.Token +} diff --git a/pkg/parser/transformer.go b/pkg/parser/transformer.go new file mode 100644 index 00000000..adfed7fd --- /dev/null +++ b/pkg/parser/transformer.go @@ -0,0 +1,5 @@ +package parser + +import "github.com/antlr4-go/antlr/v4" + +type TokenStreamTransformer func(antlr.TokenStream) antlr.TokenStream diff --git a/test/integration/compiler/compiler_errors_test.go b/test/integration/compiler/compiler_errors_test.go index 2dd73b84..0bed238c 100644 --- a/test/integration/compiler/compiler_errors_test.go +++ b/test/integration/compiler/compiler_errors_test.go @@ -13,18 +13,28 @@ func TestErrors(t *testing.T) { LET i = NONE `, E{ Kind: compiler.SyntaxError, - Message: "Variable 'i' is already defined", - //Message: "Extraneous input at end of file", + Message: "Expected a RETURN or FOR clause at end of query", + Hint: "All queries must return a value. Add a RETURN statement to complete the query.", }, "Syntax error: missing return statement"), - SkipErrorCase( + ErrorCase( ` LET i = NONE RETURN `, E{ - Kind: compiler.SyntaxError, - //Message: "Unexpected 'return' keyword", - //Hint: "Did you mean to return a value?", + Kind: compiler.SyntaxError, + Message: "Expected expression after 'RETURN'", + Hint: "Did you forget to provide a value after 'RETURN'?", }, "Syntax error: missing return value"), + ErrorCase( + ` + LET i = + LET y = [] + RETURN i + `, E{ + Kind: compiler.SyntaxError, + Message: "_FAIL_", + Hint: "", + }, "Syntax error: missing variable assignment value"), ErrorCase( ` LET i = NONE