
Refactor error handling: replace NewSyntaxError with a more comprehensive parseError method, improve syntax error parsing, introduce token tracking via TrackingTokenStream and TokenHistory, add SkipWhitespaceForward helper, update parser to support token stream transformations, and enhance error messaging across the compiler and test cases.

Tim Voronov
2025-07-30 20:41:41 -04:00
parent c24d802671
commit 26fa6cd3c3
11 changed files with 288 additions and 81 deletions
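In short, the parser now accepts token-stream transformers, one of which records recently consumed tokens into a TokenHistory that the error listener consults when building syntax errors. A minimal wiring sketch, mirroring the compiler.go and errors.go hunks below (src and errs stand in for the compiler's own source and error handler):

// Record the last N consumed tokens while parsing, then hand the history
// to the error listener so syntax errors can point at a precise span.
tokenHistory := parser.NewTokenHistory(10)
p := parser.New(src.Content(), func(stream antlr.TokenStream) antlr.TokenStream {
	return parser.NewTrackingTokenStream(stream, tokenHistory)
})
p.AddErrorListener(newErrorListener(src, errs, tokenHistory))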


@@ -3,6 +3,8 @@ package compiler
import (
goruntime "runtime"
"github.com/antlr4-go/antlr/v4"
"github.com/MontFerret/ferret/pkg/file"
"github.com/MontFerret/ferret/pkg/compiler/internal/core"
@@ -58,8 +60,11 @@ func (c *Compiler) Compile(src *file.Source) (program *vm.Program, err error) {
}()
l := NewVisitor(src, errorHandler)
p := parser.New(src.Content())
p.AddErrorListener(newErrorListener(l.Ctx.Errors))
tokenHistory := parser.NewTokenHistory(10)
p := parser.New(src.Content(), func(stream antlr.TokenStream) antlr.TokenStream {
return parser.NewTrackingTokenStream(stream, tokenHistory)
})
p.AddErrorListener(newErrorListener(src, l.Ctx.Errors, tokenHistory))
p.Visit(l)
if l.Ctx.Errors.HasErrors() {


@@ -1,21 +1,39 @@
package compiler
import (
"fmt"
"regexp"
"strings"
"github.com/antlr4-go/antlr/v4"
"github.com/MontFerret/ferret/pkg/file"
"github.com/MontFerret/ferret/pkg/parser"
"github.com/MontFerret/ferret/pkg/compiler/internal/core"
)
type errorListener struct {
type (
errorListener struct {
*antlr.DiagnosticErrorListener
src *file.Source
handler *core.ErrorHandler
}
lastTokens *parser.TokenHistory
}
func newErrorListener(handler *core.ErrorHandler) antlr.ErrorListener {
errorPattern struct {
Name string
MatchFn func(tokens []antlr.Token) (matched bool, info map[string]string)
Explain func(info map[string]string) (msg, hint string, span file.Span)
}
)
func newErrorListener(src *file.Source, handler *core.ErrorHandler, lastTokens *parser.TokenHistory) antlr.ErrorListener {
return &errorListener{
DiagnosticErrorListener: antlr.NewDiagnosticErrorListener(false),
src: src,
handler: handler,
lastTokens: lastTokens,
}
}
@@ -26,5 +44,85 @@ func (d *errorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *a
}
func (d *errorListener) SyntaxError(_ antlr.Recognizer, offendingSymbol interface{}, line, column int, msg string, e antlr.RecognitionException) {
d.handler.Add(core.NewSyntaxError(msg, line, column, offendingSymbol))
var offending antlr.Token
// Get offending token
if tok, ok := offendingSymbol.(antlr.Token); ok {
offending = tok
}
d.handler.Add(d.parseError(msg, offending))
}
func (d *errorListener) parseError(msg string, offending antlr.Token) *CompilationError {
span := core.SpanFromTokenSafe(offending, d.src)
err := &CompilationError{
Kind: SyntaxError,
Message: "Syntax error: " + msg,
Hint: "Check your syntax. Did you forget to write something?",
Spans: []core.ErrorSpan{
{Span: span, Main: true},
},
}
for _, handler := range []func(*CompilationError, antlr.Token) bool{
d.extraneousError,
d.noViableAltError,
} {
if handler(err, offending) {
break
}
}
return err
}
func (d *errorListener) extraneousError(err *CompilationError, offending antlr.Token) (matched bool) {
if !strings.Contains(err.Message, "extraneous input") {
return false
}
span := core.SpanFromTokenSafe(offending, d.src)
err.Spans = []core.ErrorSpan{
core.NewMainErrorSpan(span, "query must end with a value"),
}
err.Message = "Expected a RETURN or FOR clause at end of query"
err.Hint = "All queries must return a value. Add a RETURN statement to complete the query."
return true
}
func (d *errorListener) noViableAltError(err *CompilationError, offending antlr.Token) bool {
recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'")
matches := recognizer.FindAllStringSubmatch(err.Message, -1)
if len(matches) == 0 {
return false
}
last := d.lastTokens.Last()
keyword := matches[0][1]
start := file.SkipWhitespaceForward(d.src.Content(), last.GetStop()+1)
span := file.Span{
Start: start,
End: start + len(keyword),
}
switch strings.ToLower(keyword) {
case "return":
err.Message = fmt.Sprintf("Expected expression after '%s'", keyword)
err.Hint = fmt.Sprintf("Did you forget to provide a value after '%s'?", keyword)
// Point the span at the position right after the last consumed token, where the return value should appear
err.Spans = []core.ErrorSpan{
core.NewMainErrorSpan(span, "missing return value"),
}
return true
}
return false
}


@@ -24,3 +24,32 @@ func SpanFromToken(tok antlr.Token) file.Span {
return file.Span{Start: tok.GetStart(), End: tok.GetStop() + 1}
}
func SpanFromTokenSafe(tok antlr.Token, src *file.Source) file.Span {
if tok == nil {
return file.Span{Start: 0, End: 1}
}
start := tok.GetStart()
end := tok.GetStop() + 1 // exclusive end
if start < 0 {
start = 0
}
if end <= start {
end = start + 1
}
// clamp to source length
maxLen := len(src.Content())
if end > maxLen {
end = maxLen
}
if start > maxLen {
start = maxLen - 1
}
return file.Span{Start: start, End: end}
}
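A rough illustration of the clamping rules above, restated on plain offsets. clampSpan is a hypothetical stand-in, not part of the package; the real function takes an antlr.Token and a *file.Source:

// Hypothetical restatement of SpanFromTokenSafe's clamping on raw offsets.
func clampSpan(start, stop, srcLen int) (int, int) {
	end := stop + 1 // exclusive end
	if start < 0 {
		start = 0
	}
	if end <= start {
		end = start + 1
	}
	if end > srcLen {
		end = srcLen
	}
	if start > srcLen {
		start = srcLen - 1
	}
	return start, end
}

// clampSpan(-1, -1, 10) == (0, 1)  // invalid offsets fall back to a one-byte span
// clampSpan(8, 20, 10)  == (8, 10) // end is clamped to the source length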


@@ -1,65 +0,0 @@
package core
import (
"regexp"
"strings"
)
func NewSyntaxError(msg string, line, column int, offendingSymbol any) *CompilationError {
err := &CompilationError{
Message: msg,
Hint: "Check the syntax of your code. It may be missing a keyword, operator, or punctuation.",
}
var matched bool
parsers := []func(*CompilationError, int, int, any) bool{
parseNoViableAltError,
parseExtraneousError,
}
for _, parser := range parsers {
matched = parser(err, line, column, offendingSymbol)
if matched {
break
}
}
return err
}
func parseExtraneousError(err *CompilationError, line, column int, offendingSymbol any) (matched bool) {
recognizer := regexp.MustCompile("extraneous input '<EOF>' expecting")
if !recognizer.MatchString(err.Message) {
return false
}
err.Message = "Extraneous input at end of file"
return true
}
func parseNoViableAltError(err *CompilationError, line, column int, offendingSymbol any) (matched bool) {
recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'")
matches := recognizer.FindAllStringSubmatch(err.Message, -1)
if len(matches) == 0 {
return false
}
var msg, hint string
keyword := matches[0][1]
switch strings.ToLower(keyword) {
case "return":
msg = "Unexpected 'return' keyword"
hint = "Did you mean to return a value?"
}
err.Message = msg
err.Hint = hint
return true
}

pkg/file/helpers.go (new file, +15 lines)

@@ -0,0 +1,15 @@
package file
func SkipWhitespaceForward(content string, offset int) int {
for offset < len(content) {
ch := content[offset]
if ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' {
break
}
offset++
}
return offset
}
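A quick usage sketch of the new helper (values are illustrative):

content := "RETURN   \n\t42"
// Offset 6 is the first space after "RETURN"; the helper advances past
// spaces, tabs and newlines to the first non-whitespace byte.
next := file.SkipWhitespaceForward(content, 6) // next == 11, the index of '4'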


@@ -11,14 +11,20 @@ type Parser struct {
tree *fql.FqlParser
}
func New(query string) *Parser {
func New(query string, tr ...TokenStreamTransformer) *Parser {
input := antlr.NewInputStream(query)
// converts tokens to upper case, so now it doesn't matter
// in which case the tokens were entered
upper := newCaseChangingStream(input, true)
lexer := fql.NewFqlLexer(upper)
stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
var stream antlr.TokenStream
stream = antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
// Apply all transformations to the token stream
for _, transform := range tr {
stream = transform(stream)
}
p := fql.NewFqlParser(stream)
p.BuildParseTrees = true
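A hedged sketch of the new variadic signature in use, assuming the antlr and parser packages are imported. Transformers are applied in the order given, so the last one becomes the outermost wrapper; the second transformer here is purely hypothetical:

history := parser.NewTokenHistory(10)
track := func(ts antlr.TokenStream) antlr.TokenStream {
	return parser.NewTrackingTokenStream(ts, history)
}
// debug is a made-up no-op transformer, shown only to illustrate chaining.
debug := func(ts antlr.TokenStream) antlr.TokenStream { return ts }
p := parser.New("FOR x IN [1, 2, 3] RETURN x", track, debug)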


@@ -0,0 +1,29 @@
package parser
import "github.com/antlr4-go/antlr/v4"
type TrackingTokenStream struct {
antlr.TokenStream
tokens *TokenHistory
}
func NewTrackingTokenStream(stream antlr.TokenStream, history *TokenHistory) antlr.TokenStream {
return &TrackingTokenStream{
TokenStream: stream,
tokens: history,
}
}
func (ts *TrackingTokenStream) Tokens() *TokenHistory {
return ts.tokens
}
func (ts *TrackingTokenStream) LT(i int) antlr.Token {
tok := ts.TokenStream.LT(i)
if i == 1 && tok != nil && tok.GetTokenType() != antlr.TokenEOF {
ts.tokens.Add(tok)
}
return tok
}


@@ -0,0 +1,75 @@
package parser
import "github.com/antlr4-go/antlr/v4"
type (
TokenNode struct {
Token antlr.Token
Prev *TokenNode
Next *TokenNode
}
TokenHistory struct {
head *TokenNode
tail *TokenNode
size int
cap int
}
)
func NewTokenHistory(cap int) *TokenHistory {
return &TokenHistory{cap: cap}
}
func (h *TokenHistory) Add(token antlr.Token) {
if token == nil {
return
}
node := &TokenNode{Token: token}
if h.head != nil {
node.Next = h.head
h.head.Prev = node
}
h.head = node
if h.tail == nil {
h.tail = node
}
h.size++
if h.size > h.cap {
// Remove oldest
h.tail = h.tail.Prev
if h.tail != nil {
h.tail.Next = nil
}
h.size--
}
}
func (h *TokenHistory) LastN(n int) []antlr.Token {
result := make([]antlr.Token, 0, n)
curr := h.head
for curr != nil && n > 0 {
result = append(result, curr.Token)
curr = curr.Next
n--
}
return result
}
func (h *TokenHistory) Last() antlr.Token {
if h.tail == nil {
return nil
}
return h.tail.Token
}
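A small sketch of the intended behaviour, assuming the implementation above and imports of the antlr, parser, and generated fql packages. Newest tokens sit at the head of the list, so LastN returns most-recent-first and entries beyond the capacity are dropped from the tail:

input := antlr.NewInputStream("LET i = NONE RETURN")
lexer := fql.NewFqlLexer(input)

history := parser.NewTokenHistory(3)
for tok := lexer.NextToken(); tok.GetTokenType() != antlr.TokenEOF; tok = lexer.NextToken() {
	history.Add(tok)
}
// Only the three most recently added tokens are retained;
// history.LastN(3) yields them newest-first.
recent := history.LastN(3)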


@@ -0,0 +1,5 @@
package parser
import "github.com/antlr4-go/antlr/v4"
type TokenStreamTransformer func(antlr.TokenStream) antlr.TokenStream


@@ -13,18 +13,28 @@ func TestErrors(t *testing.T) {
LET i = NONE
`, E{
Kind: compiler.SyntaxError,
Message: "Variable 'i' is already defined",
//Message: "Extraneous input at end of file",
Message: "Expected a RETURN or FOR clause at end of query",
Hint: "All queries must return a value. Add a RETURN statement to complete the query.",
}, "Syntax error: missing return statement"),
SkipErrorCase(
ErrorCase(
`
LET i = NONE
RETURN
`, E{
Kind: compiler.SyntaxError,
//Message: "Unexpected 'return' keyword",
//Hint: "Did you mean to return a value?",
Message: "Expected expression after 'RETURN'",
Hint: "Did you forget to provide a value after 'RETURN'?",
}, "Syntax error: missing return value"),
ErrorCase(
`
LET i =
LET y = []
RETURN i
`, E{
Kind: compiler.SyntaxError,
Message: "_FAIL_",
Hint: "",
}, "Syntax error: missing variable assignment value"),
ErrorCase(
`
LET i = NONE