mirror of
https://github.com/MontFerret/ferret.git
synced 2025-08-13 19:52:52 +02:00
Refactor error handling: replace NewSyntaxError
with a more comprehensive parseError
method, improve syntax error parsing, introduce token tracking via TrackingTokenStream
and TokenHistory
, add SkipWhitespaceForward
helper, update parser to support token stream transformations, and enhance error messaging across the compiler and test cases.
This commit is contained in:
@@ -3,6 +3,8 @@ package compiler
|
||||
import (
|
||||
goruntime "runtime"
|
||||
|
||||
"github.com/antlr4-go/antlr/v4"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/file"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/compiler/internal/core"
|
||||
@@ -58,8 +60,11 @@ func (c *Compiler) Compile(src *file.Source) (program *vm.Program, err error) {
|
||||
}()
|
||||
|
||||
l := NewVisitor(src, errorHandler)
|
||||
p := parser.New(src.Content())
|
||||
p.AddErrorListener(newErrorListener(l.Ctx.Errors))
|
||||
tokenHistory := parser.NewTokenHistory(10)
|
||||
p := parser.New(src.Content(), func(stream antlr.TokenStream) antlr.TokenStream {
|
||||
return parser.NewTrackingTokenStream(stream, tokenHistory)
|
||||
})
|
||||
p.AddErrorListener(newErrorListener(src, l.Ctx.Errors, tokenHistory))
|
||||
p.Visit(l)
|
||||
|
||||
if l.Ctx.Errors.HasErrors() {
|
||||
|
@@ -1,21 +1,39 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/antlr4-go/antlr/v4"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/file"
|
||||
"github.com/MontFerret/ferret/pkg/parser"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/compiler/internal/core"
|
||||
)
|
||||
|
||||
type errorListener struct {
|
||||
*antlr.DiagnosticErrorListener
|
||||
type (
|
||||
errorListener struct {
|
||||
*antlr.DiagnosticErrorListener
|
||||
src *file.Source
|
||||
handler *core.ErrorHandler
|
||||
lastTokens *parser.TokenHistory
|
||||
}
|
||||
|
||||
handler *core.ErrorHandler
|
||||
}
|
||||
errorPattern struct {
|
||||
Name string
|
||||
MatchFn func(tokens []antlr.Token) (matched bool, info map[string]string)
|
||||
Explain func(info map[string]string) (msg, hint string, span file.Span)
|
||||
}
|
||||
)
|
||||
|
||||
func newErrorListener(handler *core.ErrorHandler) antlr.ErrorListener {
|
||||
func newErrorListener(src *file.Source, handler *core.ErrorHandler, lastTokens *parser.TokenHistory) antlr.ErrorListener {
|
||||
return &errorListener{
|
||||
DiagnosticErrorListener: antlr.NewDiagnosticErrorListener(false),
|
||||
src: src,
|
||||
handler: handler,
|
||||
lastTokens: lastTokens,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,5 +44,85 @@ func (d *errorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *a
|
||||
}
|
||||
|
||||
func (d *errorListener) SyntaxError(_ antlr.Recognizer, offendingSymbol interface{}, line, column int, msg string, e antlr.RecognitionException) {
|
||||
d.handler.Add(core.NewSyntaxError(msg, line, column, offendingSymbol))
|
||||
var offending antlr.Token
|
||||
|
||||
// Get offending token
|
||||
if tok, ok := offendingSymbol.(antlr.Token); ok {
|
||||
offending = tok
|
||||
}
|
||||
|
||||
d.handler.Add(d.parseError(msg, offending))
|
||||
}
|
||||
|
||||
func (d *errorListener) parseError(msg string, offending antlr.Token) *CompilationError {
|
||||
span := core.SpanFromTokenSafe(offending, d.src)
|
||||
|
||||
err := &CompilationError{
|
||||
Kind: SyntaxError,
|
||||
Message: "Syntax error: " + msg,
|
||||
Hint: "Check your syntax. Did you forget to write something?",
|
||||
Spans: []core.ErrorSpan{
|
||||
{Span: span, Main: true},
|
||||
},
|
||||
}
|
||||
|
||||
for _, handler := range []func(*CompilationError, antlr.Token) bool{
|
||||
d.extraneousError,
|
||||
d.noViableAltError,
|
||||
} {
|
||||
if handler(err, offending) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *errorListener) extraneousError(err *CompilationError, offending antlr.Token) (matched bool) {
|
||||
if !strings.Contains(err.Message, "extraneous input") {
|
||||
return false
|
||||
}
|
||||
|
||||
span := core.SpanFromTokenSafe(offending, d.src)
|
||||
err.Spans = []core.ErrorSpan{
|
||||
core.NewMainErrorSpan(span, "query must end with a value"),
|
||||
}
|
||||
|
||||
err.Message = "Expected a RETURN or FOR clause at end of query"
|
||||
err.Hint = "All queries must return a value. Add a RETURN statement to complete the query."
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (d *errorListener) noViableAltError(err *CompilationError, offending antlr.Token) bool {
|
||||
recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'")
|
||||
|
||||
matches := recognizer.FindAllStringSubmatch(err.Message, -1)
|
||||
|
||||
if len(matches) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
last := d.lastTokens.Last()
|
||||
keyword := matches[0][1]
|
||||
start := file.SkipWhitespaceForward(d.src.Content(), last.GetStop()+1)
|
||||
span := file.Span{
|
||||
Start: start,
|
||||
End: start + len(keyword),
|
||||
}
|
||||
|
||||
switch strings.ToLower(keyword) {
|
||||
case "return":
|
||||
err.Message = fmt.Sprintf("Expected expression after '%s'", keyword)
|
||||
err.Hint = fmt.Sprintf("Did you forget to provide a value after '%s'?", keyword)
|
||||
|
||||
// Replace span with RETURN token’s span
|
||||
err.Spans = []core.ErrorSpan{
|
||||
core.NewMainErrorSpan(span, "missing return value"),
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
@@ -24,3 +24,32 @@ func SpanFromToken(tok antlr.Token) file.Span {
|
||||
|
||||
return file.Span{Start: tok.GetStart(), End: tok.GetStop() + 1}
|
||||
}
|
||||
|
||||
func SpanFromTokenSafe(tok antlr.Token, src *file.Source) file.Span {
|
||||
if tok == nil {
|
||||
return file.Span{Start: 0, End: 1}
|
||||
}
|
||||
|
||||
start := tok.GetStart()
|
||||
end := tok.GetStop() + 1 // exclusive end
|
||||
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
|
||||
if end <= start {
|
||||
end = start + 1
|
||||
}
|
||||
|
||||
// clamp to source length
|
||||
maxLen := len(src.Content())
|
||||
|
||||
if end > maxLen {
|
||||
end = maxLen
|
||||
}
|
||||
if start > maxLen {
|
||||
start = maxLen - 1
|
||||
}
|
||||
|
||||
return file.Span{Start: start, End: end}
|
||||
}
|
||||
|
@@ -1,65 +0,0 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func NewSyntaxError(msg string, line, column int, offendingSymbol any) *CompilationError {
|
||||
err := &CompilationError{
|
||||
Message: msg,
|
||||
Hint: "Check the syntax of your code. It may be missing a keyword, operator, or punctuation.",
|
||||
}
|
||||
|
||||
var matched bool
|
||||
parsers := []func(*CompilationError, int, int, any) bool{
|
||||
parseNoViableAltError,
|
||||
parseExtraneousError,
|
||||
}
|
||||
|
||||
for _, parser := range parsers {
|
||||
matched = parser(err, line, column, offendingSymbol)
|
||||
|
||||
if matched {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func parseExtraneousError(err *CompilationError, line, column int, offendingSymbol any) (matched bool) {
|
||||
recognizer := regexp.MustCompile("extraneous input '<EOF>' expecting")
|
||||
|
||||
if !recognizer.MatchString(err.Message) {
|
||||
return false
|
||||
}
|
||||
|
||||
err.Message = "Extraneous input at end of file"
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func parseNoViableAltError(err *CompilationError, line, column int, offendingSymbol any) (matched bool) {
|
||||
recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'")
|
||||
|
||||
matches := recognizer.FindAllStringSubmatch(err.Message, -1)
|
||||
|
||||
if len(matches) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
var msg, hint string
|
||||
keyword := matches[0][1]
|
||||
|
||||
switch strings.ToLower(keyword) {
|
||||
case "return":
|
||||
msg = "Unexpected 'return' keyword"
|
||||
hint = "Did you mean to return a value?"
|
||||
}
|
||||
|
||||
err.Message = msg
|
||||
err.Hint = hint
|
||||
|
||||
return true
|
||||
}
|
15
pkg/file/helpers.go
Normal file
15
pkg/file/helpers.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package file
|
||||
|
||||
// SkipWhitespaceForward returns the index of the first character in
// content at or after offset that is not a space, tab, CR, or LF.
// If every remaining character is whitespace, len(content) is returned.
//
// A negative offset is treated as 0: callers compute positions from
// token offsets (e.g. token.GetStop()+1), which can be negative for
// synthetic tokens, and indexing with a negative offset would panic.
func SkipWhitespaceForward(content string, offset int) int {
	if offset < 0 {
		offset = 0
	}

	for offset < len(content) {
		switch content[offset] {
		case ' ', '\t', '\n', '\r':
			offset++
		default:
			return offset
		}
	}

	return offset
}
|
@@ -11,14 +11,20 @@ type Parser struct {
|
||||
tree *fql.FqlParser
|
||||
}
|
||||
|
||||
func New(query string) *Parser {
|
||||
func New(query string, tr ...TokenStreamTransformer) *Parser {
|
||||
input := antlr.NewInputStream(query)
|
||||
// converts tokens to upper case, so now it doesn't matter
|
||||
// in which case the tokens were entered
|
||||
upper := newCaseChangingStream(input, true)
|
||||
|
||||
lexer := fql.NewFqlLexer(upper)
|
||||
stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
|
||||
var stream antlr.TokenStream
|
||||
stream = antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
|
||||
|
||||
// Apply all transformations to the token stream
|
||||
for _, transform := range tr {
|
||||
stream = transform(stream)
|
||||
}
|
||||
|
||||
p := fql.NewFqlParser(stream)
|
||||
p.BuildParseTrees = true
|
||||
|
29
pkg/parser/stream_track_tokens.go
Normal file
29
pkg/parser/stream_track_tokens.go
Normal file
@@ -0,0 +1,29 @@
|
||||
package parser
|
||||
|
||||
import "github.com/antlr4-go/antlr/v4"
|
||||
|
||||
type TrackingTokenStream struct {
|
||||
antlr.TokenStream
|
||||
tokens *TokenHistory
|
||||
}
|
||||
|
||||
func NewTrackingTokenStream(stream antlr.TokenStream, history *TokenHistory) antlr.TokenStream {
|
||||
return &TrackingTokenStream{
|
||||
TokenStream: stream,
|
||||
tokens: history,
|
||||
}
|
||||
}
|
||||
|
||||
func (ts *TrackingTokenStream) Tokens() *TokenHistory {
|
||||
return ts.tokens
|
||||
}
|
||||
|
||||
func (ts *TrackingTokenStream) LT(i int) antlr.Token {
|
||||
tok := ts.TokenStream.LT(i)
|
||||
|
||||
if i == 1 && tok != nil && tok.GetTokenType() != antlr.TokenEOF {
|
||||
ts.tokens.Add(tok)
|
||||
}
|
||||
|
||||
return tok
|
||||
}
|
75
pkg/parser/token_history.go
Normal file
75
pkg/parser/token_history.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package parser
|
||||
|
||||
import "github.com/antlr4-go/antlr/v4"
|
||||
|
||||
type (
|
||||
TokenNode struct {
|
||||
Token antlr.Token
|
||||
Prev *TokenNode
|
||||
Next *TokenNode
|
||||
}
|
||||
|
||||
TokenHistory struct {
|
||||
head *TokenNode
|
||||
tail *TokenNode
|
||||
size int
|
||||
cap int
|
||||
}
|
||||
)
|
||||
|
||||
func NewTokenHistory(cap int) *TokenHistory {
|
||||
return &TokenHistory{cap: cap}
|
||||
}
|
||||
|
||||
func (h *TokenHistory) Add(token antlr.Token) {
|
||||
if token == nil {
|
||||
return
|
||||
}
|
||||
|
||||
node := &TokenNode{Token: token}
|
||||
|
||||
if h.head != nil {
|
||||
node.Next = h.head
|
||||
h.head.Prev = node
|
||||
}
|
||||
|
||||
h.head = node
|
||||
|
||||
if h.tail == nil {
|
||||
h.tail = node
|
||||
}
|
||||
|
||||
h.size++
|
||||
|
||||
if h.size > h.cap {
|
||||
// Remove oldest
|
||||
h.tail = h.tail.Prev
|
||||
|
||||
if h.tail != nil {
|
||||
h.tail.Next = nil
|
||||
}
|
||||
|
||||
h.size--
|
||||
}
|
||||
}
|
||||
|
||||
func (h *TokenHistory) LastN(n int) []antlr.Token {
|
||||
result := make([]antlr.Token, 0, n)
|
||||
curr := h.head
|
||||
|
||||
for curr != nil && n > 0 {
|
||||
result = append(result, curr.Token)
|
||||
curr = curr.Next
|
||||
n--
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (h *TokenHistory) Last() antlr.Token {
|
||||
if h.tail == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return h.tail.Token
|
||||
}
|
5
pkg/parser/transformer.go
Normal file
5
pkg/parser/transformer.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package parser
|
||||
|
||||
import "github.com/antlr4-go/antlr/v4"
|
||||
|
||||
type TokenStreamTransformer func(antlr.TokenStream) antlr.TokenStream
|
@@ -13,18 +13,28 @@ func TestErrors(t *testing.T) {
|
||||
LET i = NONE
|
||||
`, E{
|
||||
Kind: compiler.SyntaxError,
|
||||
Message: "Variable 'i' is already defined",
|
||||
//Message: "Extraneous input at end of file",
|
||||
Message: "Expected a RETURN or FOR clause at end of query",
|
||||
Hint: "All queries must return a value. Add a RETURN statement to complete the query.",
|
||||
}, "Syntax error: missing return statement"),
|
||||
SkipErrorCase(
|
||||
ErrorCase(
|
||||
`
|
||||
LET i = NONE
|
||||
RETURN
|
||||
`, E{
|
||||
Kind: compiler.SyntaxError,
|
||||
//Message: "Unexpected 'return' keyword",
|
||||
//Hint: "Did you mean to return a value?",
|
||||
Kind: compiler.SyntaxError,
|
||||
Message: "Expected expression after 'RETURN'",
|
||||
Hint: "Did you forget to provide a value after 'RETURN'?",
|
||||
}, "Syntax error: missing return value"),
|
||||
ErrorCase(
|
||||
`
|
||||
LET i =
|
||||
LET y = []
|
||||
RETURN i
|
||||
`, E{
|
||||
Kind: compiler.SyntaxError,
|
||||
Message: "_FAIL_",
|
||||
Hint: "",
|
||||
}, "Syntax error: missing variable assignment value"),
|
||||
ErrorCase(
|
||||
`
|
||||
LET i = NONE
|
||||
|
Reference in New Issue
Block a user