1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-08-13 19:52:52 +02:00

Refactor error handling and token tracking: replace direct access to TokenHistory with TokenNode, introduce the TokenNode struct, enhance TrackingTokenStream to improve token management, update error-handling logic to leverage TokenNode traversal, and refine error messaging for syntax errors.

This commit is contained in:
Tim Voronov
2025-07-30 22:07:03 -04:00
parent 26fa6cd3c3
commit 57390fc901
8 changed files with 181 additions and 84 deletions

View File

@@ -2,9 +2,10 @@ package compiler
import (
"fmt"
"regexp"
"strings"
"github.com/MontFerret/ferret/pkg/parser/fql"
"github.com/antlr4-go/antlr/v4"
"github.com/MontFerret/ferret/pkg/file"
@@ -16,9 +17,9 @@ import (
type (
errorListener struct {
*antlr.DiagnosticErrorListener
src *file.Source
handler *core.ErrorHandler
lastTokens *parser.TokenHistory
src *file.Source
handler *core.ErrorHandler
history *parser.TokenHistory
}
errorPattern struct {
@@ -28,12 +29,12 @@ type (
}
)
func newErrorListener(src *file.Source, handler *core.ErrorHandler, lastTokens *parser.TokenHistory) antlr.ErrorListener {
func newErrorListener(src *file.Source, handler *core.ErrorHandler, history *parser.TokenHistory) antlr.ErrorListener {
return &errorListener{
DiagnosticErrorListener: antlr.NewDiagnosticErrorListener(false),
src: src,
handler: handler,
lastTokens: lastTokens,
history: history,
}
}
@@ -66,11 +67,11 @@ func (d *errorListener) parseError(msg string, offending antlr.Token) *Compilati
},
}
for _, handler := range []func(*CompilationError, antlr.Token) bool{
for _, handler := range []func(*CompilationError) bool{
d.extraneousError,
d.noViableAltError,
} {
if handler(err, offending) {
if handler(err) {
break
}
}
@@ -78,12 +79,18 @@ func (d *errorListener) parseError(msg string, offending antlr.Token) *Compilati
return err
}
func (d *errorListener) extraneousError(err *CompilationError, offending antlr.Token) (matched bool) {
func (d *errorListener) extraneousError(err *CompilationError) (matched bool) {
if !strings.Contains(err.Message, "extraneous input") {
return false
}
span := core.SpanFromTokenSafe(offending, d.src)
last := d.history.Last()
if last == nil {
return false
}
span := core.SpanFromTokenSafe(last.Token(), d.src)
err.Spans = []core.ErrorSpan{
core.NewMainErrorSpan(span, "query must end with a value"),
}
@@ -94,35 +101,73 @@ func (d *errorListener) extraneousError(err *CompilationError, offending antlr.T
return true
}
func (d *errorListener) noViableAltError(err *CompilationError, offending antlr.Token) bool {
recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'")
matches := recognizer.FindAllStringSubmatch(err.Message, -1)
if len(matches) == 0 {
func (d *errorListener) noViableAltError(err *CompilationError) bool {
if !strings.Contains(err.Message, "viable alternative at input") {
return false
}
last := d.lastTokens.Last()
keyword := matches[0][1]
start := file.SkipWhitespaceForward(d.src.Content(), last.GetStop()+1)
span := file.Span{
Start: start,
End: start + len(keyword),
if d.history.Size() < 2 {
return false
}
switch strings.ToLower(keyword) {
case "return":
err.Message = fmt.Sprintf("Expected expression after '%s'", keyword)
err.Hint = fmt.Sprintf("Did you forget to provide a value after '%s'?", keyword)
// most recent (offending)
last := d.history.Last()
// Replace span with RETURN token’s span
// CASE: RETURN [missing value]
if isToken(last, "RETURN") && isKeyword(last.Token()) {
span := core.SpanFromTokenSafe(last.Token(), d.src)
err.Message = fmt.Sprintf("Expected expression after '%s'", last)
err.Hint = "Did you forget to provide a value to return?"
err.Spans = []core.ErrorSpan{
core.NewMainErrorSpan(span, "missing return value"),
}
return true
}
// CASE: LET x = [missing value]
//if strtoken(last.Token()) == "LET" && isIdentifier(tokens[n-2]) && t1.GetText() == "=" {
// varName := tokens[n-2].GetText()
// span := core.SpanFromTokenSafe(tokens[n-1], d.src)
//
// err.Message = fmt.Sprintf("Expected expression after '=' for variable '%s'", varName)
// err.Hint = "Did you forget to provide a value?"
// err.Spans = []core.ErrorSpan{
// core.NewMainErrorSpan(span, "missing value"),
// }
// return true
//}
return false
}
// isIdentifier reports whether the given token is an FQL identifier
// (either a regular or an ignorable identifier). A nil token is never
// an identifier.
func isIdentifier(token antlr.Token) bool {
	if token == nil {
		return false
	}

	switch token.GetTokenType() {
	case fql.FqlLexerIdentifier, fql.FqlLexerIgnoreIdentifier:
		return true
	default:
		return false
	}
}
// isKeyword reports whether the token corresponds to a quoted literal in
// the generated lexer's static data, which is how ANTLR represents fixed
// keywords such as 'RETURN' or 'LET'.
func isKeyword(token antlr.Token) bool {
	if token == nil {
		return false
	}

	names := fql.FqlLexerLexerStaticData.LiteralNames
	tt := token.GetTokenType()

	// Token type 0 is usually invalid and <EOF> is -1; both fall outside
	// the literal-names table.
	if tt <= 0 || tt >= len(names) {
		return false
	}

	name := names[tt]

	return strings.HasPrefix(name, "'") && strings.HasSuffix(name, "'")
}
// isToken reports whether the node's token text matches the expected
// keyword, compared case-insensitively. A nil node never matches; the
// original implementation would dereference it and panic.
func isToken(node *parser.TokenNode, expected string) bool {
	if node == nil {
		return false
	}

	// EqualFold avoids allocating an upper-cased copy of the token text.
	return strings.EqualFold(node.String(), expected)
}

View File

@@ -16,8 +16,8 @@ func New(query string, tr ...TokenStreamTransformer) *Parser {
// converts tokens to upper case, so now it doesn't matter
// in which case the tokens were entered
upper := newCaseChangingStream(input, true)
lexer := fql.NewFqlLexer(upper)
var stream antlr.TokenStream
stream = antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)

View File

@@ -14,7 +14,7 @@ type CaseChangingStream struct {
}
// newCaseChangingStream returns a new CaseChangingStream that forces
// all tokens read from the underlying stream to be either upper case
// all tokens read from the underlying stream to be either upper case
// or lower case based on the upper argument.
func newCaseChangingStream(in antlr.CharStream, upper bool) antlr.CharStream {
return &CaseChangingStream{in, upper}

View File

@@ -4,26 +4,19 @@ import "github.com/antlr4-go/antlr/v4"
type TrackingTokenStream struct {
antlr.TokenStream
tokens *TokenHistory
history *TokenHistory
}
func NewTrackingTokenStream(stream antlr.TokenStream, history *TokenHistory) antlr.TokenStream {
return &TrackingTokenStream{
TokenStream: stream,
tokens: history,
history: history,
}
}
func (ts *TrackingTokenStream) Tokens() *TokenHistory {
return ts.tokens
}
func (ts *TrackingTokenStream) LT(i int) antlr.Token {
tok := ts.TokenStream.LT(i)
if i == 1 && tok != nil && tok.GetTokenType() != antlr.TokenEOF {
ts.tokens.Add(tok)
}
return tok
// Consume advances the underlying token stream by one token and records
// the token that was consumed into the shared history.
func (s *TrackingTokenStream) Consume() {
	// Get current token before advancing
	tok := s.LT(1)

	// NOTE(review): unlike the previous LT-based tracking, this records
	// every consumed token, including EOF — TokenHistory.Add is assumed
	// to tolerate that; confirm whether EOF should be filtered out here.
	s.TokenStream.Consume()
	s.history.Add(tok)
}

View File

@@ -2,35 +2,41 @@ package parser
import "github.com/antlr4-go/antlr/v4"
type (
TokenNode struct {
Token antlr.Token
Prev *TokenNode
Next *TokenNode
}
TokenHistory struct {
head *TokenNode
tail *TokenNode
size int
cap int
}
)
type TokenHistory struct {
head *TokenNode
tail *TokenNode
size int
cap int
}
// NewTokenHistory creates an empty history that retains at most the
// given number of most recent tokens.
func NewTokenHistory(capacity int) *TokenHistory {
	h := TokenHistory{}
	h.cap = capacity

	return &h
}
// Size returns the number of tokens currently stored in the history.
func (h *TokenHistory) Size() int {
	return h.size
}
func (h *TokenHistory) Add(token antlr.Token) {
if token == nil {
return
}
node := &TokenNode{Token: token}
// Avoid adding the same token twice in a row (by position, not just text)
if h.head != nil {
last := h.head.token
if last.GetStart() == token.GetStart() &&
last.GetStop() == token.GetStop() &&
last.GetTokenType() == token.GetTokenType() {
return
}
}
node := &TokenNode{token: token}
if h.head != nil {
node.Next = h.head
h.head.Prev = node
node.next = h.head
h.head.prev = node
}
h.head = node
@@ -43,33 +49,32 @@ func (h *TokenHistory) Add(token antlr.Token) {
if h.size > h.cap {
// Remove oldest
h.tail = h.tail.Prev
h.tail = h.tail.prev
if h.tail != nil {
h.tail.Next = nil
h.tail.next = nil
}
h.size--
}
}
// LastN returns up to n of the most recently added tokens, newest first.
// Fewer than n tokens are returned when the history holds less than n.
//
// Fix: the previous body accessed curr.Token and curr.Next, but TokenNode
// declares unexported fields (token, prev, next) — exported field access
// does not compile against that definition.
func (h *TokenHistory) LastN(n int) []antlr.Token {
	result := make([]antlr.Token, 0, n)
	curr := h.head

	for curr != nil && n > 0 {
		// head is the newest entry; following next walks toward older tokens.
		result = append(result, curr.token)
		curr = curr.next
		n--
	}

	return result
}
func (h *TokenHistory) Last() antlr.Token {
if h.tail == nil {
func (h *TokenHistory) Last() *TokenNode {
if h.head == nil {
return nil
}
return h.tail.Token
return h.head
}
// Iterate visits the stored tokens starting from the oldest entry (the
// tail) and stops early as soon as yield returns false.
func (h *TokenHistory) Iterate(yield func(token antlr.Token) bool) {
	for node := h.tail; node != nil; node = node.prev {
		if !yield(node.token) {
			return
		}
	}
}

55
pkg/parser/token_node.go Normal file
View File

@@ -0,0 +1,55 @@
package parser
import (
"github.com/antlr4-go/antlr/v4"
)
// TokenNode is a node in the doubly linked list maintained by
// TokenHistory, wrapping a single ANTLR token.
type TokenNode struct {
	token antlr.Token // the wrapped token
	prev  *TokenNode  // neighboring node; appears to point toward newer entries — confirm against TokenHistory.Add
	next  *TokenNode  // neighboring node; appears to point toward older entries — confirm against TokenHistory.Add
}
// Token returns the ANTLR token wrapped by this node.
func (t *TokenNode) Token() antlr.Token {
	return t.token
}
// Prev returns the neighboring node in the prev direction, or nil when
// there is none.
func (t *TokenNode) Prev() *TokenNode {
	return t.prev
}
// Next returns the neighboring node in the next direction, or nil when
// there is none.
func (t *TokenNode) Next() *TokenNode {
	return t.next
}
// PrevAt walks n steps along prev links and returns the node it lands on.
// For n <= 0 the receiver itself is returned; nil is returned when the
// chain runs out before n steps are taken.
func (t *TokenNode) PrevAt(n int) *TokenNode {
	current := t

	for step := n; step > 0; step-- {
		if current == nil {
			return nil
		}

		current = current.prev
	}

	return current
}
// NextAt walks n steps along next links and returns the node it lands on.
// For n <= 0 the receiver itself is returned; nil is returned when the
// chain runs out before n steps are taken.
func (t *TokenNode) NextAt(n int) *TokenNode {
	current := t

	for step := n; step > 0; step-- {
		if current == nil {
			return nil
		}

		current = current.next
	}

	return current
}
// String returns the text of the wrapped token, implementing
// fmt.Stringer (the error listener formats nodes with %s).
// It returns an empty string instead of panicking when the node or its
// token is missing, since nodes reach formatting on error paths.
func (t *TokenNode) String() string {
	if t == nil || t.token == nil {
		return ""
	}

	return t.token.GetText()
}

View File

@@ -46,7 +46,7 @@ func TestGET(t *testing.T) {
httpmock.RegisterResponder("GET", url,
func(req *h.Request) (*h.Response, error) {
if req.Header.Get("X-Token") != "Ferret" {
return nil, errors.Errorf("Expected X-Token to be Ferret, but got %s", req.Header.Get("X-Token"))
return nil, errors.Errorf("Expected X-token to be Ferret, but got %s", req.Header.Get("X-Token"))
}
if req.Header.Get("X-From") != "localhost" {
@@ -61,7 +61,7 @@ func TestGET(t *testing.T) {
out, err := http.GET(ctx, runtime.NewObjectWith(
runtime.NewObjectProperty("url", core.NewString(url)),
runtime.NewObjectProperty("headers", runtime.NewObjectWith(
runtime.NewObjectProperty("X-Token", core.NewString("Ferret")),
runtime.NewObjectProperty("X-token", core.NewString("Ferret")),
runtime.NewObjectProperty("X-From", core.NewString("localhost")),
)),
))

View File

@@ -23,12 +23,11 @@ func TestErrors(t *testing.T) {
`, E{
Kind: compiler.SyntaxError,
Message: "Expected expression after 'RETURN'",
Hint: "Did you forget to provide a value after 'RETURN'?",
Hint: "Did you forget to provide a value to return?",
}, "Syntax error: missing return value"),
ErrorCase(
`
LET i =
LET y = []
LET i =
RETURN i
`, E{
Kind: compiler.SyntaxError,