mirror of
https://github.com/MontFerret/ferret.git
synced 2025-08-15 20:02:56 +02:00
Refactor error handling and token tracking: replace direct access to TokenHistory with TokenNode, introduce the TokenNode struct, enhance TrackingTokenStream to improve token management, update the error-handling logic to leverage TokenNode traversal, and refine the messages reported for syntax errors.
This commit is contained in:
@@ -2,9 +2,10 @@ package compiler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/parser/fql"
|
||||
|
||||
"github.com/antlr4-go/antlr/v4"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/file"
|
||||
@@ -16,9 +17,9 @@ import (
|
||||
type (
|
||||
errorListener struct {
|
||||
*antlr.DiagnosticErrorListener
|
||||
src *file.Source
|
||||
handler *core.ErrorHandler
|
||||
lastTokens *parser.TokenHistory
|
||||
src *file.Source
|
||||
handler *core.ErrorHandler
|
||||
history *parser.TokenHistory
|
||||
}
|
||||
|
||||
errorPattern struct {
|
||||
@@ -28,12 +29,12 @@ type (
|
||||
}
|
||||
)
|
||||
|
||||
func newErrorListener(src *file.Source, handler *core.ErrorHandler, lastTokens *parser.TokenHistory) antlr.ErrorListener {
|
||||
func newErrorListener(src *file.Source, handler *core.ErrorHandler, history *parser.TokenHistory) antlr.ErrorListener {
|
||||
return &errorListener{
|
||||
DiagnosticErrorListener: antlr.NewDiagnosticErrorListener(false),
|
||||
src: src,
|
||||
handler: handler,
|
||||
lastTokens: lastTokens,
|
||||
history: history,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,11 +67,11 @@ func (d *errorListener) parseError(msg string, offending antlr.Token) *Compilati
|
||||
},
|
||||
}
|
||||
|
||||
for _, handler := range []func(*CompilationError, antlr.Token) bool{
|
||||
for _, handler := range []func(*CompilationError) bool{
|
||||
d.extraneousError,
|
||||
d.noViableAltError,
|
||||
} {
|
||||
if handler(err, offending) {
|
||||
if handler(err) {
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -78,12 +79,18 @@ func (d *errorListener) parseError(msg string, offending antlr.Token) *Compilati
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *errorListener) extraneousError(err *CompilationError, offending antlr.Token) (matched bool) {
|
||||
func (d *errorListener) extraneousError(err *CompilationError) (matched bool) {
|
||||
if !strings.Contains(err.Message, "extraneous input") {
|
||||
return false
|
||||
}
|
||||
|
||||
span := core.SpanFromTokenSafe(offending, d.src)
|
||||
last := d.history.Last()
|
||||
|
||||
if last == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
span := core.SpanFromTokenSafe(last.Token(), d.src)
|
||||
err.Spans = []core.ErrorSpan{
|
||||
core.NewMainErrorSpan(span, "query must end with a value"),
|
||||
}
|
||||
@@ -94,35 +101,73 @@ func (d *errorListener) extraneousError(err *CompilationError, offending antlr.T
|
||||
return true
|
||||
}
|
||||
|
||||
func (d *errorListener) noViableAltError(err *CompilationError, offending antlr.Token) bool {
|
||||
recognizer := regexp.MustCompile("no viable alternative at input '(\\w+).+'")
|
||||
|
||||
matches := recognizer.FindAllStringSubmatch(err.Message, -1)
|
||||
|
||||
if len(matches) == 0 {
|
||||
func (d *errorListener) noViableAltError(err *CompilationError) bool {
|
||||
if !strings.Contains(err.Message, "viable alternative at input") {
|
||||
return false
|
||||
}
|
||||
|
||||
last := d.lastTokens.Last()
|
||||
keyword := matches[0][1]
|
||||
start := file.SkipWhitespaceForward(d.src.Content(), last.GetStop()+1)
|
||||
span := file.Span{
|
||||
Start: start,
|
||||
End: start + len(keyword),
|
||||
if d.history.Size() < 2 {
|
||||
return false
|
||||
}
|
||||
|
||||
switch strings.ToLower(keyword) {
|
||||
case "return":
|
||||
err.Message = fmt.Sprintf("Expected expression after '%s'", keyword)
|
||||
err.Hint = fmt.Sprintf("Did you forget to provide a value after '%s'?", keyword)
|
||||
// most recent (offending)
|
||||
last := d.history.Last()
|
||||
|
||||
// Replace span with RETURN token’s span
|
||||
// CASE: RETURN [missing value]
|
||||
if isToken(last, "RETURN") && isKeyword(last.Token()) {
|
||||
span := core.SpanFromTokenSafe(last.Token(), d.src)
|
||||
|
||||
err.Message = fmt.Sprintf("Expected expression after '%s'", last)
|
||||
err.Hint = "Did you forget to provide a value to return?"
|
||||
err.Spans = []core.ErrorSpan{
|
||||
core.NewMainErrorSpan(span, "missing return value"),
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// CASE: LET x = [missing value]
|
||||
//if strtoken(last.Token()) == "LET" && isIdentifier(tokens[n-2]) && t1.GetText() == "=" {
|
||||
// varName := tokens[n-2].GetText()
|
||||
// span := core.SpanFromTokenSafe(tokens[n-1], d.src)
|
||||
//
|
||||
// err.Message = fmt.Sprintf("Expected expression after '=' for variable '%s'", varName)
|
||||
// err.Hint = "Did you forget to provide a value?"
|
||||
// err.Spans = []core.ErrorSpan{
|
||||
// core.NewMainErrorSpan(span, "missing value"),
|
||||
// }
|
||||
// return true
|
||||
//}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isIdentifier(token antlr.Token) bool {
|
||||
if token == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
tt := token.GetTokenType()
|
||||
|
||||
return tt == fql.FqlLexerIdentifier || tt == fql.FqlLexerIgnoreIdentifier
|
||||
}
|
||||
|
||||
func isKeyword(token antlr.Token) bool {
|
||||
if token == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
ttype := token.GetTokenType()
|
||||
|
||||
// 0 is usually invalid; <EOF> is -1
|
||||
if ttype <= 0 || ttype >= len(fql.FqlLexerLexerStaticData.LiteralNames) {
|
||||
return false
|
||||
}
|
||||
|
||||
lit := fql.FqlLexerLexerStaticData.LiteralNames[ttype]
|
||||
|
||||
return strings.HasPrefix(lit, "'") && strings.HasSuffix(lit, "'")
|
||||
}
|
||||
|
||||
func isToken(node *parser.TokenNode, expected string) bool {
|
||||
return strings.ToUpper(node.String()) == expected
|
||||
}
|
||||
|
@@ -16,8 +16,8 @@ func New(query string, tr ...TokenStreamTransformer) *Parser {
|
||||
// converts tokens to upper case, so now it doesn't matter
|
||||
// in which case the tokens were entered
|
||||
upper := newCaseChangingStream(input, true)
|
||||
|
||||
lexer := fql.NewFqlLexer(upper)
|
||||
|
||||
var stream antlr.TokenStream
|
||||
stream = antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
|
||||
|
||||
|
@@ -14,7 +14,7 @@ type CaseChangingStream struct {
|
||||
}
|
||||
|
||||
// newCaseChangingStream returns a new CaseChangingStream that forces
|
||||
// all tokens read from the underlying stream to be either upper case
|
||||
// all tokens read from the underlying stream to be either upper case
|
||||
// or lower case based on the upper argument.
|
||||
func newCaseChangingStream(in antlr.CharStream, upper bool) antlr.CharStream {
|
||||
return &CaseChangingStream{in, upper}
|
||||
|
@@ -4,26 +4,19 @@ import "github.com/antlr4-go/antlr/v4"
|
||||
|
||||
type TrackingTokenStream struct {
|
||||
antlr.TokenStream
|
||||
tokens *TokenHistory
|
||||
history *TokenHistory
|
||||
}
|
||||
|
||||
func NewTrackingTokenStream(stream antlr.TokenStream, history *TokenHistory) antlr.TokenStream {
|
||||
return &TrackingTokenStream{
|
||||
TokenStream: stream,
|
||||
tokens: history,
|
||||
history: history,
|
||||
}
|
||||
}
|
||||
|
||||
func (ts *TrackingTokenStream) Tokens() *TokenHistory {
|
||||
return ts.tokens
|
||||
}
|
||||
|
||||
func (ts *TrackingTokenStream) LT(i int) antlr.Token {
|
||||
tok := ts.TokenStream.LT(i)
|
||||
|
||||
if i == 1 && tok != nil && tok.GetTokenType() != antlr.TokenEOF {
|
||||
ts.tokens.Add(tok)
|
||||
}
|
||||
|
||||
return tok
|
||||
func (s *TrackingTokenStream) Consume() {
|
||||
// Get current token before advancing
|
||||
tok := s.LT(1)
|
||||
s.TokenStream.Consume()
|
||||
s.history.Add(tok)
|
||||
}
|
||||
|
@@ -2,35 +2,41 @@ package parser
|
||||
|
||||
import "github.com/antlr4-go/antlr/v4"
|
||||
|
||||
type (
|
||||
TokenNode struct {
|
||||
Token antlr.Token
|
||||
Prev *TokenNode
|
||||
Next *TokenNode
|
||||
}
|
||||
|
||||
TokenHistory struct {
|
||||
head *TokenNode
|
||||
tail *TokenNode
|
||||
size int
|
||||
cap int
|
||||
}
|
||||
)
|
||||
type TokenHistory struct {
|
||||
head *TokenNode
|
||||
tail *TokenNode
|
||||
size int
|
||||
cap int
|
||||
}
|
||||
|
||||
func NewTokenHistory(cap int) *TokenHistory {
|
||||
return &TokenHistory{cap: cap}
|
||||
}
|
||||
|
||||
func (h *TokenHistory) Size() int {
|
||||
return h.size
|
||||
}
|
||||
|
||||
func (h *TokenHistory) Add(token antlr.Token) {
|
||||
if token == nil {
|
||||
return
|
||||
}
|
||||
|
||||
node := &TokenNode{Token: token}
|
||||
// Avoid adding the same token twice in a row (by position, not just text)
|
||||
if h.head != nil {
|
||||
last := h.head.token
|
||||
if last.GetStart() == token.GetStart() &&
|
||||
last.GetStop() == token.GetStop() &&
|
||||
last.GetTokenType() == token.GetTokenType() {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
node := &TokenNode{token: token}
|
||||
|
||||
if h.head != nil {
|
||||
node.Next = h.head
|
||||
h.head.Prev = node
|
||||
node.next = h.head
|
||||
h.head.prev = node
|
||||
}
|
||||
|
||||
h.head = node
|
||||
@@ -43,33 +49,32 @@ func (h *TokenHistory) Add(token antlr.Token) {
|
||||
|
||||
if h.size > h.cap {
|
||||
// Remove oldest
|
||||
h.tail = h.tail.Prev
|
||||
h.tail = h.tail.prev
|
||||
|
||||
if h.tail != nil {
|
||||
h.tail.Next = nil
|
||||
h.tail.next = nil
|
||||
}
|
||||
|
||||
h.size--
|
||||
}
|
||||
}
|
||||
|
||||
func (h *TokenHistory) LastN(n int) []antlr.Token {
|
||||
result := make([]antlr.Token, 0, n)
|
||||
curr := h.head
|
||||
|
||||
for curr != nil && n > 0 {
|
||||
result = append(result, curr.Token)
|
||||
curr = curr.Next
|
||||
n--
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (h *TokenHistory) Last() antlr.Token {
|
||||
if h.tail == nil {
|
||||
func (h *TokenHistory) Last() *TokenNode {
|
||||
if h.head == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return h.tail.Token
|
||||
return h.head
|
||||
}
|
||||
|
||||
func (h *TokenHistory) Iterate(yield func(token antlr.Token) bool) {
|
||||
curr := h.tail
|
||||
|
||||
for curr != nil {
|
||||
if !yield(curr.token) {
|
||||
break
|
||||
}
|
||||
|
||||
curr = curr.prev
|
||||
}
|
||||
}
|
||||
|
55
pkg/parser/token_node.go
Normal file
55
pkg/parser/token_node.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"github.com/antlr4-go/antlr/v4"
|
||||
)
|
||||
|
||||
type TokenNode struct {
|
||||
token antlr.Token
|
||||
prev *TokenNode
|
||||
next *TokenNode
|
||||
}
|
||||
|
||||
func (t *TokenNode) Token() antlr.Token {
|
||||
return t.token
|
||||
}
|
||||
|
||||
func (t *TokenNode) Prev() *TokenNode {
|
||||
return t.prev
|
||||
}
|
||||
|
||||
func (t *TokenNode) Next() *TokenNode {
|
||||
return t.next
|
||||
}
|
||||
|
||||
func (t *TokenNode) PrevAt(n int) *TokenNode {
|
||||
if n <= 0 {
|
||||
return t
|
||||
}
|
||||
|
||||
node := t
|
||||
|
||||
for i := 0; i < n && node != nil; i++ {
|
||||
node = node.prev
|
||||
}
|
||||
|
||||
return node
|
||||
}
|
||||
|
||||
func (t *TokenNode) NextAt(n int) *TokenNode {
|
||||
if n <= 0 {
|
||||
return t
|
||||
}
|
||||
|
||||
node := t
|
||||
|
||||
for i := 0; i < n && node != nil; i++ {
|
||||
node = node.next
|
||||
}
|
||||
|
||||
return node
|
||||
}
|
||||
|
||||
func (t *TokenNode) String() string {
|
||||
return t.token.GetText()
|
||||
}
|
@@ -46,7 +46,7 @@ func TestGET(t *testing.T) {
|
||||
httpmock.RegisterResponder("GET", url,
|
||||
func(req *h.Request) (*h.Response, error) {
|
||||
if req.Header.Get("X-Token") != "Ferret" {
|
||||
return nil, errors.Errorf("Expected X-Token to be Ferret, but got %s", req.Header.Get("X-Token"))
|
||||
return nil, errors.Errorf("Expected X-token to be Ferret, but got %s", req.Header.Get("X-Token"))
|
||||
}
|
||||
|
||||
if req.Header.Get("X-From") != "localhost" {
|
||||
@@ -61,7 +61,7 @@ func TestGET(t *testing.T) {
|
||||
out, err := http.GET(ctx, runtime.NewObjectWith(
|
||||
runtime.NewObjectProperty("url", core.NewString(url)),
|
||||
runtime.NewObjectProperty("headers", runtime.NewObjectWith(
|
||||
runtime.NewObjectProperty("X-Token", core.NewString("Ferret")),
|
||||
runtime.NewObjectProperty("X-token", core.NewString("Ferret")),
|
||||
runtime.NewObjectProperty("X-From", core.NewString("localhost")),
|
||||
)),
|
||||
))
|
||||
|
@@ -23,12 +23,11 @@ func TestErrors(t *testing.T) {
|
||||
`, E{
|
||||
Kind: compiler.SyntaxError,
|
||||
Message: "Expected expression after 'RETURN'",
|
||||
Hint: "Did you forget to provide a value after 'RETURN'?",
|
||||
Hint: "Did you forget to provide a value to return?",
|
||||
}, "Syntax error: missing return value"),
|
||||
ErrorCase(
|
||||
`
|
||||
LET i =
|
||||
LET y = []
|
||||
LET i =
|
||||
RETURN i
|
||||
`, E{
|
||||
Kind: compiler.SyntaxError,
|
||||
|
Reference in New Issue
Block a user