diff --git a/cmd/chroma/main.go b/cmd/chroma/main.go
index b31ac84..cf3916b 100644
--- a/cmd/chroma/main.go
+++ b/cmd/chroma/main.go
@@ -346,8 +346,8 @@ func listAll() {
 }
 
 func lex(ctx *kong.Context, lexer chroma.Lexer, contents string) chroma.Iterator {
-	if rel, ok := lexer.(*chroma.RegexLexer); ok {
-		rel.Trace(cli.Trace)
+	if rel, ok := lexer.(chroma.TracingLexer); ok {
+		rel.SetTracing(cli.Trace)
 	}
 	lexer = chroma.Coalesce(lexer)
 	it, err := lexer.Tokenise(nil, contents)
diff --git a/delegate.go b/delegate.go
index f848194..298f2db 100644
--- a/delegate.go
+++ b/delegate.go
@@ -24,6 +24,15 @@ func DelegatingLexer(root Lexer, language Lexer) Lexer {
 	}
 }
 
+func (d *delegatingLexer) SetTracing(enable bool) {
+	if l, ok := d.language.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+	if l, ok := d.root.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+}
+
 func (d *delegatingLexer) AnalyseText(text string) float32 {
 	return d.root.AnalyseText(text)
 }
diff --git a/lexer.go b/lexer.go
index eb027bf..602db1c 100644
--- a/lexer.go
+++ b/lexer.go
@@ -130,6 +130,23 @@ type Lexer interface {
 	AnalyseText(text string) float32
 }
 
+// Trace is the trace of a tokenisation process.
+type Trace struct {
+	Lexer   string  `json:"lexer"`
+	State   string  `json:"state"`
+	Rule    int     `json:"rule"`
+	Pattern string  `json:"pattern"`
+	Pos     int     `json:"pos"`
+	Length  int     `json:"length"`
+	Elapsed float64 `json:"elapsedMs"` // Elapsed time spent matching for this rule.
+}
+
+// TracingLexer is a Lexer that can trace its tokenisation process.
+type TracingLexer interface {
+	Lexer
+	SetTracing(enable bool)
+}
+
 // Lexers is a slice of lexers sortable by name.
 type Lexers []Lexer
 
diff --git a/regexp.go b/regexp.go
index 8d0bd4a..c0e5e10 100644
--- a/regexp.go
+++ b/regexp.go
@@ -1,6 +1,7 @@
 package chroma
 
 import (
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -135,11 +136,20 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
 }
 
 // Trace enables debug tracing.
+//
+// Deprecated: Use SetTracing instead.
 func (r *RegexLexer) Trace(trace bool) *RegexLexer {
 	r.trace = trace
 	return r
 }
 
+// SetTracing enables debug tracing.
+//
+// This complies with the [TracingLexer] interface.
+func (r *RegexLexer) SetTracing(trace bool) {
+	r.trace = trace
+}
+
 // A CompiledRule is a Rule with a pre-compiled regex.
 //
 // Note that regular expressions are lazily compiled on first use of the lexer.
@@ -185,6 +195,7 @@ func (l *LexerState) Get(key interface{}) interface{} {
 
 // Iterator returns the next Token from the lexer.
 func (l *LexerState) Iterator() Token { // nolint: gocognit
+	trace := json.NewEncoder(os.Stderr)
 	end := len(l.Text)
 	if l.newlineAdded {
 		end--
@@ -205,14 +216,33 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
 		}
 
 		l.State = l.Stack[len(l.Stack)-1]
-		if l.Lexer.trace {
-			fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q\n", l.State, l.Pos, string(l.Text[l.Pos:]))
-		}
 		selectedRule, ok := l.Rules[l.State]
 		if !ok {
 			panic("unknown state " + l.State)
 		}
+		var start time.Time
+		if l.Lexer.trace {
+			start = time.Now()
+		}
 		ruleIndex, rule, groups, namedGroups := matchRules(l.Text, l.Pos, selectedRule)
+		if l.Lexer.trace {
+			var length int
+			if groups != nil {
+				length = len(groups[0])
+			} else {
+				length = -1
+			}
+			_ = trace.Encode(Trace{ //nolint
+				Lexer:   l.Lexer.config.Name,
+				State:   l.State,
+				Rule:    ruleIndex,
+				Pattern: rule.Pattern,
+				Pos:     l.Pos,
+				Length:  length,
+				Elapsed: float64(time.Since(start)) / float64(time.Millisecond),
+			})
+			// fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q, elapsed=%s\n", l.State, l.Pos, string(l.Text[l.Pos:]), time.Since(start))
+		}
 		// No match.
 		if groups == nil {
 			// From Pygments :\
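For context, a minimal sketch of how a caller can drive the new interface once this patch is applied. It assumes the v2 module layout (`github.com/alecthomas/chroma/v2` plus its `lexers` subpackage); `cmd/chroma/main.go` above performs essentially the same type assertion.

```go
package main

import (
	"fmt"
	"os"

	"github.com/alecthomas/chroma/v2"
	"github.com/alecthomas/chroma/v2/lexers"
)

func main() {
	lexer := lexers.Get("go")
	if lexer == nil {
		lexer = lexers.Fallback
	}
	// The assertion now succeeds for any tracing-capable lexer, not just
	// *chroma.RegexLexer; delegating lexers forward the call to both their
	// root and language lexers.
	if tl, ok := lexer.(chroma.TracingLexer); ok {
		tl.SetTracing(true)
	}
	it, err := lexer.Tokenise(nil, "package main\n")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	// Draining the iterator emits one JSON-encoded Trace record per rule
	// match on stderr.
	for t := it(); t != chroma.EOF; t = it() {
		_ = t
	}
}
```

Emitting each Trace as a single JSON object per line on stderr keeps the output machine-readable, so slow rules can be located by sorting on `elapsedMs` with standard tooling rather than eyeballing the old free-form `fmt.Fprintf` output.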