Mirror of https://github.com/alecthomas/chroma.git

feat: improve tracing

`--trace` now outputs tracing information as a stream of JSON objects on stderr, one per rule match attempt, e.g.

```json
{"lexer":"markdown","state":"root","rule":15,"pos":0,"elapsedMs":0.022875}
{"lexer":"markdown","state":"root","rule":15,"pos":1,"elapsedMs":0.002667}
{"lexer":"markdown","state":"root","rule":15,"pos":2,"elapsedMs":0.001833}
{"lexer":"markdown","state":"root","rule":15,"pos":3,"elapsedMs":0.002166}
{"lexer":"markdown","state":"root","rule":15,"pos":4,"elapsedMs":0.002125}
```

This should generally be much more amenable to analysis, e.g. convenient
filtering with jq to help track down hotspots:

```sh
chroma --trace docs.md 2>&1 > /dev/null | jq 'select(. | .elapsedMs > 0.1)' | less
```
Author: Alec Thomas
Date: 2025-08-04 13:51:19 +10:00
Parent: 1f48e65abc
Commit: 303b65df3f

4 changed files with 61 additions and 5 deletions
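For heavier analysis than one-off jq filters, the trace stream can also be consumed programmatically. Below is a minimal companion sketch, not part of this commit: the field names mirror the `Trace` struct added below, while the tool itself and its aggregation key are illustrative. It reads trace lines from stdin (e.g. `chroma --trace docs.md 2>&1 >/dev/null | tracesum`) and totals elapsed time per lexer/state/rule:

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"os"
	"sort"
)

// trace mirrors the JSON fields emitted by the lexer's tracer.
type trace struct {
	Lexer   string  `json:"lexer"`
	State   string  `json:"state"`
	Rule    int     `json:"rule"`
	Pos     int     `json:"pos"`
	Elapsed float64 `json:"elapsedMs"`
}

func main() {
	totals := map[string]float64{}
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		var t trace
		if err := json.Unmarshal(scanner.Bytes(), &t); err != nil {
			continue // skip any non-trace lines on the stream
		}
		// Key hotspots by lexer, state and rule index (illustrative choice).
		totals[fmt.Sprintf("%s/%s/rule %d", t.Lexer, t.State, t.Rule)] += t.Elapsed
	}
	keys := make([]string, 0, len(totals))
	for k := range totals {
		keys = append(keys, k)
	}
	// Sort hottest first.
	sort.Slice(keys, func(i, j int) bool { return totals[keys[i]] > totals[keys[j]] })
	for _, k := range keys {
		fmt.Printf("%10.3fms  %s\n", totals[k], k)
	}
}
```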


```diff
@@ -346,8 +346,8 @@ func listAll() {
 }
 
 func lex(ctx *kong.Context, lexer chroma.Lexer, contents string) chroma.Iterator {
-	if rel, ok := lexer.(*chroma.RegexLexer); ok {
-		rel.Trace(cli.Trace)
+	if rel, ok := lexer.(chroma.TracingLexer); ok {
+		rel.SetTracing(cli.Trace)
 	}
 	lexer = chroma.Coalesce(lexer)
 	it, err := lexer.Tokenise(nil, contents)
```


```diff
@@ -24,6 +24,15 @@ func DelegatingLexer(root Lexer, language Lexer) Lexer {
 	}
 }
 
+func (d *delegatingLexer) SetTracing(enable bool) {
+	if l, ok := d.language.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+	if l, ok := d.root.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+}
+
 func (d *delegatingLexer) AnalyseText(text string) float32 {
 	return d.root.AnalyseText(text)
 }
```
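Because the CLI now asserts against `chroma.TracingLexer` rather than the concrete `*chroma.RegexLexer`, tracing also reaches wrapped lexers such as the delegating lexer above, which forwards `SetTracing` to both of its sub-lexers. A sketch of enabling it from user code; the `chroma/v2` module path and `lexers.Get` are assumptions from the wider library, not part of this diff:

```go
package main

import (
	"github.com/alecthomas/chroma/v2"
	"github.com/alecthomas/chroma/v2/lexers"
)

func main() {
	// e.g. PHP delegated within HTML; both sub-lexers receive the setting.
	d := chroma.DelegatingLexer(lexers.Get("html"), lexers.Get("php"))
	if tl, ok := d.(chroma.TracingLexer); ok {
		tl.SetTracing(true)
	}
}
```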


```diff
@@ -130,6 +130,23 @@ type Lexer interface {
 	AnalyseText(text string) float32
 }
 
+// Trace is the trace of a tokenisation process.
+type Trace struct {
+	Lexer   string  `json:"lexer"`
+	State   string  `json:"state"`
+	Rule    int     `json:"rule"`
+	Pattern string  `json:"pattern"`
+	Pos     int     `json:"pos"`
+	Length  int     `json:"length"`
+	Elapsed float64 `json:"elapsedMs"` // Elapsed time spent matching for this rule.
+}
+
+// TracingLexer is a Lexer that can trace its tokenisation process.
+type TracingLexer interface {
+	Lexer
+	SetTracing(enable bool)
+}
+
 // Lexers is a slice of lexers sortable by name.
 type Lexers []Lexer
```
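Any other wrapper lexer can opt in the same way by forwarding `SetTracing`, the pattern the delegating lexer uses above. A hypothetical sketch; the `loggingLexer` type is invented for illustration:

```go
package main

import "github.com/alecthomas/chroma/v2"

// loggingLexer is a hypothetical wrapper around another lexer. Embedding
// chroma.Lexer provides the Lexer methods; adding SetTracing makes
// *loggingLexer satisfy chroma.TracingLexer.
type loggingLexer struct {
	chroma.Lexer
}

// SetTracing forwards the setting to the wrapped lexer when it supports tracing.
func (l *loggingLexer) SetTracing(enable bool) {
	if tl, ok := l.Lexer.(chroma.TracingLexer); ok {
		tl.SetTracing(enable)
	}
}
```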


```diff
@@ -1,6 +1,7 @@
 package chroma
 
 import (
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -135,11 +136,20 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
 }
 
 // Trace enables debug tracing.
+//
+// Deprecated: Use SetTracing instead.
 func (r *RegexLexer) Trace(trace bool) *RegexLexer {
 	r.trace = trace
 	return r
 }
 
+// SetTracing enables debug tracing.
+//
+// This complies with the [TracingLexer] interface.
+func (r *RegexLexer) SetTracing(trace bool) {
+	r.trace = trace
+}
+
 // A CompiledRule is a Rule with a pre-compiled regex.
 //
 // Note that regular expressions are lazily compiled on first use of the lexer.
@@ -185,6 +195,7 @@ func (l *LexerState) Get(key interface{}) interface{} {
 
 // Iterator returns the next Token from the lexer.
 func (l *LexerState) Iterator() Token { // nolint: gocognit
+	trace := json.NewEncoder(os.Stderr)
 	end := len(l.Text)
 	if l.newlineAdded {
 		end--
@@ -205,14 +216,33 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
 		}
 		l.State = l.Stack[len(l.Stack)-1]
-		if l.Lexer.trace {
-			fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q\n", l.State, l.Pos, string(l.Text[l.Pos:]))
-		}
 		selectedRule, ok := l.Rules[l.State]
 		if !ok {
 			panic("unknown state " + l.State)
 		}
+		var start time.Time
+		if l.Lexer.trace {
+			start = time.Now()
+		}
 		ruleIndex, rule, groups, namedGroups := matchRules(l.Text, l.Pos, selectedRule)
+		if l.Lexer.trace {
+			var length int
+			if groups != nil {
+				length = len(groups[0])
+			} else {
+				length = -1
+			}
+			_ = trace.Encode(Trace{ //nolint
+				Lexer:   l.Lexer.config.Name,
+				State:   l.State,
+				Rule:    ruleIndex,
+				Pattern: rule.Pattern,
+				Pos:     l.Pos,
+				Length:  length,
+				Elapsed: float64(time.Since(start)) / float64(time.Millisecond),
+			})
+			// fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q, elapsed=%s\n", l.State, l.Pos, string(l.Text[l.Pos:]), time.Since(start))
+		}
 
 		// No match.
 		if groups == nil {
 			// From Pygments :\
```
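For existing callers, migrating off the deprecated builder-style `Trace` is mechanical. A sketch, assuming a caller-supplied config and rules function per the `NewLexer` signature above:

```go
package main

import "github.com/alecthomas/chroma/v2"

// newTracedLexer shows the migration from the deprecated builder-style
// Trace to SetTracing; names follow this commit's regexlexer.go hunk.
func newTracedLexer(cfg *chroma.Config, rules func() chroma.Rules) (*chroma.RegexLexer, error) {
	lexer, err := chroma.NewLexer(cfg, rules)
	if err != nil {
		return nil, err
	}
	// Before this commit: lexer.Trace(true) (still works, now deprecated).
	lexer.SetTracing(true)
	return lexer, nil
}
```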