1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-11-23 22:24:39 +02:00

feat: improve tracing

`--trace` now outputs one JSON object per line with tracing information, e.g.

```
{"lexer":"markdown","state":"root","rule":15,"pos":0,"elapsedMs":0.022875}
{"lexer":"markdown","state":"root","rule":15,"pos":1,"elapsedMs":0.002667}
{"lexer":"markdown","state":"root","rule":15,"pos":2,"elapsedMs":0.001833}
{"lexer":"markdown","state":"root","rule":15,"pos":3,"elapsedMs":0.002166}
{"lexer":"markdown","state":"root","rule":15,"pos":4,"elapsedMs":0.002125}
```

This should generally be much more amenable to analysis, e.g. convenient
filtering using jq to help track down hotspots:

```
chroma --trace docs.md 2>&1 > /dev/null | jq 'select(. | .elapsedMs > 0.1)' | less
```
This commit is contained in:
Alec Thomas
2025-08-04 13:51:19 +10:00
parent 1f48e65abc
commit 303b65df3f
4 changed files with 61 additions and 5 deletions

View File

@@ -1,6 +1,7 @@
package chroma
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
@@ -135,11 +136,20 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
}
// Trace enables or disables debug tracing and returns the receiver so that
// calls can be chained.
//
// It is a thin wrapper around SetTracing, kept for backwards compatibility.
//
// Deprecated: Use SetTracing instead.
func (r *RegexLexer) Trace(trace bool) *RegexLexer {
	// Delegate so the tracing flag is only ever mutated in one place.
	r.SetTracing(trace)
	return r
}
// SetTracing enables debug tracing when trace is true and disables it when
// trace is false.
//
// While tracing is enabled, LexerState.Iterator writes trace output to
// os.Stderr.
//
// This complies with the [TracingLexer] interface.
func (r *RegexLexer) SetTracing(trace bool) {
r.trace = trace
}
// A CompiledRule is a Rule with a pre-compiled regex.
//
// Note that regular expressions are lazily compiled on first use of the lexer.
@@ -185,6 +195,7 @@ func (l *LexerState) Get(key interface{}) interface{} {
// Iterator returns the next Token from the lexer.
func (l *LexerState) Iterator() Token { // nolint: gocognit
trace := json.NewEncoder(os.Stderr)
end := len(l.Text)
if l.newlineAdded {
end--
@@ -205,14 +216,33 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
}
l.State = l.Stack[len(l.Stack)-1]
if l.Lexer.trace {
fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q\n", l.State, l.Pos, string(l.Text[l.Pos:]))
}
selectedRule, ok := l.Rules[l.State]
if !ok {
panic("unknown state " + l.State)
}
var start time.Time
if l.Lexer.trace {
start = time.Now()
}
ruleIndex, rule, groups, namedGroups := matchRules(l.Text, l.Pos, selectedRule)
if l.Lexer.trace {
var length int
if groups != nil {
length = len(groups[0])
} else {
length = -1
}
_ = trace.Encode(Trace{ //nolint
Lexer: l.Lexer.config.Name,
State: l.State,
Rule: ruleIndex,
Pattern: rule.Pattern,
Pos: l.Pos,
Length: length,
Elapsed: float64(time.Since(start)) / float64(time.Millisecond),
})
// fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q, elapsed=%s\n", l.State, l.Pos, string(l.Text[l.Pos:]), time.Since(start))
}
// No match.
if groups == nil {
// From Pygments :\