feat: improve tracing
`--trace` now outputs a JSON structure with tracing information, e.g.

```
{"lexer":"markdown","state":"root","rule":15,"pos":0,"elapsedMs":0.022875}
{"lexer":"markdown","state":"root","rule":15,"pos":1,"elapsedMs":0.002667}
{"lexer":"markdown","state":"root","rule":15,"pos":2,"elapsedMs":0.001833}
{"lexer":"markdown","state":"root","rule":15,"pos":3,"elapsedMs":0.002166}
{"lexer":"markdown","state":"root","rule":15,"pos":4,"elapsedMs":0.002125}
```

This should generally be much more amenable to analysis, e.g. convenient filtering using jq to help track down hotspots:

```
chroma --trace docs.md 2>&1 > /dev/null | jq 'select(. | .elapsedMs > 0.1)' | less
```
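Because each record is a self-contained JSON object, jq can aggregate as well as filter. For example (a hypothetical follow-on to the pipeline above, using the same trace stream), summing elapsed time per state/rule pair to rank hotspots:

```
chroma --trace docs.md 2>&1 > /dev/null \
  | jq -s 'group_by([.state, .rule])
           | map({state: .[0].state, rule: .[0].rule, totalMs: (map(.elapsedMs) | add)})
           | sort_by(-.totalMs) | .[0:10]'
```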
```diff
@@ -346,8 +346,8 @@ func listAll() {
 }
 
 func lex(ctx *kong.Context, lexer chroma.Lexer, contents string) chroma.Iterator {
-	if rel, ok := lexer.(*chroma.RegexLexer); ok {
-		rel.Trace(cli.Trace)
+	if rel, ok := lexer.(chroma.TracingLexer); ok {
+		rel.SetTracing(cli.Trace)
 	}
 	lexer = chroma.Coalesce(lexer)
 	it, err := lexer.Tokenise(nil, contents)
```
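Asserting against `chroma.TracingLexer` instead of the concrete `*chroma.RegexLexer` means tracing also reaches wrapper lexers that implement the interface, such as the delegating lexer below. A minimal sketch of the same pattern in library code (import paths, the chosen lexer, and the input are illustrative; adjust `/v2` to the module version in use):

```go
package main

import (
	"fmt"

	"github.com/alecthomas/chroma/v2"
	"github.com/alecthomas/chroma/v2/lexers"
)

func main() {
	lexer := lexers.Get("markdown")
	// Same pattern as the CLI: enable tracing only when the lexer supports it.
	if tl, ok := lexer.(chroma.TracingLexer); ok {
		tl.SetTracing(true)
	}
	it, err := chroma.Coalesce(lexer).Tokenise(nil, "# a heading\n")
	if err != nil {
		panic(err)
	}
	for token := it(); token != chroma.EOF; token = it() {
		fmt.Printf("%s %q\n", token.Type, token.Value)
	}
}
```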
```diff
@@ -24,6 +24,15 @@ func DelegatingLexer(root Lexer, language Lexer) Lexer {
 	}
 }
 
+func (d *delegatingLexer) SetTracing(enable bool) {
+	if l, ok := d.language.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+	if l, ok := d.root.(TracingLexer); ok {
+		l.SetTracing(enable)
+	}
+}
+
 func (d *delegatingLexer) AnalyseText(text string) float32 {
 	return d.root.AnalyseText(text)
 }
```
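With this, a single `SetTracing` call on a delegating lexer fans out to both the root and the embedded-language lexer when they support tracing. A sketch (lexer choices and import paths are illustrative):

```go
package main

import (
	"github.com/alecthomas/chroma/v2"
	"github.com/alecthomas/chroma/v2/lexers"
)

func main() {
	// A delegating lexer handles a language embedded in another,
	// e.g. PHP inside HTML.
	combined := chroma.DelegatingLexer(lexers.Get("html"), lexers.Get("php"))
	if tl, ok := combined.(chroma.TracingLexer); ok {
		// Forwarded to both halves by the new SetTracing above.
		tl.SetTracing(true)
	}
}
```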
lexer.go (17 changed lines)
```diff
@@ -130,6 +130,23 @@ type Lexer interface {
 	AnalyseText(text string) float32
 }
 
+// Trace is the trace of a tokenisation process.
+type Trace struct {
+	Lexer   string  `json:"lexer"`
+	State   string  `json:"state"`
+	Rule    int     `json:"rule"`
+	Pattern string  `json:"pattern"`
+	Pos     int     `json:"pos"`
+	Length  int     `json:"length"`
+	Elapsed float64 `json:"elapsedMs"` // Elapsed time spent matching for this rule.
+}
+
+// TracingLexer is a Lexer that can trace its tokenisation process.
+type TracingLexer interface {
+	Lexer
+	SetTracing(enable bool)
+}
+
 // Lexers is a slice of lexers sortable by name.
 type Lexers []Lexer
```
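Because the encoder emits one `Trace` object per line, the stream can be decoded straight back into `Trace` values for offline analysis. A sketch of such a consumer (reading traces from stdin, the `tracesum.go` file name, and the import path are assumptions; the CLI writes traces to stderr):

```go
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"os"

	"github.com/alecthomas/chroma/v2"
)

func main() {
	// Sum matching time per lexer state to locate hotspots, e.g.:
	//   chroma --trace docs.md 2>&1 >/dev/null | go run tracesum.go
	totals := map[string]float64{}
	dec := json.NewDecoder(os.Stdin)
	for {
		var t chroma.Trace
		if err := dec.Decode(&t); err == io.EOF {
			break
		} else if err != nil {
			panic(err)
		}
		totals[t.State] += t.Elapsed
	}
	for state, ms := range totals {
		fmt.Printf("%-20s %.3fms\n", state, ms)
	}
}
```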
regexp.go (36 changed lines)
```diff
@@ -1,6 +1,7 @@
 package chroma
 
 import (
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -135,11 +136,20 @@ func NewLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
 }
 
 // Trace enables debug tracing.
+//
+// Deprecated: Use SetTracing instead.
 func (r *RegexLexer) Trace(trace bool) *RegexLexer {
 	r.trace = trace
 	return r
 }
 
+// SetTracing enables debug tracing.
+//
+// This complies with the [TracingLexer] interface.
+func (r *RegexLexer) SetTracing(trace bool) {
+	r.trace = trace
+}
+
 // A CompiledRule is a Rule with a pre-compiled regex.
 //
 // Note that regular expressions are lazily compiled on first use of the lexer.
@@ -185,6 +195,7 @@ func (l *LexerState) Get(key interface{}) interface{} {
 
 // Iterator returns the next Token from the lexer.
 func (l *LexerState) Iterator() Token { // nolint: gocognit
+	trace := json.NewEncoder(os.Stderr)
 	end := len(l.Text)
 	if l.newlineAdded {
 		end--
@@ -205,14 +216,33 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
 		}
 
 		l.State = l.Stack[len(l.Stack)-1]
-		if l.Lexer.trace {
-			fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q\n", l.State, l.Pos, string(l.Text[l.Pos:]))
-		}
 		selectedRule, ok := l.Rules[l.State]
 		if !ok {
 			panic("unknown state " + l.State)
 		}
+		var start time.Time
+		if l.Lexer.trace {
+			start = time.Now()
+		}
 		ruleIndex, rule, groups, namedGroups := matchRules(l.Text, l.Pos, selectedRule)
+		if l.Lexer.trace {
+			var length int
+			if groups != nil {
+				length = len(groups[0])
+			} else {
+				length = -1
+			}
+			_ = trace.Encode(Trace{ //nolint
+				Lexer:   l.Lexer.config.Name,
+				State:   l.State,
+				Rule:    ruleIndex,
+				Pattern: rule.Pattern,
+				Pos:     l.Pos,
+				Length:  length,
+				Elapsed: float64(time.Since(start)) / float64(time.Millisecond),
+			})
+			// fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q, elapsed=%s\n", l.State, l.Pos, string(l.Text[l.Pos:]), time.Since(start))
+		}
 		// No match.
 		if groups == nil {
 			// From Pygments :\
```
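Two details of the implementation above are worth noting: `json.Encoder.Encode` appends a newline after every object, which is what produces the line-delimited output the commit message relies on for jq, and the `Elapsed` computation converts a `time.Duration` (an integer nanosecond count) into fractional milliseconds. A standalone illustration of both:

```go
package main

import (
	"encoding/json"
	"os"
	"time"
)

func main() {
	enc := json.NewEncoder(os.Stderr)
	start := time.Now()
	time.Sleep(3 * time.Millisecond) // stand-in for the regex match being timed
	// Duration is an int64 nanosecond count; dividing by Millisecond
	// yields fractional milliseconds, matching the "elapsedMs" field.
	elapsedMs := float64(time.Since(start)) / float64(time.Millisecond)
	// Encode writes one JSON object followed by a newline: JSONL output.
	_ = enc.Encode(map[string]float64{"elapsedMs": elapsedMs})
}
```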