1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-11-23 22:24:39 +02:00

feat: support basic regex analysers in XML (#828)

The `<analyse>` element contains a regex to match against the input, and
the score to award if the pattern matches.

The scores of all matching patterns for a lexer are summed.

Replaces #815, #813 and #826.
This commit is contained in:
Alec Thomas
2023-08-22 05:32:23 +10:00
committed by GitHub
parent 22266635c1
commit a20cd7e8df
18 changed files with 46 additions and 175 deletions

View File

@@ -11,6 +11,8 @@ import (
"reflect"
"regexp"
"strings"
"github.com/dlclark/regexp2"
)
// Serialisation of Chroma rules to XML. The format is:
@@ -107,7 +109,7 @@ func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) {
var config Config
err = dec.DecodeElement(&config, &se)
if err != nil {
panic(err)
return nil, fmt.Errorf("%s: %w", path, err)
}
return &config, nil
}
@@ -135,8 +137,29 @@ func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) {
return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err)
}
}
// regexAnalyse pairs a compiled analyser pattern with the score that is
// added to a lexer's analysis result when the pattern matches.
type regexAnalyse struct {
// re is the compiled regexp2 pattern tested against the input text.
re *regexp2.Regexp
// score is the amount added to the total when re matches.
score float32
}
// Compile every analyser regex from the config up front, so an invalid
// pattern is returned as an error here instead of surfacing later when
// text is analysed.
regexAnalysers := make([]regexAnalyse, 0, len(config.Analyse))
for _, ra := range config.Analyse {
re, err := regexp2.Compile(ra.Regex, regexp2.None)
if err != nil {
// Include the lexer name and the offending pattern for context.
return nil, fmt.Errorf("%s: %q is not a valid analyser regex: %w", config.Name, ra.Regex, err)
}
regexAnalysers = append(regexAnalysers, regexAnalyse{re, ra.Score})
}
return &RegexLexer{
config: config,
// analyser scores text by summing the scores of every compiled analyser
// regex that matches it; the result is 0 when none match.
analyser: func(text string) float32 {
var score float32
for _, ra := range regexAnalysers {
// The error returned by MatchString is discarded; only a reported
// match contributes to the score.
// NOTE(review): presumably a failed match attempt reports ok == false
// and so adds nothing — confirm against regexp2's MatchString contract.
if ok, _ := ra.re.MatchString(text); ok {
score += ra.score
}
}
return score
},
fetchRulesFunc: func() (Rules, error) {
var lexer struct {
Config