mirror of
https://github.com/alecthomas/chroma.git
synced 2025-03-17 20:58:08 +02:00
feat: improve regex analysers in XML (#831)
This commit is contained in:
parent
d37de78b18
commit
708662a581
18
lexer.go
18
lexer.go
@ -60,15 +60,23 @@ type Config struct {
|
||||
|
||||
// Analyse is a list of regexes to match against the input.
|
||||
//
|
||||
// The sum of the scores of all matching patterns will be
|
||||
// If a match is found and the single attribute is set to true, that match's score is returned,
|
||||
// otherwise the sum of the scores of all matching patterns will be
|
||||
// used as the final score.
|
||||
Analyse []AnalyseConfig `xml:"analyse,omitempty"`
|
||||
Analyse *AnalyseConfig `xml:"analyse,omitempty"`
|
||||
}
|
||||
|
||||
// AnalyseConfig defines a single regex analyser pattern.
|
||||
// AnalyseConfig defines the list of regex analysers.
|
||||
type AnalyseConfig struct {
|
||||
Regex string `xml:"regex,attr"`
|
||||
Score float32 `xml:"score,attr"`
|
||||
Regexes []RegexConfig `xml:"regex,omitempty"`
|
||||
// If true, the score of the first matching regex is returned and the remaining regexes are not evaluated.
|
||||
Single bool `xml:"single,attr"`
|
||||
}
|
||||
|
||||
// RegexConfig defines a single regex pattern and its score in case of match.
|
||||
type RegexConfig struct {
|
||||
Pattern string `xml:"pattern,attr"`
|
||||
Score float32 `xml:"score,attr"`
|
||||
}
|
||||
|
||||
// Token output to formatter.
|
||||
|
@ -19,8 +19,10 @@
|
||||
<mime_type>text/x-c++hdr</mime_type>
|
||||
<mime_type>text/x-c++src</mime_type>
|
||||
<ensure_nl>true</ensure_nl>
|
||||
<analyse regex="#include &lt;[a-z_]+>" score="0.2" />
|
||||
<analyse regex="using namespace " score="0.4" />
|
||||
<analyse single="true">
|
||||
<regex pattern="#include &lt;[a-z_]+>" score="0.2" />
|
||||
<regex pattern="using namespace " score="0.4" />
|
||||
</analyse>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="classname">
|
||||
|
@ -11,8 +11,10 @@
|
||||
<mime_type>image/x-xbitmap</mime_type>
|
||||
<mime_type>image/x-xpixmap</mime_type>
|
||||
<ensure_nl>true</ensure_nl>
|
||||
<analyse regex="(?m)^\s*#include &lt;" score="0.1"/>
|
||||
<analyse regex="(?m)^\s*#ifn?def " score="0.1" />
|
||||
<analyse single="true" >
|
||||
<regex pattern="(?m)^\s*#include &lt;" score="0.1" />
|
||||
<regex pattern="(?m)^\s*#ifn?def " score="0.1" />
|
||||
</analyse>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="statement">
|
||||
|
11
regexp.go
11
regexp.go
@ -298,11 +298,12 @@ func (r *RegexLexer) SetAnalyser(analyser func(text string) float32) Lexer {
|
||||
return r
|
||||
}
|
||||
|
||||
func (r *RegexLexer) AnalyseText(text string) float32 { // nolint
|
||||
// AnalyseText scores how likely a fragment of text is to match this lexer, between 0.0 and 1.0.
|
||||
func (r *RegexLexer) AnalyseText(text string) float32 {
|
||||
if r.analyser != nil {
|
||||
return r.analyser(text)
|
||||
}
|
||||
return 0.0
|
||||
return 0
|
||||
}
|
||||
|
||||
// SetConfig replaces the Config for this Lexer.
|
||||
@ -311,7 +312,8 @@ func (r *RegexLexer) SetConfig(config *Config) *RegexLexer {
|
||||
return r
|
||||
}
|
||||
|
||||
func (r *RegexLexer) Config() *Config { // nolint
|
||||
// Config returns the Config for this Lexer.
|
||||
func (r *RegexLexer) Config() *Config {
|
||||
return r.config
|
||||
}
|
||||
|
||||
@ -406,7 +408,8 @@ func (r *RegexLexer) needRules() error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint
|
||||
// Tokenise text using lexer, returning an iterator.
|
||||
func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
|
||||
err := r.needRules()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
55
serialise.go
55
serialise.go
@ -131,35 +131,58 @@ func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, glob := range append(config.Filenames, config.AliasFilenames...) {
|
||||
_, err := filepath.Match(glob, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err)
|
||||
}
|
||||
}
|
||||
type regexAnalyse struct {
|
||||
re *regexp2.Regexp
|
||||
score float32
|
||||
}
|
||||
regexAnalysers := make([]regexAnalyse, 0, len(config.Analyse))
|
||||
for _, ra := range config.Analyse {
|
||||
re, err := regexp2.Compile(ra.Regex, regexp2.None)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: %q is not a valid analyser regex: %w", config.Name, ra.Regex, err)
|
||||
|
||||
var analyserFn func(string) float32
|
||||
|
||||
if config.Analyse != nil {
|
||||
type regexAnalyse struct {
|
||||
re *regexp2.Regexp
|
||||
score float32
|
||||
}
|
||||
regexAnalysers = append(regexAnalysers, regexAnalyse{re, ra.Score})
|
||||
}
|
||||
return &RegexLexer{
|
||||
config: config,
|
||||
analyser: func(text string) float32 {
|
||||
|
||||
regexAnalysers := make([]regexAnalyse, 0, len(config.Analyse.Regexes))
|
||||
|
||||
for _, ra := range config.Analyse.Regexes {
|
||||
re, err := regexp2.Compile(ra.Pattern, regexp2.None)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: %q is not a valid analyser regex: %w", config.Name, ra.Pattern, err)
|
||||
}
|
||||
|
||||
regexAnalysers = append(regexAnalysers, regexAnalyse{re, ra.Score})
|
||||
}
|
||||
|
||||
analyserFn = func(text string) float32 {
|
||||
var score float32
|
||||
|
||||
for _, ra := range regexAnalysers {
|
||||
if ok, _ := ra.re.MatchString(text); ok {
|
||||
ok, err := ra.re.MatchString(text)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
if ok && config.Analyse.Single {
|
||||
return ra.score
|
||||
}
|
||||
|
||||
if ok {
|
||||
score += ra.score
|
||||
}
|
||||
}
|
||||
|
||||
return score
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
return &RegexLexer{
|
||||
config: config,
|
||||
analyser: analyserFn,
|
||||
fetchRulesFunc: func() (Rules, error) {
|
||||
var lexer struct {
|
||||
Config
|
||||
|
Loading…
x
Reference in New Issue
Block a user