mirror of
https://github.com/alecthomas/chroma.git
synced 2025-01-26 03:20:10 +02:00
feat: support basic regex analysers in XML (#828)
The `<analyse>` element contains a regex to match against the input, and a score if the pattern matches. The scores of all matching patterns for a lexer are summed. Replaces #815, #813 and #826.
This commit is contained in:
parent
22266635c1
commit
a20cd7e8df
12
lexer.go
12
lexer.go
@ -57,6 +57,18 @@ type Config struct {
|
||||
//
|
||||
// If this is 0 it will be treated as a default of 1.
|
||||
Priority float32 `xml:"priority,omitempty"`
|
||||
|
||||
// Analyse is a list of regexes to match against the input.
|
||||
//
|
||||
// The sum of the scores of all matching patterns will be
|
||||
// used as the final score.
|
||||
Analyse []AnalyseConfig `xml:"analyse,omitempty"`
|
||||
}
|
||||
|
||||
// AnalyseConfig defines a single regex analyser pattern.
|
||||
type AnalyseConfig struct {
|
||||
Regex string `xml:"regex,attr"`
|
||||
Score float32 `xml:"score,attr"`
|
||||
}
|
||||
|
||||
// Token output to formatter.
|
||||
|
37
lexers/c.go
37
lexers/c.go
@ -1,37 +0,0 @@
|
||||
package lexers
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
. "github.com/alecthomas/chroma/v2" // nolint
|
||||
)
|
||||
|
||||
var (
|
||||
cAnalyserIncludeRe = regexp.MustCompile(`(?m)^\s*#include [<"]`)
|
||||
cAnalyserIfdefRe = regexp.MustCompile(`(?m)^\s*#ifn?def `)
|
||||
)
|
||||
|
||||
// C lexer.
|
||||
var C = Register(MustNewXMLLexer(
|
||||
embedded,
|
||||
"embedded/c.xml",
|
||||
).SetConfig(
|
||||
&Config{
|
||||
Name: "C",
|
||||
Aliases: []string{"c"},
|
||||
Filenames: []string{"*.c", "*.h", "*.idc", "*.x[bp]m"},
|
||||
MimeTypes: []string{"text/x-chdr", "text/x-csrc", "image/x-xbitmap", "image/x-xpixmap"},
|
||||
EnsureNL: true,
|
||||
Priority: 0.1,
|
||||
},
|
||||
).SetAnalyser(func(text string) float32 {
|
||||
if cAnalyserIncludeRe.MatchString(text) {
|
||||
return 0.1
|
||||
}
|
||||
|
||||
if cAnalyserIfdefRe.MatchString(text) {
|
||||
return 0.1
|
||||
}
|
||||
|
||||
return 0
|
||||
}))
|
@ -1,44 +0,0 @@
|
||||
package lexers_test
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
|
||||
"github.com/alecthomas/chroma/v2"
|
||||
"github.com/alecthomas/chroma/v2/lexers"
|
||||
|
||||
"github.com/alecthomas/assert/v2"
|
||||
)
|
||||
|
||||
func TestC_AnalyseText(t *testing.T) {
|
||||
tests := map[string]struct {
|
||||
Filepath string
|
||||
Expected float32
|
||||
}{
|
||||
"include": {
|
||||
Filepath: "testdata/c_include.c",
|
||||
Expected: 0.1,
|
||||
},
|
||||
"ifdef": {
|
||||
Filepath: "testdata/c_ifdef.c",
|
||||
Expected: 0.1,
|
||||
},
|
||||
"ifndef": {
|
||||
Filepath: "testdata/c_ifndef.c",
|
||||
Expected: 0.1,
|
||||
},
|
||||
}
|
||||
|
||||
for name, test := range tests {
|
||||
test := test
|
||||
t.Run(name, func(t *testing.T) {
|
||||
data, err := ioutil.ReadFile(test.Filepath)
|
||||
assert.NoError(t, err)
|
||||
|
||||
analyser, ok := lexers.C.(chroma.Analyser)
|
||||
assert.True(t, ok)
|
||||
|
||||
assert.Equal(t, test.Expected, analyser.AnalyseText(string(data)))
|
||||
})
|
||||
}
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
package lexers
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
. "github.com/alecthomas/chroma/v2" // nolint
|
||||
)
|
||||
|
||||
var (
|
||||
cppAnalyserIncludeRe = regexp.MustCompile(`#include <[a-z_]+>`)
|
||||
cppAnalyserNamespaceRe = regexp.MustCompile(`using namespace `)
|
||||
)
|
||||
|
||||
var CPP = Register(MustNewXMLLexer(
|
||||
embedded,
|
||||
"embedded/c++.xml",
|
||||
).SetConfig(
|
||||
&Config{
|
||||
Name: "C++",
|
||||
Aliases: []string{"cpp", "c++"},
|
||||
Filenames: []string{"*.cpp", "*.hpp", "*.c++", "*.h++", "*.cc", "*.hh", "*.cxx", "*.hxx", "*.C", "*.H", "*.cp", "*.CPP", "*.cppm", "*.ixx", "*.tpp"},
|
||||
MimeTypes: []string{"text/x-c++hdr", "text/x-c++src"},
|
||||
Priority: 0.1,
|
||||
EnsureNL: true,
|
||||
},
|
||||
)).SetAnalyser(func(text string) float32 {
|
||||
if cppAnalyserIncludeRe.MatchString(text) {
|
||||
return 0.2
|
||||
}
|
||||
|
||||
if cppAnalyserNamespaceRe.MatchString(text) {
|
||||
return 0.4
|
||||
}
|
||||
|
||||
return 0
|
||||
})
|
@ -1,57 +0,0 @@
|
||||
package lexers_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/alecthomas/chroma/v2"
|
||||
"github.com/alecthomas/chroma/v2/lexers"
|
||||
|
||||
"github.com/alecthomas/assert/v2"
|
||||
)
|
||||
|
||||
func TestCpp_AnalyseText(t *testing.T) {
|
||||
tests := map[string]struct {
|
||||
Filepath string
|
||||
Expected float32
|
||||
}{
|
||||
"include": {
|
||||
Filepath: "testdata/cpp_include.cpp",
|
||||
Expected: 0.2,
|
||||
},
|
||||
"namespace": {
|
||||
Filepath: "testdata/cpp_namespace.cpp",
|
||||
Expected: 0.4,
|
||||
},
|
||||
}
|
||||
|
||||
for name, test := range tests {
|
||||
test := test
|
||||
t.Run(name, func(t *testing.T) {
|
||||
data, err := os.ReadFile(test.Filepath)
|
||||
assert.NoError(t, err)
|
||||
|
||||
analyser, ok := lexers.CPP.(chroma.Analyser)
|
||||
assert.True(t, ok)
|
||||
|
||||
assert.Equal(t, test.Expected, analyser.AnalyseText(string(data)))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIssue290(t *testing.T) {
|
||||
input := `// 64-bit floats have 53 digits of precision, including the whole-number-part.
|
||||
double a = 0011111110111001100110011001100110011001100110011001100110011010; // imperfect representation of 0.1
|
||||
double b = 0011111111001001100110011001100110011001100110011001100110011010; // imperfect representation of 0.2
|
||||
double c = 0011111111010011001100110011001100110011001100110011001100110011; // imperfect representation of 0.3
|
||||
double a + b = 0011111111010011001100110011001100110011001100110011001100110100; // Note that this is not quite equal to the "canonical" 0.3!a
|
||||
`
|
||||
it, err := lexers.GlobalLexerRegistry.Get("C++").Tokenise(nil, input)
|
||||
assert.NoError(t, err)
|
||||
for {
|
||||
token := it()
|
||||
if token == chroma.EOF {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
@ -15,9 +15,12 @@
|
||||
<filename>*.H</filename>
|
||||
<filename>*.cp</filename>
|
||||
<filename>*.CPP</filename>
|
||||
<filename>*.tpp</filename>
|
||||
<mime_type>text/x-c++hdr</mime_type>
|
||||
<mime_type>text/x-c++src</mime_type>
|
||||
<ensure_nl>true</ensure_nl>
|
||||
<analyse regex="#include <[a-z_]+>" score="0.2" />
|
||||
<analyse regex="using namespace " score="0.4" />
|
||||
</config>
|
||||
<rules>
|
||||
<state name="classname">
|
||||
|
@ -11,6 +11,8 @@
|
||||
<mime_type>image/x-xbitmap</mime_type>
|
||||
<mime_type>image/x-xpixmap</mime_type>
|
||||
<ensure_nl>true</ensure_nl>
|
||||
<analyse regex="(?m)^\s*#include <" score="0.1"/>
|
||||
<analyse regex="(?m)^\s*#ifn?def " score="0.1" />
|
||||
</config>
|
||||
<rules>
|
||||
<state name="statement">
|
||||
|
1
lexers/testdata/analysis/c.ifdef.expected
vendored
Normal file
1
lexers/testdata/analysis/c.ifdef.expected
vendored
Normal file
@ -0,0 +1 @@
|
||||
0.1
|
1
lexers/testdata/analysis/c.ifndef.expected
vendored
Normal file
1
lexers/testdata/analysis/c.ifndef.expected
vendored
Normal file
@ -0,0 +1 @@
|
||||
0.1
|
1
lexers/testdata/analysis/c.include.expected
vendored
Normal file
1
lexers/testdata/analysis/c.include.expected
vendored
Normal file
@ -0,0 +1 @@
|
||||
0.1
|
1
lexers/testdata/analysis/cpp.include.expected
vendored
Normal file
1
lexers/testdata/analysis/cpp.include.expected
vendored
Normal file
@ -0,0 +1 @@
|
||||
0.2
|
1
lexers/testdata/analysis/cpp.namespace.expected
vendored
Normal file
1
lexers/testdata/analysis/cpp.namespace.expected
vendored
Normal file
@ -0,0 +1 @@
|
||||
0.4
|
25
serialise.go
25
serialise.go
@ -11,6 +11,8 @@ import (
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/dlclark/regexp2"
|
||||
)
|
||||
|
||||
// Serialisation of Chroma rules to XML. The format is:
|
||||
@ -107,7 +109,7 @@ func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) {
|
||||
var config Config
|
||||
err = dec.DecodeElement(&config, &se)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
return nil, fmt.Errorf("%s: %w", path, err)
|
||||
}
|
||||
return &config, nil
|
||||
}
|
||||
@ -135,8 +137,29 @@ func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) {
|
||||
return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err)
|
||||
}
|
||||
}
|
||||
type regexAnalyse struct {
|
||||
re *regexp2.Regexp
|
||||
score float32
|
||||
}
|
||||
regexAnalysers := make([]regexAnalyse, 0, len(config.Analyse))
|
||||
for _, ra := range config.Analyse {
|
||||
re, err := regexp2.Compile(ra.Regex, regexp2.None)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: %q is not a valid analyser regex: %w", config.Name, ra.Regex, err)
|
||||
}
|
||||
regexAnalysers = append(regexAnalysers, regexAnalyse{re, ra.Score})
|
||||
}
|
||||
return &RegexLexer{
|
||||
config: config,
|
||||
analyser: func(text string) float32 {
|
||||
var score float32
|
||||
for _, ra := range regexAnalysers {
|
||||
if ok, _ := ra.re.MatchString(text); ok {
|
||||
score += ra.score
|
||||
}
|
||||
}
|
||||
return score
|
||||
},
|
||||
fetchRulesFunc: func() (Rules, error) {
|
||||
var lexer struct {
|
||||
Config
|
||||
|
Loading…
x
Reference in New Issue
Block a user