mirror of
https://github.com/alecthomas/chroma.git
synced 2025-11-25 22:32:32 +02:00
Fix: sort words in descending order of length before regex generation (#496)
* Fix: sort words in descending order of length before regex generation
* Avoid code duplication in Raku lexer
This commit is contained in:
@@ -2,7 +2,6 @@ package r
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
|
||||||
"strings"
|
"strings"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
@@ -70,7 +69,7 @@ func rakuRules() Rules {
|
|||||||
`dynamic-scope`, `built`, `temp`,
|
`dynamic-scope`, `built`, `temp`,
|
||||||
}
|
}
|
||||||
|
|
||||||
keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, sortWords(keywords)...)
|
keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
|
||||||
|
|
||||||
wordOperators := []string{
|
wordOperators := []string{
|
||||||
`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
|
`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
|
||||||
@@ -80,7 +79,7 @@ func rakuRules() Rules {
|
|||||||
`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
|
`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
|
||||||
}
|
}
|
||||||
|
|
||||||
wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, sortWords(wordOperators)...)
|
wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
|
||||||
|
|
||||||
operators := []string{
|
operators := []string{
|
||||||
`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
|
`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
|
||||||
@@ -93,7 +92,7 @@ func rakuRules() Rules {
|
|||||||
`⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
|
`⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
|
||||||
}
|
}
|
||||||
|
|
||||||
operatorsPattern := Words(``, ``, sortWords(operators)...)
|
operatorsPattern := Words(``, ``, operators...)
|
||||||
|
|
||||||
builtinTypes := []string{
|
builtinTypes := []string{
|
||||||
`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
|
`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
|
||||||
@@ -142,7 +141,7 @@ func rakuRules() Rules {
|
|||||||
`strict`, `trace`, `variables`,
|
`strict`, `trace`, `variables`,
|
||||||
}
|
}
|
||||||
|
|
||||||
builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, sortWords(builtinTypes)...)
|
builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
|
||||||
|
|
||||||
builtinRoutines := []string{
|
builtinRoutines := []string{
|
||||||
`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
|
`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
|
||||||
@@ -266,7 +265,7 @@ func rakuRules() Rules {
|
|||||||
`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
|
`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
|
||||||
}
|
}
|
||||||
|
|
||||||
builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, sortWords(builtinRoutines)...)
|
builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
|
||||||
|
|
||||||
// A map of opening and closing brackets
|
// A map of opening and closing brackets
|
||||||
brackets := map[rune]rune{
|
brackets := map[rune]rune{
|
||||||
@@ -1197,15 +1196,6 @@ func joinRuneMap(m map[rune]rune) string {
|
|||||||
return string(runes)
|
return string(runes)
|
||||||
}
|
}
|
||||||
|
|
||||||
// sortWords reorders words in place so that entries with more runes come
// first, then returns the same slice so the call can be used inline.
//
// Length is measured in runes, not bytes. The underlying sort.Slice call is
// not guaranteed to be stable, so the relative order of words with equal
// rune counts is unspecified.
func sortWords(words []string) []string {
	// runeLen reports the number of runes in s.
	runeLen := func(s string) int {
		return len([]rune(s))
	}
	// longerFirst orders the element at index a before the one at index b
	// when it contains strictly more runes.
	longerFirst := func(a, b int) bool {
		return runeLen(words[a]) > runeLen(words[b])
	}
	sort.Slice(words, longerFirst)
	return words
}
|
|
||||||
|
|
||||||
// Finds the index of substring in the string starting at position n
|
// Finds the index of substring in the string starting at position n
|
||||||
func indexAt(str []rune, substr []rune, pos int) int {
|
func indexAt(str []rune, substr []rune, pos int) int {
|
||||||
text := string(str[pos:])
|
text := string(str[pos:])
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -141,6 +142,9 @@ func UsingSelf(stateName string) Emitter {
|
|||||||
|
|
||||||
// Words creates a regex that matches any of the given literal words.
|
// Words creates a regex that matches any of the given literal words.
|
||||||
func Words(prefix, suffix string, words ...string) string {
|
func Words(prefix, suffix string, words ...string) string {
|
||||||
|
sort.Slice(words, func(i, j int) bool {
|
||||||
|
return len(words[j]) < len(words[i])
|
||||||
|
})
|
||||||
for i, word := range words {
|
for i, word := range words {
|
||||||
words[i] = regexp.QuoteMeta(word)
|
words[i] = regexp.QuoteMeta(word)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user