1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-11-25 22:32:32 +02:00

Fix: sort words in descending order of length before regex generation (#496)

* Fix: sort words in descending order of length before regex generation

* Avoid code duplication in Raku lexer
This commit is contained in:
mlpo
2021-05-08 01:10:18 +02:00
committed by GitHub
parent 225e1862d3
commit ff6eedba72
2 changed files with 9 additions and 15 deletions

View File

@@ -2,7 +2,6 @@ package r
import ( import (
"regexp" "regexp"
"sort"
"strings" "strings"
"unicode/utf8" "unicode/utf8"
@@ -70,7 +69,7 @@ func rakuRules() Rules {
`dynamic-scope`, `built`, `temp`, `dynamic-scope`, `built`, `temp`,
} }
keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, sortWords(keywords)...) keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
wordOperators := []string{ wordOperators := []string{
`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`, `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
@@ -80,7 +79,7 @@ func rakuRules() Rules {
`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`, `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
} }
wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, sortWords(wordOperators)...) wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
operators := []string{ operators := []string{
`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`, `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
@@ -93,7 +92,7 @@ func rakuRules() Rules {
``, ``, ``, ``, `:`, `!!!`, `???`, `¯`, `×`, `÷`, ``, ``, ``, ``, ``, ``, ``, `:`, `!!!`, `???`, `¯`, `×`, `÷`, ``, ``, ``,
} }
operatorsPattern := Words(``, ``, sortWords(operators)...) operatorsPattern := Words(``, ``, operators...)
builtinTypes := []string{ builtinTypes := []string{
`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`, `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
@@ -142,7 +141,7 @@ func rakuRules() Rules {
`strict`, `trace`, `variables`, `strict`, `trace`, `variables`,
} }
builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, sortWords(builtinTypes)...) builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
builtinRoutines := []string{ builtinRoutines := []string{
`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`, `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
@@ -266,7 +265,7 @@ func rakuRules() Rules {
`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`, `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
} }
builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, sortWords(builtinRoutines)...) builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
// A map of opening and closing brackets // A map of opening and closing brackets
brackets := map[rune]rune{ brackets := map[rune]rune{
@@ -1197,15 +1196,6 @@ func joinRuneMap(m map[rune]rune) string {
return string(runes) return string(runes)
} }
// sortWords orders words from longest to shortest, measuring length in runes
// rather than bytes so that multi-byte words are ranked by their character
// count. Words of equal length keep their original relative order.
//
// The slice is sorted in place and the same slice is returned, which lets the
// call be spread directly into a variadic argument list. Callers in this file
// feed the result to Words for regex generation; presumably the longest-first
// order ensures a longer word is tried before a shorter word that prefixes it.
func sortWords(words []string) []string {
	sort.SliceStable(words, func(i, j int) bool {
		// RuneCountInString yields the same count as len([]rune(s)) without
		// allocating a fresh rune slice on every comparison.
		return utf8.RuneCountInString(words[i]) > utf8.RuneCountInString(words[j])
	})
	return words
}
// Finds the index of substring in the string starting at position n // Finds the index of substring in the string starting at position n
func indexAt(str []rune, substr []rune, pos int) int { func indexAt(str []rune, substr []rune, pos int) int {
text := string(str[pos:]) text := string(str[pos:])

View File

@@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"os" "os"
"regexp" "regexp"
"sort"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -141,6 +142,9 @@ func UsingSelf(stateName string) Emitter {
// Words creates a regex that matches any of the given literal words. // Words creates a regex that matches any of the given literal words.
func Words(prefix, suffix string, words ...string) string { func Words(prefix, suffix string, words ...string) string {
sort.Slice(words, func(i, j int) bool {
return len(words[j]) < len(words[i])
})
for i, word := range words { for i, word := range words {
words[i] = regexp.QuoteMeta(word) words[i] = regexp.QuoteMeta(word)
} }