// Mirror of https://github.com/alecthomas/chroma.git (synced 2025-01-26 03:20:10 +02:00).
// File: chroma/delegate.go (138 lines, 3.2 KiB, Go).

package chroma
import (
"bytes"
)
// delegatingLexer is the Lexer implementation handed out by DelegatingLexer.
type delegatingLexer struct {
	// language gets the first pass over the text; whatever it reports as
	// Other is then lexed by root.
	root, language Lexer
}
// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexer as arguments: a root lexer and a language lexer. First everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
2017-09-30 12:44:22 +10:00
//
// The lexers from the template lexer package use this base lexer.
func DelegatingLexer(root Lexer, language Lexer) Lexer {
return &delegatingLexer{
root: root,
language: language,
}
}
// Config reports the configuration of the language lexer; the root lexer's
// configuration is not consulted.
func (d *delegatingLexer) Config() *Config {
	cfg := d.language.Config()
	return cfg
}
// An insertion is the character range where language tokens should be inserted.
type insertion struct {
start, end int
tokens []Token
2017-09-30 12:44:22 +10:00
}
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
tokens, err := Tokenise(Coalesce(d.language), options, text)
if err != nil {
return nil, err
}
// Compute insertions and gather "Other" tokens.
others := &bytes.Buffer{}
insertions := []*insertion{}
var insert *insertion
2017-09-30 12:44:22 +10:00
offset := 0
var last Token
for _, t := range tokens {
2017-09-30 12:44:22 +10:00
if t.Type == Other {
if last != EOF && insert != nil && last.Type != Other {
insert.end = offset
2017-09-30 12:44:22 +10:00
}
others.WriteString(t.Value)
2017-09-30 12:44:22 +10:00
} else {
if last == EOF || last.Type == Other {
insert = &insertion{start: offset}
insertions = append(insertions, insert)
2017-09-30 12:44:22 +10:00
}
insert.tokens = append(insert.tokens, t)
2017-09-30 12:44:22 +10:00
}
last = t
2017-09-30 12:44:22 +10:00
offset += len(t.Value)
}
if len(insertions) == 0 {
return d.root.Tokenise(options, text)
2017-09-30 12:44:22 +10:00
}
// Lex the other tokens.
2018-03-19 10:32:36 +11:00
rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
2017-09-30 12:44:22 +10:00
if err != nil {
return nil, err
}
// Interleave the two sets of tokens.
var out []Token
2018-03-19 10:32:36 +11:00
offset = 0 // Offset into text.
tokenIndex := 0
nextToken := func() Token {
2018-03-19 10:32:36 +11:00
if tokenIndex >= len(rootTokens) {
return EOF
2017-09-30 12:44:22 +10:00
}
2018-03-19 10:32:36 +11:00
t := rootTokens[tokenIndex]
tokenIndex++
return t
}
2018-03-19 10:32:36 +11:00
insertionIndex := 0
nextInsertion := func() *insertion {
if insertionIndex >= len(insertions) {
return nil
}
i := insertions[insertionIndex]
insertionIndex++
return i
}
t := nextToken()
i := nextInsertion()
for t != EOF || i != nil {
2018-03-19 10:32:36 +11:00
// fmt.Printf("%d->%d:%q %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
var l Token
2018-03-19 10:32:36 +11:00
l, t = splitToken(t, i.start-offset)
if l != EOF {
2018-03-19 10:32:36 +11:00
out = append(out, l)
offset += len(l.Value)
}
out = append(out, i.tokens...)
offset += i.end - i.start
if t == EOF {
2018-03-19 10:32:36 +11:00
t = nextToken()
}
i = nextInsertion()
} else {
out = append(out, t)
offset += len(t.Value)
2018-03-19 10:32:36 +11:00
t = nextToken()
}
}
return Literator(out...), nil
}
2017-09-30 12:44:22 +10:00
func splitToken(t Token, offset int) (l Token, r Token) {
if t == EOF {
return EOF, EOF
2018-03-19 10:32:36 +11:00
}
if offset == 0 {
return EOF, t
}
2018-03-19 10:32:36 +11:00
if offset == len(t.Value) {
return t, EOF
}
l = t.Clone()
r = t.Clone()
l.Value = l.Value[:offset]
r.Value = r.Value[offset:]
return
2017-09-30 12:44:22 +10:00
}