package chroma

import (
	"bytes"
)

type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
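//
// A hedged usage sketch (the "php" and "html" lexer lookups below are
// assumptions about the lexers subpackage, not something defined in this
// file); the language lexer must emit Other for any text it does not
// recognise:
//
//	php := lexers.Get("php")   // language lexer, emits Other for non-PHP text
//	html := lexers.Get("html") // root lexer for the surrounding document
//	lexer := DelegatingLexer(html, php)
//	it, err := lexer.Tokenise(nil, "<h1><?php echo $title ?></h1>")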
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}

func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}

// An insertion is the character range where language tokens should be inserted.
type insertion struct {
	start, end int
	tokens     []*Token
}

func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens. "offset" tracks the
	// position in the original text; each insertion records the range of
	// original text covered by a run of non-Other (language) tokens.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last *Token
	for _, t := range tokens {
		if t.Type == Other {
			if last != nil && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			if last == nil || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(d.root, options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens. "offset" is again a position in the
	// original text: it advances over emitted root tokens and skips across
	// each inserted language range, keeping root tokens and insertions aligned.
	out := []*Token{}
	offset = 0
	index := 0
	next := func() *Token {
		if index >= len(rootTokens) {
			return nil
		}
		t := rootTokens[index]
		index++
		return t
	}
	t := next()
	for _, insert := range insertions {
		// Consume root tokens that end at or before the insertion point.
		for t != nil && offset+len(t.Value) <= insert.start {
			out = append(out, t)
			offset += len(t.Value)
			t = next()
		}
		// End of root tokens; append the insertion tokens and stop.
		if t == nil {
			out = append(out, insert.tokens...)
			break
		}
		// Split the current root token at the insertion point and place the
		// language tokens between the two halves.
		l, r := splitToken(t, insert.start-offset)
		if l != nil {
			out = append(out, l)
			offset += len(l.Value)
		}
		out = append(out, insert.tokens...)
		offset += insert.end - insert.start
		if r != nil {
			out = append(out, r)
			offset += len(r.Value)
		}
		t = next()
	}
	if t != nil {
		out = append(out, t)
	}
	// Append any remaining root tokens.
	out = append(out, rootTokens[index:]...)
	return Literator(out...), nil
}
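
// splitToken splits a token into two parts at the given byte offset. A nil
// left or right token is returned when the split point falls at the start or
// end of the token's value, respectively.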
func splitToken(t *Token, offset int) (l *Token, r *Token) {
	if offset == 0 {
		return nil, t
	}
	if offset >= len(t.Value) {
		return t, nil
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}