package chroma

import (
	"bytes"
)

type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}
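
// A minimal usage sketch (htmlLexer and phpLexer are hypothetical Lexer values,
// not defined in this package): the language lexer claims the embedded regions
// and emits Other for everything else, which the root lexer then highlights.
//
//	lexer := DelegatingLexer(htmlLexer, phpLexer)
//	it, err := lexer.Tokenise(nil, `<p><?php echo "hi"; ?></p>`)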

func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}

// An insertion is the character range where language tokens should be inserted.
type insertion struct {
	start, end int
	tokens     []*Token
}
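
// For example, given input like `x<?php f() ?>y` and assuming the language
// lexer emits Other for "x" and "y", the tokens for the embedded region form a
// single insertion whose start and end are byte offsets into the original text.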

func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last *Token
	for _, t := range tokens {
		if t.Type == Other {
			if last != nil && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			if last == nil || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}
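
	// No embedded-language tokens were found, so the entire input belongs to
	// the root lexer.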
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(d.root, options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens.
	out := []*Token{}
	offset = 0
	index := 0
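	// next returns the next root token, or nil once they are exhausted.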
	next := func() *Token {
		if index >= len(rootTokens) {
			return nil
		}
		t := rootTokens[index]
		index++
		return t
	}
	t := next()
	for _, insert := range insertions {
		// Consume tokens until insertion point.
		for t != nil && offset+len(t.Value) <= insert.start {
			out = append(out, t)
			offset += len(t.Value)
			t = next()
		}
		// End of root tokens, append insertion point.
		if t == nil {
			out = append(out, insert.tokens...)
			break
		}
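
		// The current root token may span the insertion point.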
		// Split and insert.
		l, r := splitToken(t, insert.start-offset)
		if l != nil {
			out = append(out, l)
			offset += len(l.Value)
		}
		out = append(out, insert.tokens...)
		offset += insert.end - insert.start
		if r != nil {
			out = append(out, r)
			offset += len(r.Value)
		}
		t = next()
	}
	if t != nil {
		out = append(out, t)
	}
	// Remainder.
	out = append(out, rootTokens[index:]...)
	return Literator(out...), nil
}
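
// splitToken splits t into two tokens at the given byte offset. Either half may
// be nil when the offset falls at the start of the token's value or at (or past)
// its end.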
func splitToken(t *Token, offset int) (l *Token, r *Token) {
	if offset == 0 {
		return nil, t
	}
	if offset >= len(t.Value) {
		return t, nil
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}