diff --git a/coalesce.go b/coalesce.go index 688017a..5af0a7c 100644 --- a/coalesce.go +++ b/coalesce.go @@ -13,6 +13,9 @@ func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, e } return func() *Token { for token := it(); token != nil; token = it() { + if len(token.Value) == 0 { + continue + } if prev == nil { prev = token } else { diff --git a/delegate.go b/delegate.go index bd2b42c..06f55db 100644 --- a/delegate.go +++ b/delegate.go @@ -67,64 +67,66 @@ func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Itera } // Lex the other tokens. - rootTokens, err := Tokenise(d.root, options, others.String()) + rootTokens, err := Tokenise(Coalesce(d.root), options, others.String()) if err != nil { return nil, err } // Interleave the two sets of tokens. out := []*Token{} - offset = 0 - index := 0 - next := func() *Token { - if index >= len(rootTokens) { + offset = 0 // Offset into text. + tokenIndex := 0 + nextToken := func() *Token { + if tokenIndex >= len(rootTokens) { return nil } - t := rootTokens[index] - index++ + t := rootTokens[tokenIndex] + tokenIndex++ return t } - t := next() - for _, insert := range insertions { - // Consume tokens until insertion point. - for t != nil && offset+len(t.Value) <= insert.start { + insertionIndex := 0 + nextInsertion := func() *insertion { + if insertionIndex >= len(insertions) { + return nil + } + i := insertions[insertionIndex] + insertionIndex++ + return i + } + t := nextToken() + i := nextInsertion() + for t != nil || i != nil { + // fmt.Printf("%d->%d:%q %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...)) + if t == nil || (i != nil && i.start < offset+len(t.Value)) { + var l *Token + l, t = splitToken(t, i.start-offset) + if l != nil { + out = append(out, l) + offset += len(l.Value) + } + out = append(out, i.tokens...) + offset += i.end - i.start + if t == nil { + t = nextToken() + } + i = nextInsertion() + } else { out = append(out, t) offset += len(t.Value) - t = next() + t = nextToken() } - // End of root tokens, append insertion point. - if t == nil { - out = append(out, insert.tokens...) - break - } - - // Split and insert. - l, r := splitToken(t, insert.start-offset) - if l != nil { - out = append(out, l) - offset += len(l.Value) - } - out = append(out, insert.tokens...) - offset += insert.end - insert.start - if r != nil { - out = append(out, r) - offset += len(r.Value) - } - t = next() } - if t != nil { - out = append(out, t) - } - // Remainder. - out = append(out, rootTokens[index:]...) return Literator(out...), nil } func splitToken(t *Token, offset int) (l *Token, r *Token) { + if t == nil { + return nil, nil + } if offset == 0 { return nil, t } - if offset >= len(t.Value) { + if offset == len(t.Value) { return t, nil } l = t.Clone() diff --git a/lexers/g/go_test.go b/lexers/g/go_test.go new file mode 100644 index 0000000..5029822 --- /dev/null +++ b/lexers/g/go_test.go @@ -0,0 +1,50 @@ +package g + +import ( + "testing" + + "github.com/alecthomas/assert" + "github.com/alecthomas/chroma" +) + +func TestGoHTMLTemplateIssue126(t *testing.T) { + for _, source := range []string{ + ` + + {{ if eq .Title .Site.Title }}{{ .Site.Title }}{{ else }}{{ with .Title }}{{.}} on {{ end }}{{ .Site.Title }}{{ end }} + {{ .Permalink }} + Recent content {{ if ne .Title .Site.Title }}{{ with .Title }}in {{.}} {{ end }}{{ end }}on {{ .Site.Title }} + Hugo -- gohugo.io{{ with .Site.LanguageCode }} + {{.}}{{end}}{{ with .Site.Author.email }} + {{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}{{end}}{{ with .Site.Author.email }} + {{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}{{end}}{{ with .Site.Copyright }} + {{.}}{{end}}{{ if not .Date.IsZero }} + {{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }}{{ end }} + {{ with .OutputFormats.Get "RSS" }} + {{ printf "" .Permalink .MediaType | safeHTML }} + {{ end }} + {{ range .Data.Pages }} + + {{ .Title }} + {{ .Permalink }} + {{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }} + {{ with .Site.Author.email }}{{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}{{end}} + {{ .Permalink }} + {{ .Summary | html }} + + {{ end }} + + +`, + `{{ $headless := .Site.GetPage "page" "some-headless-bundle" }} +{{ $reusablePages := $headless.Resources.Match "author*" }} +

Authors

+{{ range $reusablePages }} +

{{ .Title }}

+ {{ .Content }} +{{ end }}`} { + tokens, err := chroma.Tokenise(GoHTMLTemplate, nil, source) + assert.NoError(t, err) + assert.Equal(t, source, chroma.Stringify(tokens...)) + } +} diff --git a/lexers/lexers_test.go b/lexers/lexers_test.go index 92af9a2..34bcd3d 100644 --- a/lexers/lexers_test.go +++ b/lexers/lexers_test.go @@ -8,7 +8,7 @@ import ( "strings" "testing" - "github.com/stretchr/testify/assert" + "github.com/alecthomas/assert" "github.com/alecthomas/chroma" "github.com/alecthomas/chroma/formatters" diff --git a/lexers/testdata/css.expected b/lexers/testdata/css.expected index 5ad24d6..e47c61d 100644 --- a/lexers/testdata/css.expected +++ b/lexers/testdata/css.expected @@ -5,7 +5,6 @@ {"type":"Punctuation","value":"{"}, {"type":"Text","value":"\n "}, {"type":"NameVariable","value":"--variable-name"}, - {"type":"Text","value":""}, {"type":"Punctuation","value":":"}, {"type":"Text","value":" "}, {"type":"LiteralNumberHex","value":"#fff"}, diff --git a/lexers/testdata/jsx.expected b/lexers/testdata/jsx.expected index 91c1718..f9bb2a1 100644 --- a/lexers/testdata/jsx.expected +++ b/lexers/testdata/jsx.expected @@ -44,9 +44,7 @@ {"type":"Punctuation","value":"\u003c"}, {"type":"NameTag","value":"App"}, {"type":"Text","value":" "}, - {"type":"Punctuation","value":"/"}, - {"type":"Text","value":""}, - {"type":"Punctuation","value":"\u003e,"}, + {"type":"Punctuation","value":"/\u003e,"}, {"type":"Text","value":"\n "}, {"type":"NameBuiltin","value":"document"}, {"type":"Punctuation","value":"."}, diff --git a/lexers/testdata/php.expected b/lexers/testdata/php.expected index b5b8310..108a23b 100644 --- a/lexers/testdata/php.expected +++ b/lexers/testdata/php.expected @@ -2,32 +2,19 @@ {"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"}, {"type":"Text","value":"\n"}, {"type":"Punctuation","value":"\u003c"}, - {"type":"Text","value":""}, {"type":"NameTag","value":"html"}, - {"type":"Punctuation","value":""}, - {"type":"Text","value":""}, {"type":"Punctuation","value":"\u003e"}, {"type":"Text","value":"\n"}, {"type":"Punctuation","value":"\u003c"}, - {"type":"Text","value":""}, {"type":"NameTag","value":"body"}, - {"type":"Punctuation","value":""}, - {"type":"Text","value":""}, {"type":"Punctuation","value":"\u003e"}, {"type":"Text","value":"\n\n"}, {"type":"Punctuation","value":"\u003c"}, - {"type":"Text","value":""}, {"type":"NameTag","value":"h1"}, - {"type":"Punctuation","value":""}, - {"type":"Text","value":""}, {"type":"Punctuation","value":"\u003e"}, {"type":"Text","value":"My first PHP page"}, - {"type":"Punctuation","value":"\u003c"}, - {"type":"Text","value":""}, - {"type":"Punctuation","value":"/"}, - {"type":"Text","value":""}, + {"type":"Punctuation","value":"\u003c/"}, {"type":"NameTag","value":"h1"}, - {"type":"Text","value":""}, {"type":"Punctuation","value":"\u003e"}, {"type":"Text","value":"\n"}, {"type":"CommentPreproc","value":"\u003c?php"}, @@ -38,7 +25,6 @@ {"type":"Text","value":" "}, {"type":"NameVariable","value":"$modx"}, {"type":"Operator","value":"-\u003e"}, - {"type":"Text","value":""}, {"type":"NameAttribute","value":"getIterator"}, {"type":"Punctuation","value":"("}, {"type":"LiteralStringSingle","value":"'modResource'"}, @@ -65,13 +51,11 @@ {"type":"Operator","value":"="}, {"type":"NameVariable","value":"$doc"}, {"type":"Operator","value":"-\u003e"}, - {"type":"Text","value":""}, {"type":"NameAttribute","value":"content"}, {"type":"Punctuation","value":";"}, {"type":"Text","value":"\n "}, {"type":"NameVariable","value":"$doc"}, {"type":"Operator","value":"-\u003e"}, - {"type":"Text","value":""}, {"type":"NameAttribute","value":"set"}, {"type":"Punctuation","value":"("}, {"type":"LiteralStringDouble","value":"\"content\""}, @@ -92,7 +76,6 @@ {"type":"Punctuation","value":"("}, {"type":"NameVariable","value":"$doc"}, {"type":"Operator","value":"-\u003e"}, - {"type":"Text","value":""}, {"type":"NameAttribute","value":"content"}, {"type":"Punctuation","value":");"}, {"type":"Text","value":"\n "}, @@ -102,20 +85,12 @@ {"type":"CommentSingle","value":"// some comment\n"}, {"type":"CommentPreproc","value":"?\u003e"}, {"type":"Text","value":"\n"}, - {"type":"Punctuation","value":"\u003c"}, - {"type":"Text","value":""}, - {"type":"Punctuation","value":"/"}, - {"type":"Text","value":""}, + {"type":"Punctuation","value":"\u003c/"}, {"type":"NameTag","value":"body"}, - {"type":"Text","value":""}, {"type":"Punctuation","value":"\u003e"}, {"type":"Text","value":"\n"}, - {"type":"Punctuation","value":"\u003c"}, - {"type":"Text","value":""}, - {"type":"Punctuation","value":"/"}, - {"type":"Text","value":""}, + {"type":"Punctuation","value":"\u003c/"}, {"type":"NameTag","value":"html"}, - {"type":"Text","value":""}, {"type":"Punctuation","value":"\u003e"}, {"type":"Text","value":"\n"} ] diff --git a/lexers/testdata/scala.expected b/lexers/testdata/scala.expected index 63253d8..0cd7d64 100644 --- a/lexers/testdata/scala.expected +++ b/lexers/testdata/scala.expected @@ -50,7 +50,6 @@ {"type":"Keyword","value":":"}, {"type":"Text","value":" "}, {"type":"KeywordType","value":"Seq"}, - {"type":"Text","value":""}, {"type":"Operator","value":"["}, {"type":"KeywordType","value":"T"}, {"type":"Operator","value":"];"}, diff --git a/mutators.go b/mutators.go index e4d1f84..30f1eed 100644 --- a/mutators.go +++ b/mutators.go @@ -120,3 +120,12 @@ func Pop(n int) MutatorFunc { func Default(mutators ...Mutator) Rule { return Rule{Mutator: Mutators(mutators...)} } + +// Stringify returns the raw string for a set of tokens. +func Stringify(tokens ...*Token) string { + out := []string{} + for _, t := range tokens { + out = append(out, t.Value) + } + return strings.Join(out, "") +}