1
0
mirror of https://github.com/alecthomas/chroma.git synced 2025-03-21 21:17:50 +02:00

533 lines
15 KiB
Go
Raw Normal View History

package html
import (
"fmt"
"html"
"io"
"sort"
"strings"
Version 2 of Chroma This cleans up the API in general, removing a bunch of deprecated stuff, cleaning up circular imports, etc. But the biggest change is switching to an optional XML format for the regex lexer. Having lexers defined only in Go is not ideal for a couple of reasons. Firstly, it impedes a significant portion of contributors who use Chroma in Hugo, but don't know Go. Secondly, it bloats the binary size of any project that imports Chroma. Why XML? YAML is an abomination and JSON is not human editable. XML also compresses very well (eg. Go template lexer XML compresses from 3239 bytes to 718). Why a new syntax format? All major existing formats rely on the Oniguruma regex engine, which is extremely complex and for which there is no Go port. Why not earlier? Prior to the existence of fs.FS this was not a viable option. Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' A slight increase in init time, but I think this is okay given the increase in flexibility. 
And binary size difference: $ du -h lexers.test* $ du -sh chroma* 951371ms 8.8M chroma.master 7.8M chroma.xml 7.8M chroma.xml-pre-opt Benchmarks: $ hyperfine --warmup 3 \ './chroma.master --version' \ './chroma.xml-pre-opt --version' \ './chroma.xml --version' Benchmark 1: ./chroma.master --version Time (mean ± σ): 5.3 ms ± 0.5 ms [User: 3.6 ms, System: 1.4 ms] Range (min … max): 4.2 ms … 6.6 ms 233 runs Benchmark 2: ./chroma.xml-pre-opt --version Time (mean ± σ): 50.6 ms ± 0.5 ms [User: 52.4 ms, System: 3.6 ms] Range (min … max): 49.2 ms … 51.5 ms 51 runs Benchmark 3: ./chroma.xml --version Time (mean ± σ): 6.9 ms ± 1.1 ms [User: 5.1 ms, System: 1.5 ms] Range (min … max): 5.7 ms … 19.9 ms 196 runs Summary './chroma.master --version' ran 1.30 ± 0.23 times faster than './chroma.xml --version' 9.56 ± 0.83 times faster than './chroma.xml-pre-opt --version' Incompatible changes: - (*RegexLexer).SetAnalyser: changed from func(func(text string) float32) *RegexLexer to func(func(text string) float32) Lexer - (*TokenType).UnmarshalJSON: removed - Lexer.AnalyseText: added - Lexer.SetAnalyser: added - Lexer.SetRegistry: added - MustNewLazyLexer: removed - MustNewLexer: changed from func(*Config, Rules) *RegexLexer to func(*Config, func() Rules) *RegexLexer - Mutators: changed from func(...Mutator) MutatorFunc to func(...Mutator) Mutator - NewLazyLexer: removed - NewLexer: changed from func(*Config, Rules) (*RegexLexer, error) to func(*Config, func() Rules) (*RegexLexer, error) - Pop: changed from func(int) MutatorFunc to func(int) Mutator - Push: changed from func(...string) MutatorFunc to func(...string) Mutator - TokenType.MarshalJSON: removed - Using: changed from func(Lexer) Emitter to func(string) Emitter - UsingByGroup: changed from func(func(string) Lexer, int, int, ...Emitter) Emitter to func(int, int, ...Emitter) Emitter
2022-01-03 23:51:17 +11:00
"github.com/alecthomas/chroma/v2"
)
// Option sets an option of the HTML formatter.
2017-09-19 23:04:10 +10:00
type Option func(f *Formatter)
// Standalone selects whether the formatter wraps its output in <html> and
// <body> elements so that the result is a complete HTML document.
func Standalone(b bool) Option {
	return func(formatter *Formatter) {
		formatter.standalone = b
	}
}
// ClassPrefix sets the CSS class prefix.
2017-09-19 23:04:10 +10:00
func ClassPrefix(prefix string) Option { return func(f *Formatter) { f.prefix = prefix } }
// WithClasses makes the formatter emit class="..." attributes that refer to
// a stylesheet (see WriteCSS) instead of inline style="..." attributes.
func WithClasses(b bool) Option {
	return func(formatter *Formatter) {
		formatter.Classes = b
	}
}
// WithAllClasses turns off the optimisation that leaves out token types
// whose style entry is empty, so that every standard type gets a CSS rule.
func WithAllClasses(b bool) Option {
	return func(formatter *Formatter) {
		formatter.allClasses = b
	}
}
// WithCustomCSS supplies extra CSS declarations per token type. They are
// appended after the declarations generated from the style, and a token type
// also inherits the custom CSS of its category and sub-category.
func WithCustomCSS(css map[chroma.TokenType]string) Option {
	apply := func(formatter *Formatter) {
		formatter.customCSS = css
	}
	return apply
}
// TabWidth sets the number of characters for a tab. Defaults to 8.
2017-09-19 23:04:10 +10:00
func TabWidth(width int) Option { return func(f *Formatter) { f.tabWidth = width } }
// PreventSurroundingPre prevents the surrounding pre tags around the generated code.
func PreventSurroundingPre(b bool) Option {
2019-11-18 12:31:49 +01:00
return func(f *Formatter) {
if b {
f.preWrapper = nopPreWrapper
} else {
f.preWrapper = defaultPreWrapper
}
2019-11-18 12:31:49 +01:00
}
}
// WithPreWrapper replaces the wrapper that produces the markup surrounding
// the line-number and code blocks (by default <pre> tags).
func WithPreWrapper(wrapper PreWrapper) Option {
	apply := func(formatter *Formatter) {
		formatter.preWrapper = wrapper
	}
	return apply
}
// WrapLongLines makes the wrapper element wrap long lines by adding
// white-space: pre-wrap and word-break: break-word to its CSS.
func WrapLongLines(b bool) Option {
	apply := func(formatter *Formatter) {
		formatter.wrapLongLines = b
	}
	return apply
}
2017-09-19 23:04:10 +10:00
// WithLineNumbers formats output with line numbers.
func WithLineNumbers(b bool) Option {
2017-09-19 23:04:10 +10:00
return func(f *Formatter) {
f.lineNumbers = b
}
}
// LineNumbersInTable will, when combined with WithLineNumbers, put the line
// numbers and the code into separate table cells, which makes the code
// copy-and-paste friendly.
func LineNumbersInTable(b bool) Option {
	apply := func(formatter *Formatter) {
		formatter.lineNumbersInTable = b
	}
	return apply
}
// LinkableLineNumbers gives each line-number element an "id" attribute made
// of prefix followed by the line number, and turns the number into a link to
// that id, so individual lines can be linked to.
func LinkableLineNumbers(b bool, prefix string) Option {
	apply := func(formatter *Formatter) {
		formatter.lineNumbersIDPrefix = prefix
		formatter.linkableLineNumbers = b
	}
	return apply
}
// HighlightLines higlights the given line ranges with the Highlight style.
2017-09-19 23:04:10 +10:00
//
// A range is the beginning and ending of a range as 1-based line numbers, inclusive.
func HighlightLines(ranges [][2]int) Option {
2017-09-19 23:04:10 +10:00
return func(f *Formatter) {
f.highlightRanges = ranges
2017-09-20 13:30:46 +10:00
sort.Sort(f.highlightRanges)
2017-09-19 23:04:10 +10:00
}
}
2017-09-24 20:33:50 +10:00
// BaseLineNumber sets the number given to the first line. Defaults to 1.
func BaseLineNumber(n int) Option {
	apply := func(formatter *Formatter) {
		formatter.baseLineNumber = n
	}
	return apply
}
2017-09-19 23:04:10 +10:00
// New HTML formatter.
func New(options ...Option) *Formatter {
2017-09-24 20:33:50 +10:00
f := &Formatter{
baseLineNumber: 1,
2019-11-18 12:31:49 +01:00
preWrapper: defaultPreWrapper,
2017-09-24 20:33:50 +10:00
}
2017-09-19 23:04:10 +10:00
for _, option := range options {
option(f)
}
2017-09-19 23:04:10 +10:00
return f
}
2019-11-18 12:31:49 +01:00
// PreWrapper defines the operations supported in WithPreWrapper.
//
// The formatter writes the string returned by Start before a block of output
// and the string returned by End after it.
type PreWrapper interface {
	// Start is called to write a start <pre> element.
	// The code flag tells whether this block surrounds
	// highlighted code. This will be false when surrounding
	// line numbers.
	// styleAttr is the attribute text the formatter computed for the
	// wrapper: empty, or a leading space followed by class="..." or
	// style="...".
	Start(code bool, styleAttr string) string
	// End is called to write the end </pre> element.
	// code has the same meaning as in Start.
	End(code bool) string
}
// preWrapper adapts a pair of plain functions to the PreWrapper interface.
type preWrapper struct {
	start func(code bool, styleAttr string) string
	end   func(code bool) string
}

// Start returns whatever the start function produces for the arguments.
func (w preWrapper) Start(code bool, styleAttr string) string {
	opening := w.start(code, styleAttr)
	return opening
}

// End returns whatever the end function produces for the argument.
func (w preWrapper) End(code bool) string {
	closing := w.end(code)
	return closing
}
var (
nopPreWrapper = preWrapper{
start: func(code bool, styleAttr string) string { return "" },
end: func(code bool) string { return "" },
}
defaultPreWrapper = preWrapper{
start: func(code bool, styleAttr string) string {
if code {
return fmt.Sprintf(`<pre tabindex="0"%s><code>`, styleAttr)
}
return fmt.Sprintf(`<pre tabindex="0"%s>`, styleAttr)
2019-11-18 12:31:49 +01:00
},
end: func(code bool) string {
if code {
return `</code></pre>`
}
return `</pre>`
2019-11-18 12:31:49 +01:00
},
}
)
2017-09-19 23:04:10 +10:00
// Formatter that generates HTML.
//
// A Formatter is configured through the Option functions passed to New.
type Formatter struct {
	// standalone wraps the output in <html>/<body> elements.
	standalone bool
	// prefix is prepended to every CSS class name.
	prefix string
	Classes bool // Exported field to detect when classes are being used
	// allClasses keeps token types whose style entry is empty.
	allClasses bool
	// customCSS holds user-supplied CSS declarations per token type.
	customCSS map[chroma.TokenType]string
	// preWrapper produces the markup around the line-number and code blocks.
	preWrapper PreWrapper
	// tabWidth is the tab size; 0 and 8 produce no tab-size CSS.
	tabWidth int
	// wrapLongLines adds pre-wrap/break-word CSS to the wrapper.
	wrapLongLines bool
	// lineNumbers enables line numbers.
	lineNumbers bool
	// lineNumbersInTable puts numbers and code in separate table cells
	// (only when lineNumbers is also set).
	lineNumbersInTable bool
	// linkableLineNumbers adds an id and a self-link to each line number.
	linkableLineNumbers bool
	// lineNumbersIDPrefix is the prefix of those ids.
	lineNumbersIDPrefix string
	// highlightRanges are the inclusive line ranges to highlight, sorted by
	// starting line when set through HighlightLines.
	highlightRanges highlightRanges
	// baseLineNumber is the number of the first line.
	baseLineNumber int
}
2017-09-20 13:30:46 +10:00
// highlightRanges is a list of inclusive [first, last] line ranges. It
// implements sort.Interface, ordering ranges by their first line.
type highlightRanges [][2]int

// Len reports the number of ranges.
func (r highlightRanges) Len() int {
	return len(r)
}

// Swap exchanges the ranges at positions i and j.
func (r highlightRanges) Swap(i, j int) {
	tmp := r[i]
	r[i] = r[j]
	r[j] = tmp
}

// Less reports whether range i starts before range j.
func (r highlightRanges) Less(i, j int) bool {
	first, second := r[i], r[j]
	return first[0] < second[0]
}
func (f *Formatter) Format(w io.Writer, style *chroma.Style, iterator chroma.Iterator) (err error) {
return f.writeHTML(w, style, iterator.Tokens())
2017-09-20 13:30:46 +10:00
}
// We deliberately don't use html/template here because it is two orders of magnitude slower (benchmarked).
//
// OTOH we need to be super careful about correct escaping...
func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []chroma.Token) (err error) { // nolint: gocyclo
2017-09-20 13:30:46 +10:00
css := f.styleToCSS(style)
if !f.Classes {
2017-09-20 13:30:46 +10:00
for t, style := range css {
css[t] = compressStyle(style)
2017-09-19 23:04:10 +10:00
}
}
2017-09-19 23:04:10 +10:00
if f.standalone {
fmt.Fprint(w, "<html>\n")
if f.Classes {
2017-09-19 23:04:10 +10:00
fmt.Fprint(w, "<style type=\"text/css\">\n")
err = f.WriteCSS(w, style)
if err != nil {
return err
}
2017-09-20 13:30:46 +10:00
fmt.Fprintf(w, "body { %s; }\n", css[chroma.Background])
2017-09-19 23:04:10 +10:00
fmt.Fprint(w, "</style>")
}
2017-09-20 13:30:46 +10:00
fmt.Fprintf(w, "<body%s>\n", f.styleAttr(css, chroma.Background))
}
2017-09-20 13:30:46 +10:00
wrapInTable := f.lineNumbers && f.lineNumbersInTable
lines := chroma.SplitTokensIntoLines(tokens)
lineDigits := len(fmt.Sprintf("%d", f.baseLineNumber+len(lines)-1))
highlightIndex := 0
if wrapInTable {
// List line numbers in its own <td>
fmt.Fprintf(w, "<div%s>\n", f.styleAttr(css, chroma.PreWrapper))
fmt.Fprintf(w, "<table%s><tr>", f.styleAttr(css, chroma.LineTable))
fmt.Fprintf(w, "<td%s>\n", f.styleAttr(css, chroma.LineTableTD))
fmt.Fprintf(w, f.preWrapper.Start(false, f.styleAttr(css, chroma.PreWrapper)))
for index := range lines {
line := f.baseLineNumber + index
highlight, next := f.shouldHighlight(highlightIndex, line)
if next {
highlightIndex++
}
if highlight {
fmt.Fprintf(w, "<span%s>", f.styleAttr(css, chroma.LineHighlight))
}
fmt.Fprintf(w, "<span%s%s>%s\n</span>", f.styleAttr(css, chroma.LineNumbersTable), f.lineIDAttribute(line), f.lineTitleWithLinkIfNeeded(lineDigits, line))
if highlight {
fmt.Fprintf(w, "</span>")
}
}
2019-11-18 12:31:49 +01:00
fmt.Fprint(w, f.preWrapper.End(false))
fmt.Fprint(w, "</td>\n")
fmt.Fprintf(w, "<td%s>\n", f.styleAttr(css, chroma.LineTableTD, "width:100%"))
}
fmt.Fprintf(w, f.preWrapper.Start(true, f.styleAttr(css, chroma.PreWrapper)))
2019-11-18 12:31:49 +01:00
highlightIndex = 0
2017-09-24 20:33:50 +10:00
for index, tokens := range lines {
// 1-based line number.
line := f.baseLineNumber + index
highlight, next := f.shouldHighlight(highlightIndex, line)
if next {
highlightIndex++
}
// Start of Line
fmt.Fprint(w, `<span`)
if highlight {
// Line + LineHighlight
if f.Classes {
fmt.Fprintf(w, ` class="%s %s"`, f.class(chroma.Line), f.class(chroma.LineHighlight))
} else {
fmt.Fprintf(w, ` style="%s %s"`, css[chroma.Line], css[chroma.LineHighlight])
}
fmt.Fprint(w, `>`)
} else {
fmt.Fprintf(w, "%s>", f.styleAttr(css, chroma.Line))
}
// Line number
if f.lineNumbers && !wrapInTable {
fmt.Fprintf(w, "<span%s%s>%s</span>", f.styleAttr(css, chroma.LineNumbers), f.lineIDAttribute(line), f.lineTitleWithLinkIfNeeded(lineDigits, line))
}
2017-09-20 13:30:46 +10:00
fmt.Fprintf(w, `<span%s>`, f.styleAttr(css, chroma.CodeLine))
2017-09-20 13:30:46 +10:00
for _, token := range tokens {
html := html.EscapeString(token.String())
attr := f.styleAttr(css, token.Type)
if attr != "" {
html = fmt.Sprintf("<span%s>%s</span>", attr, html)
}
fmt.Fprint(w, html)
}
fmt.Fprint(w, `</span>`) // End of CodeLine
fmt.Fprint(w, `</span>`) // End of Line
2017-09-20 13:30:46 +10:00
}
2019-11-18 12:31:49 +01:00
fmt.Fprintf(w, f.preWrapper.End(true))
if wrapInTable {
fmt.Fprint(w, "</td></tr></table>\n")
fmt.Fprint(w, "</div>\n")
}
2017-09-20 13:30:46 +10:00
if f.standalone {
fmt.Fprint(w, "\n</body>\n")
2017-09-20 13:30:46 +10:00
fmt.Fprint(w, "</html>\n")
}
return nil
}
// lineIDAttribute returns the id attribute (with a leading space) for the
// given line number, or the empty string when line numbers are not linkable.
func (f *Formatter) lineIDAttribute(line int) string {
	if f.linkableLineNumbers {
		return fmt.Sprintf(` id="%s"`, f.lineID(line))
	}
	return ""
}
// lineTitleWithLinkIfNeeded returns the line number right-aligned to
// lineDigits characters. When line numbers are linkable the number is
// wrapped in an anchor that points at the line's own id.
func (f *Formatter) lineTitleWithLinkIfNeeded(lineDigits, line int) string {
	padded := fmt.Sprintf("%*d", lineDigits, line)
	if f.linkableLineNumbers {
		return fmt.Sprintf(`<a style="outline: none; text-decoration:none; color:inherit" href="#%s">%s</a>`, f.lineID(line), padded)
	}
	return padded
}
// lineID returns the element id of a line: the configured id prefix
// followed by the decimal line number.
func (f *Formatter) lineID(line int) string {
	return f.lineNumbersIDPrefix + fmt.Sprint(line)
}
// shouldHighlight reports whether line falls inside a highlight range.
//
// Starting at highlightIndex it skips every range that ends before line. The
// first result says whether line lies within the range reached that way; the
// second says whether at least one range was skipped.
func (f *Formatter) shouldHighlight(highlightIndex, line int) (bool, bool) {
	ranges := f.highlightRanges
	skipped := false
	for ; highlightIndex < len(ranges); highlightIndex++ {
		if line <= ranges[highlightIndex][1] {
			break
		}
		skipped = true
	}
	if highlightIndex >= len(ranges) {
		return false, skipped
	}
	current := ranges[highlightIndex]
	inside := current[0] <= line && line <= current[1]
	return inside, skipped
}
func (f *Formatter) class(t chroma.TokenType) string {
for t != 0 {
if cls, ok := chroma.StandardTypes[t]; ok {
if cls != "" {
return f.prefix + cls
}
return ""
}
t = t.Parent()
}
if cls := chroma.StandardTypes[t]; cls != "" {
return f.prefix + cls
}
return ""
2017-09-19 23:04:10 +10:00
}
func (f *Formatter) styleAttr(styles map[chroma.TokenType]string, tt chroma.TokenType, extraCSS ...string) string {
if f.Classes {
cls := f.class(tt)
if cls == "" {
return ""
}
return fmt.Sprintf(` class="%s"`, cls)
}
2017-09-19 23:04:10 +10:00
if _, ok := styles[tt]; !ok {
tt = tt.SubCategory()
if _, ok := styles[tt]; !ok {
tt = tt.Category()
if _, ok := styles[tt]; !ok {
return ""
}
}
}
css := []string{styles[tt]}
css = append(css, extraCSS...)
return fmt.Sprintf(` style="%s"`, strings.Join(css, ";"))
2017-09-19 23:04:10 +10:00
}
func (f *Formatter) tabWidthStyle() string {
if f.tabWidth != 0 && f.tabWidth != 8 {
return fmt.Sprintf("-moz-tab-size: %[1]d; -o-tab-size: %[1]d; tab-size: %[1]d", f.tabWidth)
2017-09-19 23:04:10 +10:00
}
return ""
}
2017-09-18 13:59:11 +10:00
// WriteCSS writes CSS style definitions (without any surrounding HTML).
2017-09-19 23:04:10 +10:00
func (f *Formatter) WriteCSS(w io.Writer, style *chroma.Style) error {
2017-09-20 13:30:46 +10:00
css := f.styleToCSS(style)
// Special-case background as it is mapped to the outer ".chroma" class.
if _, err := fmt.Fprintf(w, "/* %s */ .%sbg { %s }\n", chroma.Background, f.prefix, css[chroma.Background]); err != nil {
return err
}
// Special-case PreWrapper as it is the ".chroma" class.
if _, err := fmt.Fprintf(w, "/* %s */ .%schroma { %s }\n", chroma.PreWrapper, f.prefix, css[chroma.PreWrapper]); err != nil {
2017-09-18 13:59:11 +10:00
return err
}
// Special-case code column of table to expand width.
if f.lineNumbers && f.lineNumbersInTable {
2018-02-17 17:28:43 -08:00
if _, err := fmt.Fprintf(w, "/* %s */ .%schroma .%s:last-child { width: 100%%; }",
chroma.LineTableTD, f.prefix, f.class(chroma.LineTableTD)); err != nil {
return err
}
}
// Special-case line number highlighting when targeted.
if f.lineNumbers || f.lineNumbersInTable {
targetedLineCSS := StyleEntryToCSS(style.Get(chroma.LineHighlight))
for _, tt := range []chroma.TokenType{chroma.LineNumbers, chroma.LineNumbersTable} {
fmt.Fprintf(w, "/* %s targeted by URL anchor */ .%schroma .%s:target { %s }\n", tt, f.prefix, f.class(tt), targetedLineCSS)
}
}
tts := []int{}
2017-09-20 13:30:46 +10:00
for tt := range css {
tts = append(tts, int(tt))
}
sort.Ints(tts)
for _, ti := range tts {
tt := chroma.TokenType(ti)
switch tt {
case chroma.Background, chroma.PreWrapper:
continue
}
class := f.class(tt)
if class == "" {
continue
}
2017-09-20 13:30:46 +10:00
styles := css[tt]
if _, err := fmt.Fprintf(w, "/* %s */ .%schroma .%s { %s }\n", tt, f.prefix, class, styles); err != nil {
2017-09-18 13:59:11 +10:00
return err
}
}
2017-09-18 13:59:11 +10:00
return nil
}
2017-09-20 13:30:46 +10:00
func (f *Formatter) styleToCSS(style *chroma.Style) map[chroma.TokenType]string {
classes := map[chroma.TokenType]string{}
bg := style.Get(chroma.Background)
2017-09-20 13:30:46 +10:00
// Convert the style.
for t := range chroma.StandardTypes {
entry := style.Get(t)
if t != chroma.Background {
entry = entry.Sub(bg)
}
// Inherit from custom CSS provided by user
tokenCategory := t.Category()
tokenSubCategory := t.SubCategory()
if t != tokenCategory {
if css, ok := f.customCSS[tokenCategory]; ok {
classes[t] = css
}
}
if tokenCategory != tokenSubCategory {
if css, ok := f.customCSS[tokenSubCategory]; ok {
classes[t] += css
}
}
// Add custom CSS provided by user
if css, ok := f.customCSS[t]; ok {
classes[t] += css
}
if !f.allClasses && entry.IsZero() && classes[t] == `` {
continue
}
styleEntryCSS := StyleEntryToCSS(entry)
if styleEntryCSS != `` {
styleEntryCSS += `;`
}
classes[t] = styleEntryCSS + classes[t]
}
2017-09-19 23:04:10 +10:00
classes[chroma.Background] += f.tabWidthStyle()
classes[chroma.PreWrapper] += classes[chroma.Background]
// Make PreWrapper a grid to show highlight style with full width.
if len(f.highlightRanges) > 0 && f.customCSS[chroma.PreWrapper] == `` {
classes[chroma.PreWrapper] += `display: grid;`
}
// Make PreWrapper wrap long lines.
if f.wrapLongLines {
classes[chroma.PreWrapper] += `white-space: pre-wrap; word-break: break-word;`
}
lineNumbersStyle := `white-space: pre; user-select: none; margin-right: 0.4em; padding: 0 0.4em 0 0.4em;`
// All rules begin with default rules followed by user provided rules
classes[chroma.Line] = `display: flex;` + classes[chroma.Line]
classes[chroma.LineNumbers] = lineNumbersStyle + classes[chroma.LineNumbers]
classes[chroma.LineNumbersTable] = lineNumbersStyle + classes[chroma.LineNumbersTable]
classes[chroma.LineTable] = "border-spacing: 0; padding: 0; margin: 0; border: 0;" + classes[chroma.LineTable]
classes[chroma.LineTableTD] = "vertical-align: top; padding: 0; margin: 0; border: 0;" + classes[chroma.LineTableTD]
return classes
}
2017-09-20 13:30:46 +10:00
// StyleEntryToCSS converts a chroma.StyleEntry to CSS attributes.
func StyleEntryToCSS(e chroma.StyleEntry) string {
styles := []string{}
if e.Colour.IsSet() {
styles = append(styles, "color: "+e.Colour.String())
}
if e.Background.IsSet() {
styles = append(styles, "background-color: "+e.Background.String())
}
if e.Bold == chroma.Yes {
styles = append(styles, "font-weight: bold")
}
if e.Italic == chroma.Yes {
styles = append(styles, "font-style: italic")
}
if e.Underline == chroma.Yes {
styles = append(styles, "text-decoration: underline")
}
2017-09-19 23:04:10 +10:00
return strings.Join(styles, "; ")
}
2017-09-20 13:30:46 +10:00
// compressStyle shortens a ";"-separated list of CSS declarations: runs of
// whitespace are collapsed, the space after the first ": " of each
// declaration is removed, and a trailing 6-digit hex colour whose digits
// pair up (#aabbcc) is shortened to its 3-digit form (#abc).
//
// A colour is only shortened when the declaration ends in "#" followed by
// exactly six hex digits. Anything else containing "#" (short values, 8-digit
// colours, url(#id) references, ...) is passed through unchanged rather than
// being sliced blindly.
func compressStyle(s string) string {
	const hexDigits = "0123456789abcdefABCDEF"
	parts := strings.Split(s, ";")
	out := make([]string, 0, len(parts))
	for _, p := range parts {
		p = strings.Join(strings.Fields(p), " ")
		p = strings.Replace(p, ": ", ":", 1)
		if n := len(p); n >= 7 && p[n-7] == '#' {
			c := p[n-6:]
			// Trim leaves nothing only if every byte of c is a hex digit.
			if strings.Trim(c, hexDigits) == "" && c[0] == c[1] && c[2] == c[3] && c[4] == c[5] {
				p = p[:n-6] + c[0:1] + c[2:3] + c[4:5]
			}
		}
		out = append(out, p)
	}
	return strings.Join(out, ";")
}