From e56590a81516215fc018dc470825984318f0984a Mon Sep 17 00:00:00 2001 From: Alec Thomas Date: Tue, 2 Jan 2018 14:53:25 +1100 Subject: [PATCH] Add data-driven test framework for lexers. See #68. --- cmd/chroma/main.go | 6 ++++ formatters/json.go | 31 +++++++++++++++++++ lexer.go | 4 +-- lexers/lexers_test.go | 54 ++++++++++++++++++++++++++++++++ lexers/testdata/README.md | 60 ++++++++++++++++++++++++++++++++++++ lexers/testdata/css.css | 3 ++ lexers/testdata/css.expected | 16 ++++++++++ types.go | 21 +++++++++++++ 8 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 formatters/json.go create mode 100644 lexers/lexers_test.go create mode 100644 lexers/testdata/README.md create mode 100644 lexers/testdata/css.css create mode 100644 lexers/testdata/css.expected diff --git a/cmd/chroma/main.go b/cmd/chroma/main.go index 4f74614..1e1636b 100644 --- a/cmd/chroma/main.go +++ b/cmd/chroma/main.go @@ -41,6 +41,8 @@ var ( styleFlag = kingpin.Flag("style", "Style to use for formatting.").Short('s').Default("swapoff").Enum(styles.Names()...) formatterFlag = kingpin.Flag("formatter", "Formatter to use.").Default("terminal").Short('f').Enum(formatters.Names()...) + jsonFlag = kingpin.Flag("json", "Output JSON representation of tokens.").Bool() + htmlFlag = kingpin.Flag("html", "Enable HTML mode (equivalent to '--formatter html').").Bool() htmlPrefixFlag = kingpin.Flag("html-prefix", "HTML CSS class prefix.").PlaceHolder("PREFIX").String() htmlStylesFlag = kingpin.Flag("html-styles", "Output HTML CSS styles.").Bool() @@ -103,6 +105,10 @@ command, for Go. 
} defer w.Flush() + if *jsonFlag { + *formatterFlag = "json" + } + if *htmlFlag { *formatterFlag = "html" } diff --git a/formatters/json.go b/formatters/json.go new file mode 100644 index 0000000..4ba1dd4 --- /dev/null +++ b/formatters/json.go @@ -0,0 +1,31 @@ +package formatters + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/alecthomas/chroma" +) + +// JSON formatter outputs the raw token structures as JSON. +var JSON = Register("json", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error { + fmt.Fprintln(w, "[") + i := 0 + for t := it(); t != nil; t = it() { + if i > 0 { + fmt.Fprintln(w, ",") + } + i++ + bytes, err := json.Marshal(t) + if err != nil { + return err + } + if _, err := fmt.Fprint(w, " "+string(bytes)); err != nil { + return err + } + } + fmt.Fprintln(w) + fmt.Fprintln(w, "]") + return nil +})) diff --git a/lexer.go b/lexer.go index d9784e8..fbf4233 100644 --- a/lexer.go +++ b/lexer.go @@ -59,8 +59,8 @@ type Config struct { // Token output to formatter. type Token struct { - Type TokenType - Value string + Type TokenType `json:"type"` + Value string `json:"value"` } func (t *Token) String() string { return t.Value } diff --git a/lexers/lexers_test.go b/lexers/lexers_test.go new file mode 100644 index 0000000..94ed31f --- /dev/null +++ b/lexers/lexers_test.go @@ -0,0 +1,54 @@ +package lexers + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/alecthomas/chroma" +) + +// Test source files are in the form <name>.<name> and validation data is in the form <name>.expected. 
+func TestLexers(t *testing.T) { + for _, lexer := range Registry.Lexers { + name := strings.ToLower(lexer.Config().Name) + filename := filepath.Join("testdata", name+"."+name) + expectedFilename := filepath.Join("testdata", name+".expected") + if _, err := os.Stat(filename); err != nil { + continue + } + if !assert.NotNil(t, lexer) { + continue + } + t.Run(lexer.Config().Name, func(t *testing.T) { + // Read and tokenise source text. + actualText, err := ioutil.ReadFile(filename) + if !assert.NoError(t, err) { + return + } + actual, err := chroma.Tokenise(lexer, nil, string(actualText)) + if !assert.NoError(t, err) { + return + } + + // Read expected JSON into token slice. + expected := []*chroma.Token{} + r, err := os.Open(expectedFilename) + if !assert.NoError(t, err) { + return + } + err = json.NewDecoder(r).Decode(&expected) + if !assert.NoError(t, err) { + return + } + + // Equal? + assert.Equal(t, expected, actual) + }) + } +} diff --git a/lexers/testdata/README.md b/lexers/testdata/README.md new file mode 100644 index 0000000..b8392bb --- /dev/null +++ b/lexers/testdata/README.md @@ -0,0 +1,60 @@ +# Lexer tests + +This directory contains input source and expected output lexer tokens. + +Input filenames for lexers are in the form `<lexer>.<lexer>`. Expected output filenames are in the form `<lexer>.expected`. + +Each input filename is parsed by the corresponding lexer and checked against the expected JSON-encoded token list. + + +To add/update tests do the following: + +1. `export LEXER=css` +2. Create/edit a file `lexers/testdata/${LEXER}.${LEXER}` (eg. `css.css`). +3. Run `go run ./cmd/chroma/main.go --lexer ${LEXER} --json lexers/testdata/${LEXER}.${LEXER} > lexers/testdata/${LEXER}.expected`. +4. Run `go test -v ./lexers`. + + +eg. 
+ +```bash +$ export LEXER=css +$ go run ./cmd/chroma/main.go --lexer ${LEXER} --json lexers/testdata/${LEXER}.${LEXER} > lexers/testdata/${LEXER}.expected +$ cat lexers/testdata/${LEXER}.expected +[ + {"type":"Punctuation","value":":"}, + {"type":"NameDecorator","value":"root"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{"}, + {"type":"Text","value":"\n "}, + {"type":"NameVariable","value":"--variable-name"}, + {"type":"Text","value":""}, + {"type":"Punctuation","value":":"}, + {"type":"Text","value":" "}, + {"type":"LiteralNumberHex","value":"#fff"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"}"}, + {"type":"Text","value":"\n"} +] +$ go test -v ./lexers +=== RUN TestDiffLexerWithoutTralingNewLine +--- PASS: TestDiffLexerWithoutTralingNewLine (0.00s) +=== RUN TestLexers +=== RUN TestLexers/CSS +--- PASS: TestLexers (0.00s) + --- PASS: TestLexers/CSS (0.00s) +=== RUN TestCompileAllRegexes +--- PASS: TestCompileAllRegexes (0.61s) +=== RUN TestGet +=== RUN TestGet/ByName +=== RUN TestGet/ByAlias +=== RUN TestGet/ViaFilename +--- PASS: TestGet (0.00s) + --- PASS: TestGet/ByName (0.00s) + --- PASS: TestGet/ByAlias (0.00s) + --- PASS: TestGet/ViaFilename (0.00s) +PASS +ok github.com/alecthomas/chroma/lexers 0.649s +``` + diff --git a/lexers/testdata/css.css b/lexers/testdata/css.css new file mode 100644 index 0000000..f848915 --- /dev/null +++ b/lexers/testdata/css.css @@ -0,0 +1,3 @@ +:root { + --variable-name: #fff; +} diff --git a/lexers/testdata/css.expected b/lexers/testdata/css.expected new file mode 100644 index 0000000..5ad24d6 --- /dev/null +++ b/lexers/testdata/css.expected @@ -0,0 +1,16 @@ +[ + {"type":"Punctuation","value":":"}, + {"type":"NameDecorator","value":"root"}, + {"type":"Text","value":" "}, + {"type":"Punctuation","value":"{"}, + {"type":"Text","value":"\n "}, + {"type":"NameVariable","value":"--variable-name"}, + {"type":"Text","value":""}, + 
{"type":"Punctuation","value":":"}, + {"type":"Text","value":" "}, + {"type":"LiteralNumberHex","value":"#fff"}, + {"type":"Punctuation","value":";"}, + {"type":"Text","value":"\n"}, + {"type":"Punctuation","value":"}"}, + {"type":"Text","value":"\n"} +] diff --git a/types.go b/types.go index 7242688..a9eddd5 100644 --- a/types.go +++ b/types.go @@ -1,5 +1,10 @@ package chroma +import ( + "encoding/json" + "fmt" +) + //go:generate stringer -type TokenType // TokenType is the type of token to highlight. @@ -7,6 +12,22 @@ package chroma // It is also an Emitter, emitting a single token of itself type TokenType int +func (t *TokenType) MarshalJSON() ([]byte, error) { return json.Marshal(t.String()) } +func (t *TokenType) UnmarshalJSON(data []byte) error { + key := "" + err := json.Unmarshal(data, &key) + if err != nil { + return err + } + for tt, text := range _TokenType_map { + if text == key { + *t = tt + return nil + } + } + return fmt.Errorf("unknown TokenType %q", data) +} + // Set of TokenTypes. // // Categories of types are grouped in ranges of 1000, while sub-categories are in ranges of 100. For