Mirror of https://github.com/alecthomas/chroma.git
			
		
		
		
Tokens by value (#187)

This results in about an 8% improvement in speed.

Committed by Alec Thomas
Parent: 5a473179cf
Commit: 9c3abeae1d
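The change throughout this diff is mechanical: iterators return Token values instead of *Token pointers, and the end of a stream is signalled by comparing against a zero-value EOF sentinel rather than nil. The ~8% figure is the commit's own claim; a plausible explanation is that passing small structs by value avoids a heap allocation per token. Below is a minimal, self-contained Go sketch of the pattern (illustrative names only, not the chroma sources themselves):

// Sketch of the "tokens by value" pattern adopted in this commit.
// Assumption: a simplified Token/Iterator shape, not chroma's real API.
package main

import "fmt"

type TokenType int

// Token is a small comparable struct, so it can be passed and compared by value.
type Token struct {
	Type  TokenType
	Value string
}

// EOF is the zero-value sentinel that replaces a nil *Token at end of stream.
var EOF Token

// Iterator returns the next Token by value, or EOF when the stream is exhausted.
type Iterator func() Token

// Literator converts a fixed slice of tokens into an Iterator.
func Literator(tokens ...Token) Iterator {
	return func() Token {
		if len(tokens) == 0 {
			return EOF
		}
		t := tokens[0]
		tokens = tokens[1:]
		return t
	}
}

func main() {
	it := Literator(Token{0, "hello"}, Token{0, " "}, Token{0, "world"})
	// The loop shape used throughout the diff: compare against EOF, not nil.
	for t := it(); t != EOF; t = it() {
		fmt.Printf("%q\n", t.Value)
	}
}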
				
@@ -259,7 +259,7 @@ func format(w io.Writer, style *chroma.Style, it chroma.Iterator) {
 
 func check(filename string, it chroma.Iterator) {
 	line, col := 1, 0
-	for token := it(); token != nil; token = it() {
+	for token := it(); token != chroma.EOF; token = it() {
 		if token.Type == chroma.Error {
 			fmt.Printf("%s:%d:%d %q\n", filename, line, col, token.String())
 		}
							
								
								
									
coalesce.go (10 changed lines)
									
									
									
									
									
								
@@ -6,17 +6,17 @@ func Coalesce(lexer Lexer) Lexer { return &coalescer{lexer} }
 type coalescer struct{ Lexer }
 
 func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
-	var prev *Token
+	var prev Token
 	it, err := d.Lexer.Tokenise(options, text)
 	if err != nil {
 		return nil, err
 	}
-	return func() *Token {
-		for token := it(); token != nil; token = it() {
+	return func() Token {
+		for token := it(); token != (EOF); token = it() {
 			if len(token.Value) == 0 {
 				continue
 			}
-			if prev == nil {
+			if prev == EOF {
 				prev = token
 			} else {
 				if prev.Type == token.Type && len(prev.Value) < 8192 {
@@ -29,7 +29,7 @@ func (d *coalescer) Tokenise(options *TokeniseOptions, text string) (Iterator, e
 			}
 		}
 		out := prev
-		prev = nil
+		prev = EOF
 		return out
 	}, nil
 }
@@ -14,6 +14,6 @@ func TestCoalesce(t *testing.T) {
 	}))
 	actual, err := Tokenise(lexer, nil, "!@#$")
 	assert.NoError(t, err)
-	expected := []*Token{{Punctuation, "!@#$"}}
+	expected := []Token{{Punctuation, "!@#$"}}
 	assert.Equal(t, expected, actual)
 }
							
								
								
									
delegate.go (34 changed lines)
									
									
									
									
									
								
@@ -31,7 +31,7 @@ func (d *delegatingLexer) Config() *Config {
 // An insertion is the character range where language tokens should be inserted.
 type insertion struct {
 	start, end int
-	tokens     []*Token
+	tokens     []Token
 }
 
 func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
@@ -44,15 +44,15 @@ func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Itera
 	insertions := []*insertion{}
 	var insert *insertion
 	offset := 0
-	var last *Token
+	var last Token
 	for _, t := range tokens {
 		if t.Type == Other {
-			if last != nil && insert != nil && last.Type != Other {
+			if last != EOF && insert != nil && last.Type != Other {
 				insert.end = offset
 			}
 			others.WriteString(t.Value)
 		} else {
-			if last == nil || last.Type == Other {
+			if last == EOF || last.Type == Other {
 				insert = &insertion{start: offset}
 				insertions = append(insertions, insert)
 			}
@@ -73,12 +73,12 @@ func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Itera
 	}
 
 	// Interleave the two sets of tokens.
-	out := []*Token{}
+	var out []Token
 	offset = 0 // Offset into text.
 	tokenIndex := 0
-	nextToken := func() *Token {
+	nextToken := func() Token {
 		if tokenIndex >= len(rootTokens) {
-			return nil
+			return EOF
 		}
 		t := rootTokens[tokenIndex]
 		tokenIndex++
@@ -95,18 +95,18 @@ func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Itera
 	}
 	t := nextToken()
 	i := nextInsertion()
-	for t != nil || i != nil {
+	for t != EOF || i != nil {
 		// fmt.Printf("%d->%d:%q   %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
-		if t == nil || (i != nil && i.start < offset+len(t.Value)) {
-			var l *Token
+		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
+			var l Token
 			l, t = splitToken(t, i.start-offset)
-			if l != nil {
+			if l != EOF {
 				out = append(out, l)
 				offset += len(l.Value)
 			}
 			out = append(out, i.tokens...)
 			offset += i.end - i.start
-			if t == nil {
+			if t == EOF {
 				t = nextToken()
 			}
 			i = nextInsertion()
@@ -119,15 +119,15 @@ func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Itera
 	return Literator(out...), nil
 }
 
-func splitToken(t *Token, offset int) (l *Token, r *Token) {
-	if t == nil {
-		return nil, nil
+func splitToken(t Token, offset int) (l Token, r Token) {
+	if t == EOF {
+		return EOF, EOF
 	}
 	if offset == 0 {
-		return nil, t
+		return EOF, t
 	}
 	if offset == len(t.Value) {
-		return t, nil
+		return t, EOF
 	}
 	l = t.Clone()
 	r = t.Clone()
@@ -1,6 +1,7 @@
 package chroma
 
 import (
+	"fmt"
 	"testing"
 
 	"github.com/alecthomas/assert"
@@ -31,9 +32,9 @@ func TestDelegate(t *testing.T) {
 	testdata := []struct {
 		name     string
 		source   string
-		expected []*Token
+		expected []Token
 	}{
-		{"SourceInMiddle", `hello world <? what ?> there`, []*Token{
+		{"SourceInMiddle", `hello world <? what ?> there`, []Token{
 			{Keyword, "hello"},
 			{TextWhitespace, " "},
 			{Name, "world"},
@@ -48,7 +49,7 @@ func TestDelegate(t *testing.T) {
 			{TextWhitespace, " "},
 			{Name, "there"},
 		}},
-		{"SourceBeginning", `<? what ?> hello world there`, []*Token{
+		{"SourceBeginning", `<? what ?> hello world there`, []Token{
 			{CommentPreproc, "<?"},
 			{TextWhitespace, " "},
 			{Keyword, "what"},
@@ -61,7 +62,7 @@ func TestDelegate(t *testing.T) {
 			{TextWhitespace, " "},
 			{Name, "there"},
 		}},
-		{"SourceEnd", `hello world <? what there`, []*Token{
+		{"SourceEnd", `hello world <? what there`, []Token{
 			{Keyword, "hello"},
 			{TextWhitespace, " "},
 			{Name, "world"},
@@ -73,7 +74,7 @@ func TestDelegate(t *testing.T) {
 			{TextWhitespace, " "},
 			{Error, "there"},
 		}},
-		{"SourceMultiple", "hello world <? what ?> hello there <? what ?> hello", []*Token{
+		{"SourceMultiple", "hello world <? what ?> hello there <? what ?> hello", []Token{
 			{Keyword, "hello"},
 			{TextWhitespace, " "},
 			{Name, "world"},
@@ -104,6 +105,7 @@ func TestDelegate(t *testing.T) {
 			it, err := delegate.Tokenise(nil, test.source)
 			assert.NoError(t, err)
 			actual := it.Tokens()
+			fmt.Println(actual)
 			assert.Equal(t, test.expected, actual)
 		})
 	}
@@ -11,7 +11,7 @@ import (
 var (
 	// NoOp formatter.
 	NoOp = Register("noop", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, iterator chroma.Iterator) error {
-		for t := iterator(); t != nil; t = iterator() {
+		for t := iterator(); t != chroma.EOF; t = iterator() {
 			if _, err := io.WriteString(w, t.Value); err != nil {
 				return err
 			}
@@ -129,7 +129,7 @@ func (f *Formatter) restyle(style *chroma.Style) (*chroma.Style, error) {
 // We deliberately don't use html/template here because it is two orders of magnitude slower (benchmarked).
 //
 // OTOH we need to be super careful about correct escaping...
-func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []*chroma.Token) (err error) { // nolint: gocyclo
+func (f *Formatter) writeHTML(w io.Writer, style *chroma.Style, tokens []chroma.Token) (err error) { // nolint: gocyclo
 	style, err = f.restyle(style)
 	if err != nil {
 		return err
@@ -391,8 +391,8 @@ func compressStyle(s string) string {
 	return strings.Join(out, ";")
 }
 
-func splitTokensIntoLines(tokens []*chroma.Token) (out [][]*chroma.Token) {
-	line := []*chroma.Token{}
+func splitTokensIntoLines(tokens []chroma.Token) (out [][]chroma.Token) {
+	var line []chroma.Token
 	for _, token := range tokens {
 		for strings.Contains(token.Value, "\n") {
 			parts := strings.SplitAfterN(token.Value, "\n", 2)
@@ -32,11 +32,11 @@ func BenchmarkHTMLFormatter(b *testing.B) {
 }
 
 func TestSplitTokensIntoLines(t *testing.T) {
-	in := []*chroma.Token{
+	in := []chroma.Token{
 		{Value: "hello", Type: chroma.NameKeyword},
 		{Value: " world\nwhat?\n", Type: chroma.NameKeyword},
 	}
-	expected := [][]*chroma.Token{
+	expected := [][]chroma.Token{
 		{
 			{Type: chroma.NameKeyword, Value: "hello"},
 			{Type: chroma.NameKeyword, Value: " world\n"},
@@ -53,7 +53,7 @@ func TestSplitTokensIntoLines(t *testing.T) {
 }
 
 func TestIteratorPanicRecovery(t *testing.T) {
-	it := func() *chroma.Token {
+	it := func() chroma.Token {
 		panic(errors.New("bad"))
 	}
 	err := New().Format(ioutil.Discard, styles.Fallback, it)
@@ -12,7 +12,7 @@ import (
 var JSON = Register("json", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
 	fmt.Fprintln(w, "[")
 	i := 0
-	for t := it(); t != nil; t = it() {
+	for t := it(); t != chroma.EOF; t = it() {
 		if i > 0 {
 			fmt.Fprintln(w, ",")
 		}
@@ -9,7 +9,7 @@ import (
 
 // Tokens formatter outputs the raw token structures.
 var Tokens = Register("tokens", chroma.FormatterFunc(func(w io.Writer, s *chroma.Style, it chroma.Iterator) error {
-	for t := it(); t != nil; t = it() {
+	for t := it(); t != chroma.EOF; t = it() {
 		if _, err := fmt.Fprintln(w, t.GoString()); err != nil {
 			return err
 		}
@@ -216,7 +216,7 @@ func (c *indexedTTYFormatter) Format(w io.Writer, style *chroma.Style, it chroma
 		}
 	}()
 	theme := styleToEscapeSequence(c.table, style)
-	for token := it(); token != nil; token = it() {
+	for token := it(); token != chroma.EOF; token = it() {
 		// TODO: Cache token lookups?
 		clr, ok := theme[token.Type]
 		if !ok {
@@ -11,7 +11,7 @@ import (
 var TTY16m = Register("terminal16m", chroma.FormatterFunc(trueColourFormatter))
 
 func trueColourFormatter(w io.Writer, style *chroma.Style, it chroma.Iterator) error {
-	for token := it(); token != nil; token = it() {
+	for token := it(); token != chroma.EOF; token = it() {
 		entry := style.Get(token.Type)
 		if !entry.IsZero() {
 			out := ""
							
								
								
									
iterator.go (20 changed lines)
									
									
									
									
									
								
@@ -5,12 +5,12 @@
 // nil will be returned at the end of the Token stream.
 //
 // If an error occurs within an Iterator, it may propagate this in a panic. Formatters should recover.
-type Iterator func() *Token
+type Iterator func() Token
 
 // Tokens consumes all tokens from the iterator and returns them as a slice.
-func (i Iterator) Tokens() []*Token {
-	out := []*Token{}
-	for t := i(); t != nil; t = i() {
+func (i Iterator) Tokens() []Token {
+	var out []Token
+	for t := i(); t != EOF; t = i() {
 		out = append(out, t)
 	}
 	return out
@@ -18,23 +18,23 @@ func (i Iterator) Tokens() []*Token {
 
 // Concaterator concatenates tokens from a series of iterators.
 func Concaterator(iterators ...Iterator) Iterator {
-	return func() *Token {
+	return func() Token {
 		for len(iterators) > 0 {
 			t := iterators[0]()
-			if t != nil {
+			if t != EOF {
 				return t
 			}
 			iterators = iterators[1:]
 		}
-		return nil
+		return EOF
 	}
 }
 
 // Literator converts a sequence of literal Tokens into an Iterator.
-func Literator(tokens ...*Token) Iterator {
-	return func() (out *Token) {
+func Literator(tokens ...Token) Iterator {
+	return func() Token {
 		if len(tokens) == 0 {
-			return nil
+			return EOF
 		}
 		token := tokens[0]
 		tokens = tokens[1:]
							
								
								
									
lexer.go (8 changed lines)
									
									
									
									
									
								
@@ -66,12 +66,12 @@ type Token struct {
 func (t *Token) String() string   { return t.Value }
 func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }
 
-func (t *Token) Clone() *Token {
-	clone := &Token{}
-	*clone = *t
-	return clone
+func (t *Token) Clone() Token {
+	return *t
 }
 
+var EOF Token
+
 type TokeniseOptions struct {
 	// State to start tokenisation in. Defaults to "root".
 	State string
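The `var EOF Token` introduced above is the package-level zero value that every `token != EOF` comparison in this diff tests against. Value comparison works because Token is a struct of comparable fields (a TokenType and a string), so Go compares it field by field; presumably real tokens are distinguishable from EOF because they always carry a non-empty Value, and the EOFType constant added in types.go pins 0 as the marker type. A tiny hedged illustration, assuming a Token-like struct rather than chroma's own:

package main

import "fmt"

type TokenType int

type Token struct {
	Type  TokenType
	Value string
}

// EOF is the zero value: {Type: 0, Value: ""}.
var EOF Token

func main() {
	fmt.Println(Token{} == EOF)           // true: the zero value is the sentinel
	fmt.Println(Token{Value: "x"} == EOF) // false: any non-empty token differs
}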
@@ -35,7 +35,7 @@ func TestSimpleLexer(t *testing.T) {
 	a = 10
 `)
 	assert.NoError(t, err)
-	expected := []*Token{
+	expected := []Token{
 		{Whitespace, "\n\t"},
 		{Comment, "; this is a comment"},
 		{Whitespace, "\n\t"},
@@ -34,7 +34,7 @@ var HTTP = internal.Register(httpBodyContentTypeLexer(MustNewLexer(
 )))
 
 func httpContentBlock(groups []string, lexer Lexer) Iterator {
-	tokens := []*Token{
+	tokens := []Token{
 		{Generic, groups[0]},
 	}
 	return Literator(tokens...)
@@ -42,7 +42,7 @@ func httpContentBlock(groups []string, lexer Lexer) Iterator {
 }
 
 func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
-	tokens := []*Token{
+	tokens := []Token{
 		{Name, groups[1]},
 		{Text, groups[2]},
 		{Operator, groups[3]},
@@ -54,7 +54,7 @@ func httpHeaderBlock(groups []string, lexer Lexer) Iterator {
 }
 
 func httpContinuousHeaderBlock(groups []string, lexer Lexer) Iterator {
-	tokens := []*Token{
+	tokens := []Token{
 		{Text, groups[1]},
 		{Literal, groups[2]},
 		{Text, groups[3]},
@@ -76,8 +76,8 @@ func (d *httpBodyContentTyper) Tokenise(options *TokeniseOptions, text string) (
 		return nil, err
 	}
 
-	return func() *Token {
-		for token := it(); token != nil; token = it() {
+	return func() Token {
+		for token := it(); token != EOF; token = it() {
 			switch {
 			case token.Type == Name && strings.ToLower(token.Value) == "content-type":
 				{
@@ -112,7 +112,7 @@ func (d *httpBodyContentTyper) Tokenise(options *TokeniseOptions, text string) (
 						if err != nil {
 							panic(err)
 						}
-						return nil
+						return EOF
 					}
 				}
 
@@ -122,11 +122,11 @@ func (d *httpBodyContentTyper) Tokenise(options *TokeniseOptions, text string) (
 		}
 
 		if subIterator != nil {
-			for token := subIterator(); token != nil; token = subIterator() {
+			for token := subIterator(); token != EOF; token = subIterator() {
 				return token
 			}
 		}
-		return nil
+		return EOF
 
 	}, nil
 }
										
											
(File diff suppressed because it is too large.)
											
										
									
								
@@ -65,7 +65,7 @@ func TestLexers(t *testing.T) {
 			assert.NoError(t, err)
 
 			// Read expected JSON into token slice.
-			expected := []*chroma.Token{}
+			var expected []chroma.Token
 			r, err := os.Open(expectedFilename)
 			assert.NoError(t, err)
 			err = json.NewDecoder(r).Decode(&expected)
@@ -61,7 +61,7 @@ var Restructuredtext = internal.Register(MustNewLexer(
 
 func rstCodeBlock(groups []string, lexer Lexer) Iterator {
 	iterators := []Iterator{}
-	tokens := []*Token{
+	tokens := []Token{
 		{Punctuation, groups[1]},
 		{Text, groups[2]},
 		{OperatorWord, groups[3]},
@@ -73,7 +73,7 @@ func rstCodeBlock(groups []string, lexer Lexer) Iterator {
 	code := strings.Join(groups[8:], "")
 	lexer = internal.Get(groups[6])
 	if lexer == nil {
-		tokens = append(tokens, &Token{String, code})
+		tokens = append(tokens, Token{String, code})
 		iterators = append(iterators, Literator(tokens...))
 	} else {
 		sub, err := lexer.Tokenise(nil, code)
@@ -122,7 +122,7 @@ func Default(mutators ...Mutator) Rule {
 }
 
 // Stringify returns the raw string for a set of tokens.
-func Stringify(tokens ...*Token) string {
+func Stringify(tokens ...Token) string {
 	out := []string{}
 	for _, t := range tokens {
 		out = append(out, t.Value)
@@ -52,6 +52,6 @@ func TestCombine(t *testing.T) {
 	})
 	it, err := l.Tokenise(nil, "hello world")
 	assert.NoError(t, err)
-	expected := []*Token{{String, `hello`}, {Whitespace, ` `}, {Name, `world`}}
+	expected := []Token{{String, `hello`}, {Whitespace, ` `}, {Name, `world`}}
 	assert.Equal(t, expected, it.Tokens())
 }
							
								
								
									
regexp.go (18 changed lines)
									
									
									
									
									
								
@@ -140,13 +140,13 @@ func Words(prefix, suffix string, words ...string) string {
 }
 
 // Tokenise text using lexer, returning tokens as a slice.
-func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]*Token, error) {
-	out := []*Token{}
+func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, error) {
+	var out []Token
 	it, err := lexer.Tokenise(options, text)
 	if err != nil {
 		return nil, err
 	}
-	for t := it(); t != nil; t = it() {
+	for t := it(); t != EOF; t = it() {
 		out = append(out, t)
 	}
 	return out, nil
@@ -246,13 +246,13 @@ func (l *LexerState) Get(key interface{}) interface{} {
 	return l.MutatorContext[key]
 }
 
-func (l *LexerState) Iterator() *Token {
+func (l *LexerState) Iterator() Token {
 	for l.Pos < len(l.Text) && len(l.Stack) > 0 {
 		// Exhaust the iterator stack, if any.
 		for len(l.iteratorStack) > 0 {
 			n := len(l.iteratorStack) - 1
 			t := l.iteratorStack[n]()
-			if t == nil {
+			if t == EOF {
 				l.iteratorStack = l.iteratorStack[:n]
 				continue
 			}
@@ -271,7 +271,7 @@ func (l *LexerState) Iterator() *Token {
 		// No match.
 		if groups == nil {
 			l.Pos++
-			return &Token{Error, string(l.Text[l.Pos-1 : l.Pos])}
+			return Token{Error, string(l.Text[l.Pos-1 : l.Pos])}
 		}
 		l.Rule = ruleIndex
 		l.Groups = groups
@@ -290,7 +290,7 @@ func (l *LexerState) Iterator() *Token {
 	for len(l.iteratorStack) > 0 {
 		n := len(l.iteratorStack) - 1
 		t := l.iteratorStack[n]()
-		if t == nil {
+		if t == EOF {
 			l.iteratorStack = l.iteratorStack[:n]
 			continue
 		}
@@ -301,9 +301,9 @@ func (l *LexerState) Iterator() *Token {
 	if l.Pos != len(l.Text) && len(l.Stack) == 0 {
 		value := string(l.Text[l.Pos:])
 		l.Pos = len(l.Text)
-		return &Token{Type: Error, Value: value}
+		return Token{Type: Error, Value: value}
 	}
-	return nil
+	return EOF
 }
 
 type RegexLexer struct {
@@ -14,7 +14,7 @@ func TestNewlineAtEndOfFile(t *testing.T) {
 	}))
 	it, err := l.Tokenise(nil, `hello`)
 	assert.NoError(t, err)
-	assert.Equal(t, []*Token{{Keyword, "hello"}, {Whitespace, "\n"}}, it.Tokens())
+	assert.Equal(t, []Token{{Keyword, "hello"}, {Whitespace, "\n"}}, it.Tokens())
 
 	l = Coalesce(MustNewLexer(nil, Rules{
 		"root": {
@@ -23,5 +23,5 @@ func TestNewlineAtEndOfFile(t *testing.T) {
 	}))
 	it, err = l.Tokenise(nil, `hello`)
 	assert.NoError(t, err)
-	assert.Equal(t, []*Token{{Error, "hello"}}, it.Tokens())
+	assert.Equal(t, []Token{{Error, "hello"}}, it.Tokens())
 }
							
								
								
									
remap.go (14 changed lines)
									
									
									
									
									
								
@@ -2,11 +2,11 @@ package chroma
 
 type remappingLexer struct {
 	lexer  Lexer
-	mapper func(*Token) []*Token
+	mapper func(Token) []Token
 }
 
 // RemappingLexer remaps a token to a set of, potentially empty, tokens.
-func RemappingLexer(lexer Lexer, mapper func(*Token) []*Token) Lexer {
+func RemappingLexer(lexer Lexer, mapper func(Token) []Token) Lexer {
 	return &remappingLexer{lexer, mapper}
 }
 
@@ -19,8 +19,8 @@ func (r *remappingLexer) Tokenise(options *TokeniseOptions, text string) (Iterat
 	if err != nil {
 		return nil, err
 	}
-	buffer := []*Token{}
-	return func() *Token {
+	var buffer []Token
+	return func() Token {
 		for {
 			if len(buffer) > 0 {
 				t := buffer[0]
@@ -28,7 +28,7 @@ func (r *remappingLexer) Tokenise(options *TokeniseOptions, text string) (Iterat
 				return t
 			}
 			t := it()
-			if t == nil {
+			if t == EOF {
 				return t
 			}
 			buffer = r.mapper(t)
@@ -67,7 +67,7 @@ func TypeRemappingLexer(lexer Lexer, mapping TypeMapping) Lexer {
 		}
 
 	}
-	return RemappingLexer(lexer, func(t *Token) []*Token {
+	return RemappingLexer(lexer, func(t Token) []Token {
 		if k, ok := lut[t.Type]; ok {
 			if tt, ok := k[t.Value]; ok {
 				t.Type = tt
@@ -75,6 +75,6 @@ func TypeRemappingLexer(lexer Lexer, mapping TypeMapping) Lexer {
 				t.Type = tt
 			}
 		}
-		return []*Token{t}
+		return []Token{t}
 	})
 }
@@ -19,7 +19,7 @@ func TestRemappingLexer(t *testing.T) {
 
 	it, err := lexer.Tokenise(nil, `if true then print else end`)
 	assert.NoError(t, err)
-	expected := []*Token{
+	expected := []Token{
 		{Keyword, "if"}, {TextWhitespace, " "}, {Name, "true"}, {TextWhitespace, " "}, {Name, "then"},
 		{TextWhitespace, " "}, {Name, "print"}, {TextWhitespace, " "}, {Keyword, "else"},
 		{TextWhitespace, " "}, {Name, "end"},
							
								
								
									
types.go (4 changed lines)
									
									
									
									
									
								
@@ -54,6 +54,8 @@ const (
 	Other
 	// No highlighting.
 	None
+	// Used as an EOF marker / nil token
+	EOFType TokenType = 0
 )
 
 // Keywords.
@@ -341,5 +343,5 @@ func (t TokenType) InSubCategory(other TokenType) bool {
 }
 
 func (t TokenType) Emit(groups []string, lexer Lexer) Iterator {
-	return Literator(&Token{Type: t, Value: groups[0]})
+	return Literator(Token{Type: t, Value: groups[0]})
 }