From f475967a4a46b6b079e85b07a5281be281aa53cf Mon Sep 17 00:00:00 2001 From: Gani Georgiev <gani.georgiev@gmail.com> Date: Mon, 6 Feb 2023 16:30:47 +0200 Subject: [PATCH] updated default tokenizer separators --- tools/tokenizer/tokenizer.go | 4 ++-- tools/tokenizer/tokenizer_test.go | 36 +++++++++++++++---------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/tokenizer/tokenizer.go b/tools/tokenizer/tokenizer.go index b9fbc09b..31e76513 100644 --- a/tools/tokenizer/tokenizer.go +++ b/tools/tokenizer/tokenizer.go @@ -19,7 +19,7 @@ import ( const eof = rune(0) // DefaultSeparators is a list with the default token separator characters. -var DefaultSeparators = []rune{',', ' ', '\t', '\n'} +var DefaultSeparators = []rune{','} // NewFromString creates new Tokenizer from the provided string. func NewFromString(str string) *Tokenizer { @@ -31,7 +31,7 @@ func NewFromBytes(b []byte) *Tokenizer { return New(bytes.NewReader(b)) } -// New creates new Tokenizer from the provided reader. +// New creates new Tokenizer from the provided reader with DefaultSeparators. func New(r io.Reader) *Tokenizer { return &Tokenizer{ r: bufio.NewReader(r), diff --git a/tools/tokenizer/tokenizer_test.go b/tools/tokenizer/tokenizer_test.go index 134ae2d8..8bf21084 100644 --- a/tools/tokenizer/tokenizer_test.go +++ b/tools/tokenizer/tokenizer_test.go @@ -54,7 +54,7 @@ func TestFactories(t *testing.T) { } func TestScan(t *testing.T) { - tk := NewFromString("abc 123.456 (abc)") + tk := NewFromString("abc, 123.456, (abc)") expectedTokens := []string{"abc", "123.456", "(abc)"} @@ -79,7 +79,7 @@ func TestScan(t *testing.T) { } } -func TestScanAllWithDefaultSeparators(t *testing.T) { +func TestScanAll(t *testing.T) { scenarios := []struct { name string content string @@ -119,10 +119,24 @@ func TestScanAllWithDefaultSeparators(t *testing.T) { }, { "default separators", + `a, b, c, d e, "a,b, c ", (123, 456)`, + DefaultSeparators, + false, + []string{ + "a", + "b", + "c", + "d e", + `"a,b, c "`, + `(123, 456)`, + }, + }, + { + "custom separators", ` a , 123.456, b, c d, ( test (a,b,c) " 123 " ),"(abc d", "abc) d", "(abc) d \" " 'abc "'`, - DefaultSeparators, + []rune{',', ' ', '\t', '\n'}, false, []string{ "a", @@ -137,20 +151,6 @@ func TestScanAllWithDefaultSeparators(t *testing.T) { `'abc "'`, }, }, - { - "custom separators", - `a, b, c, d e, "a,b, c ", (123, 456)`, - []rune{','}, - false, - []string{ - "a", - "b", - "c", - "d e", - `"a,b, c "`, - `(123, 456)`, - }, - }, } for _, s := range scenarios { @@ -166,7 +166,7 @@ func TestScanAllWithDefaultSeparators(t *testing.T) { } if len(tokens) != len(s.expectTokens) { - t.Fatalf("[%s] Expected \n%v, \ngot \n%v", s.name, s.expectTokens, tokens) + t.Fatalf("[%s] Expected \n%v (%d), \ngot \n%v (%d)", s.name, s.expectTokens, len(s.expectTokens), tokens, len(tokens)) } for _, tok := range tokens {