mirror of
https://github.com/pocketbase/pocketbase.git
synced 2025-02-14 00:42:10 +02:00
added tokenizer.IgnoreParenthesis() to allow ignoring the parenthesis characters boundary checks
This commit is contained in:
parent
71f9be3cb0
commit
f3bcd7d3df
@ -34,9 +34,10 @@ func NewFromBytes(b []byte) *Tokenizer {
|
|||||||
// New creates new Tokenizer from the provided reader with DefaultSeparators.
|
// New creates new Tokenizer from the provided reader with DefaultSeparators.
|
||||||
func New(r io.Reader) *Tokenizer {
|
func New(r io.Reader) *Tokenizer {
|
||||||
return &Tokenizer{
|
return &Tokenizer{
|
||||||
r: bufio.NewReader(r),
|
r: bufio.NewReader(r),
|
||||||
separators: DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
keepSeparator: false,
|
keepSeparator: false,
|
||||||
|
ignoreParenthesis: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -45,54 +46,61 @@ func New(r io.Reader) *Tokenizer {
|
|||||||
type Tokenizer struct {
|
type Tokenizer struct {
|
||||||
r *bufio.Reader
|
r *bufio.Reader
|
||||||
|
|
||||||
separators []rune
|
separators []rune
|
||||||
keepSeparator bool
|
keepSeparator bool
|
||||||
|
ignoreParenthesis bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Separators defines the provided separators of the current Tokenizer.
|
// Separators defines the provided separators of the current Tokenizer.
|
||||||
func (s *Tokenizer) Separators(separators ...rune) {
|
func (t *Tokenizer) Separators(separators ...rune) {
|
||||||
s.separators = separators
|
t.separators = separators
|
||||||
}
|
}
|
||||||
|
|
||||||
// KeepSeparator defines whether to keep the separator rune as part
|
// KeepSeparator defines whether to keep the separator rune as part
|
||||||
// of the token (default to false).
|
// of the token (default to false).
|
||||||
func (s *Tokenizer) KeepSeparator(state bool) {
|
func (t *Tokenizer) KeepSeparator(state bool) {
|
||||||
s.keepSeparator = state
|
t.keepSeparator = state
|
||||||
|
}
|
||||||
|
|
||||||
|
// IgnoreParenthesis defines whether to ignore the parenthesis boundaries
|
||||||
|
// and to treat the '(' and ')' as regular characters.
|
||||||
|
func (t *Tokenizer) IgnoreParenthesis(state bool) {
|
||||||
|
t.ignoreParenthesis = state
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan reads and returns the next available token from the Tokenizer's buffer (trimmed).
|
// Scan reads and returns the next available token from the Tokenizer's buffer (trimmed).
|
||||||
//
|
//
|
||||||
// Returns [io.EOF] error when there are no more tokens to scan.
|
// Returns [io.EOF] error when there are no more tokens to scan.
|
||||||
func (s *Tokenizer) Scan() (string, error) {
|
func (t *Tokenizer) Scan() (string, error) {
|
||||||
ch := s.read()
|
ch := t.read()
|
||||||
|
|
||||||
if ch == eof {
|
if ch == eof {
|
||||||
return "", io.EOF
|
return "", io.EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
if isWhitespaceRune(ch) {
|
if isWhitespaceRune(ch) {
|
||||||
s.readWhiteSpaces()
|
t.readWhiteSpaces()
|
||||||
} else {
|
} else {
|
||||||
s.unread()
|
t.unread()
|
||||||
}
|
}
|
||||||
|
|
||||||
token, err := s.readToken()
|
token, err := t.readToken()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
// read all remaining whitespaces
|
// read all remaining whitespaces
|
||||||
s.readWhiteSpaces()
|
t.readWhiteSpaces()
|
||||||
|
|
||||||
return token, err
|
return token, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScanAll reads the entire Tokenizer's buffer and returns all found tokens.
|
// ScanAll reads the entire Tokenizer's buffer and returns all found tokens.
|
||||||
func (s *Tokenizer) ScanAll() ([]string, error) {
|
func (t *Tokenizer) ScanAll() ([]string, error) {
|
||||||
tokens := []string{}
|
tokens := []string{}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
token, err := s.Scan()
|
token, err := t.Scan()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
break
|
break
|
||||||
@ -108,35 +116,35 @@ func (s *Tokenizer) ScanAll() ([]string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// readToken reads a single token from the buffer and returns it.
|
// readToken reads a single token from the buffer and returns it.
|
||||||
func (s *Tokenizer) readToken() (string, error) {
|
func (t *Tokenizer) readToken() (string, error) {
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
var parenthesis int
|
var parenthesis int
|
||||||
var quoteCh rune
|
var quoteCh rune
|
||||||
var prevCh rune
|
var prevCh rune
|
||||||
|
|
||||||
for {
|
for {
|
||||||
ch := s.read()
|
ch := t.read()
|
||||||
|
|
||||||
if ch == eof {
|
if ch == eof {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if !isEscapeRune(prevCh) {
|
if !isEscapeRune(prevCh) {
|
||||||
if ch == '(' && quoteCh == eof {
|
if !t.ignoreParenthesis && ch == '(' && quoteCh == eof {
|
||||||
parenthesis++
|
parenthesis++ // opening parenthesis
|
||||||
} else if ch == ')' && parenthesis > 0 && quoteCh == eof {
|
} else if !t.ignoreParenthesis && ch == ')' && parenthesis > 0 && quoteCh == eof {
|
||||||
parenthesis--
|
parenthesis-- // closing parenthesis
|
||||||
} else if isQuoteRune(ch) {
|
} else if isQuoteRune(ch) {
|
||||||
if quoteCh == ch {
|
if quoteCh == ch {
|
||||||
quoteCh = eof // reached closing quote
|
quoteCh = eof // closing quote
|
||||||
} else if quoteCh == eof {
|
} else if quoteCh == eof {
|
||||||
quoteCh = ch // opening quote
|
quoteCh = ch // opening quote
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.isSeperatorRune(ch) && parenthesis == 0 && quoteCh == eof {
|
if t.isSeperatorRune(ch) && parenthesis == 0 && quoteCh == eof {
|
||||||
if s.keepSeparator {
|
if t.keepSeparator {
|
||||||
buf.WriteRune(ch)
|
buf.WriteRune(ch)
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
@ -154,16 +162,16 @@ func (s *Tokenizer) readToken() (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// readWhiteSpaces consumes all contiguous whitespace runes.
|
// readWhiteSpaces consumes all contiguous whitespace runes.
|
||||||
func (s *Tokenizer) readWhiteSpaces() {
|
func (t *Tokenizer) readWhiteSpaces() {
|
||||||
for {
|
for {
|
||||||
ch := s.read()
|
ch := t.read()
|
||||||
|
|
||||||
if ch == eof {
|
if ch == eof {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if !s.isSeperatorRune(ch) {
|
if !t.isSeperatorRune(ch) {
|
||||||
s.unread()
|
t.unread()
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -171,8 +179,8 @@ func (s *Tokenizer) readWhiteSpaces() {
|
|||||||
|
|
||||||
// read reads the next rune from the buffered reader.
|
// read reads the next rune from the buffered reader.
|
||||||
// Returns the `rune(0)` if an error or `io.EOF` occurs.
|
// Returns the `rune(0)` if an error or `io.EOF` occurs.
|
||||||
func (s *Tokenizer) read() rune {
|
func (t *Tokenizer) read() rune {
|
||||||
ch, _, err := s.r.ReadRune()
|
ch, _, err := t.r.ReadRune()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return eof
|
return eof
|
||||||
}
|
}
|
||||||
@ -181,13 +189,13 @@ func (s *Tokenizer) read() rune {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// unread places the previously read rune back on the reader.
|
// unread places the previously read rune back on the reader.
|
||||||
func (s *Tokenizer) unread() error {
|
func (t *Tokenizer) unread() error {
|
||||||
return s.r.UnreadRune()
|
return t.r.UnreadRune()
|
||||||
}
|
}
|
||||||
|
|
||||||
// isSeperatorRune checks if a rune is a token part separator.
|
// isSeperatorRune checks if a rune is a token part separator.
|
||||||
func (s *Tokenizer) isSeperatorRune(ch rune) bool {
|
func (t *Tokenizer) isSeperatorRune(ch rune) bool {
|
||||||
for _, r := range s.separators {
|
for _, r := range t.separators {
|
||||||
if ch == r {
|
if ch == r {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -28,32 +28,38 @@ func TestFactories(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, s := range scenarios {
|
for _, s := range scenarios {
|
||||||
content, _ := s.tk.r.ReadString(0)
|
t.Run(s.name, func(t *testing.T) {
|
||||||
|
content, _ := s.tk.r.ReadString(0)
|
||||||
|
|
||||||
if content != expectedContent {
|
if content != expectedContent {
|
||||||
t.Fatalf("[%s] Expected reader with content %q, got %q", s.name, expectedContent, content)
|
t.Fatalf("Expected reader with content %q, got %q", expectedContent, content)
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.tk.keepSeparator != false {
|
if s.tk.keepSeparator != false {
|
||||||
t.Fatalf("[%s] Expected false, got true", s.name)
|
t.Fatal("Expected keepSeparator false, got true")
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(s.tk.separators) != len(DefaultSeparators) {
|
if s.tk.ignoreParenthesis != false {
|
||||||
t.Fatalf("[%s] Expected \n%v, \ngot \n%v", s.name, DefaultSeparators, s.tk.separators)
|
t.Fatal("Expected ignoreParenthesis false, got true")
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range s.tk.separators {
|
if len(s.tk.separators) != len(DefaultSeparators) {
|
||||||
exists := false
|
t.Fatalf("Expected \n%v, \ngot \n%v", DefaultSeparators, s.tk.separators)
|
||||||
for _, def := range s.tk.separators {
|
}
|
||||||
if r == def {
|
|
||||||
exists = true
|
for _, r := range s.tk.separators {
|
||||||
break
|
exists := false
|
||||||
|
for _, def := range s.tk.separators {
|
||||||
|
if r == def {
|
||||||
|
exists = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
t.Fatalf("Unexpected sepator %s", string(r))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !exists {
|
})
|
||||||
t.Fatalf("[%s] Unexpected sepator %s", s.name, string(r))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,54 +91,58 @@ func TestScan(t *testing.T) {
|
|||||||
|
|
||||||
func TestScanAll(t *testing.T) {
|
func TestScanAll(t *testing.T) {
|
||||||
scenarios := []struct {
|
scenarios := []struct {
|
||||||
name string
|
name string
|
||||||
content string
|
content string
|
||||||
separators []rune
|
separators []rune
|
||||||
keepSeparator bool
|
keepSeparator bool
|
||||||
expectError bool
|
ignoreParenthesis bool
|
||||||
expectTokens []string
|
expectError bool
|
||||||
|
expectTokens []string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
"empty string",
|
name: "empty string",
|
||||||
"",
|
content: "",
|
||||||
DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
false,
|
keepSeparator: false,
|
||||||
false,
|
ignoreParenthesis: false,
|
||||||
nil,
|
expectError: false,
|
||||||
|
expectTokens: nil,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"unbalanced parenthesis",
|
name: "unbalanced parenthesis",
|
||||||
`(a,b() c`,
|
content: `(a,b() c`,
|
||||||
DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
false,
|
keepSeparator: false,
|
||||||
true,
|
ignoreParenthesis: false,
|
||||||
[]string{},
|
expectError: true,
|
||||||
|
expectTokens: []string{},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"unmatching quotes",
|
name: "unmatching quotes",
|
||||||
`'asd"`,
|
content: `'asd"`,
|
||||||
DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
false,
|
keepSeparator: false,
|
||||||
true,
|
ignoreParenthesis: false,
|
||||||
[]string{},
|
expectError: true,
|
||||||
|
expectTokens: []string{},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"no separators",
|
name: "no separators",
|
||||||
`a, b, c, d, e 123, "abc"`,
|
content: `a, b, c, d, e 123, "abc"`,
|
||||||
nil,
|
separators: nil,
|
||||||
false,
|
keepSeparator: false,
|
||||||
false,
|
ignoreParenthesis: false,
|
||||||
[]string{
|
expectError: false,
|
||||||
`a, b, c, d, e 123, "abc"`,
|
expectTokens: []string{`a, b, c, d, e 123, "abc"`},
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"default separators",
|
name: "default separators",
|
||||||
`a, b, c, d e, "a,b, c ", (123, 456)`,
|
content: `a, b, c, d e, "a,b, c ", (123, 456)`,
|
||||||
DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
false,
|
keepSeparator: false,
|
||||||
false,
|
ignoreParenthesis: false,
|
||||||
[]string{
|
expectError: false,
|
||||||
|
expectTokens: []string{
|
||||||
"a",
|
"a",
|
||||||
"b",
|
"b",
|
||||||
"c",
|
"c",
|
||||||
@ -142,12 +152,13 @@ func TestScanAll(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"default separators (with preserve)",
|
name: "default separators (with preserve)",
|
||||||
`a, b, c, d e, "a,b, c ", (123, 456)`,
|
content: `a, b, c, d e, "a,b, c ", (123, 456)`,
|
||||||
DefaultSeparators,
|
separators: DefaultSeparators,
|
||||||
true,
|
keepSeparator: true,
|
||||||
false,
|
ignoreParenthesis: false,
|
||||||
[]string{
|
expectError: false,
|
||||||
|
expectTokens: []string{
|
||||||
"a,",
|
"a,",
|
||||||
"b,",
|
"b,",
|
||||||
"c,",
|
"c,",
|
||||||
@ -157,14 +168,15 @@ func TestScanAll(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"custom separators",
|
name: "custom separators",
|
||||||
` a , 123.456, b, c d, (
|
content: ` a , 123.456, b, c d, (
|
||||||
test (a,b,c) " 123 "
|
test (a,b,c) " 123 "
|
||||||
),"(abc d", "abc) d", "(abc) d \" " 'abc "'`,
|
),"(abc d", "abc) d", "(abc) d \" " 'abc "'`,
|
||||||
[]rune{',', ' ', '\t', '\n'},
|
separators: []rune{',', ' ', '\t', '\n'},
|
||||||
false,
|
keepSeparator: false,
|
||||||
false,
|
ignoreParenthesis: false,
|
||||||
[]string{
|
expectError: false,
|
||||||
|
expectTokens: []string{
|
||||||
"a",
|
"a",
|
||||||
"123.456",
|
"123.456",
|
||||||
"b",
|
"b",
|
||||||
@ -178,14 +190,15 @@ func TestScanAll(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"custom separators (with preserve)",
|
name: "custom separators (with preserve)",
|
||||||
` a , 123.456, b, c d, (
|
content: ` a , 123.456, b, c d, (
|
||||||
test (a,b,c) " 123 "
|
test (a,b,c) " 123 "
|
||||||
),"(abc d", "abc) d", "(abc) d \" " 'abc "'`,
|
),"(abc d", "abc) d", "(abc) d \" " 'abc "'`,
|
||||||
[]rune{',', ' ', '\t', '\n'},
|
separators: []rune{',', ' ', '\t', '\n'},
|
||||||
true,
|
keepSeparator: true,
|
||||||
false,
|
ignoreParenthesis: false,
|
||||||
[]string{
|
expectError: false,
|
||||||
|
expectTokens: []string{
|
||||||
"a ",
|
"a ",
|
||||||
"123.456,",
|
"123.456,",
|
||||||
"b,",
|
"b,",
|
||||||
@ -198,36 +211,53 @@ func TestScanAll(t *testing.T) {
|
|||||||
`'abc "'`,
|
`'abc "'`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ignoring parenthesis",
|
||||||
|
content: `a, b, (c,d)`,
|
||||||
|
separators: []rune{','},
|
||||||
|
keepSeparator: false,
|
||||||
|
ignoreParenthesis: true,
|
||||||
|
expectError: false,
|
||||||
|
expectTokens: []string{
|
||||||
|
"a",
|
||||||
|
"b",
|
||||||
|
"(c",
|
||||||
|
"d)",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, s := range scenarios {
|
for _, s := range scenarios {
|
||||||
tk := NewFromString(s.content)
|
t.Run(s.name, func(t *testing.T) {
|
||||||
|
tk := NewFromString(s.content)
|
||||||
|
|
||||||
tk.Separators(s.separators...)
|
tk.Separators(s.separators...)
|
||||||
tk.KeepSeparator(s.keepSeparator)
|
tk.KeepSeparator(s.keepSeparator)
|
||||||
|
tk.IgnoreParenthesis(s.ignoreParenthesis)
|
||||||
|
|
||||||
tokens, err := tk.ScanAll()
|
tokens, err := tk.ScanAll()
|
||||||
|
|
||||||
hasErr := err != nil
|
hasErr := err != nil
|
||||||
if hasErr != s.expectError {
|
if hasErr != s.expectError {
|
||||||
t.Fatalf("[%s] Expected hasErr %v, got %v (%v)", s.name, s.expectError, hasErr, err)
|
t.Fatalf("Expected hasErr %v, got %v (%v)", s.expectError, hasErr, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(tokens) != len(s.expectTokens) {
|
if len(tokens) != len(s.expectTokens) {
|
||||||
t.Fatalf("[%s] Expected \n%v (%d), \ngot \n%v (%d)", s.name, s.expectTokens, len(s.expectTokens), tokens, len(tokens))
|
t.Fatalf("Expected \n%v (%d), \ngot \n%v (%d)", s.expectTokens, len(s.expectTokens), tokens, len(tokens))
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tok := range tokens {
|
for _, tok := range tokens {
|
||||||
exists := false
|
exists := false
|
||||||
for _, def := range s.expectTokens {
|
for _, def := range s.expectTokens {
|
||||||
if tok == def {
|
if tok == def {
|
||||||
exists = true
|
exists = true
|
||||||
break
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
t.Fatalf("Unexpected token %s", tok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !exists {
|
})
|
||||||
t.Fatalf("[%s] Unexpected token %s", s.name, tok)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user