2017-06-11 09:32:58 +02:00
|
|
|
//
|
|
|
|
// Besides, jsoniter.Iterator provides a different set of interfaces
|
|
|
|
// iterating given bytes/string/reader
|
|
|
|
// and yielding parsed elements one by one.
|
|
|
|
// This set of interfaces reads input as required and gives
|
|
|
|
// better performance.
|
2016-11-30 18:56:25 +02:00
|
|
|
package jsoniter
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2017-01-05 15:23:08 +02:00
|
|
|
"io"
|
2016-11-30 18:56:25 +02:00
|
|
|
)
|
|
|
|
|
2016-12-10 08:34:36 +02:00
|
|
|
// ValueType identifies the kind of JSON value found in the input.
type ValueType int

// Value kinds, numbered from 0 in declaration order.
const (
	// Invalid is the kind for a byte that does not start any JSON value.
	Invalid = ValueType(iota)
	// String is the kind selected by a leading '"'.
	String
	// Number is the kind selected by a leading '-' or decimal digit.
	Number
	// Nil is the kind selected by a leading 'n' (JSON null).
	Nil
	// Bool is the kind selected by a leading 't' or 'f'.
	Bool
	// Array is the kind selected by a leading '['.
	Array
	// Object is the kind selected by a leading '{'.
	Object
)
|
|
|
|
|
2017-01-15 16:50:31 +02:00
|
|
|
var hexDigits []byte
|
2016-12-10 08:34:36 +02:00
|
|
|
var valueTypes []ValueType
|
2016-12-06 04:41:05 +02:00
|
|
|
|
|
|
|
func init() {
|
2017-01-15 16:50:31 +02:00
|
|
|
hexDigits = make([]byte, 256)
|
|
|
|
for i := 0; i < len(hexDigits); i++ {
|
|
|
|
hexDigits[i] = 255
|
2016-12-06 04:41:05 +02:00
|
|
|
}
|
|
|
|
for i := '0'; i <= '9'; i++ {
|
2017-01-15 16:50:31 +02:00
|
|
|
hexDigits[i] = byte(i - '0')
|
2016-12-06 04:41:05 +02:00
|
|
|
}
|
|
|
|
for i := 'a'; i <= 'f'; i++ {
|
2017-01-15 16:50:31 +02:00
|
|
|
hexDigits[i] = byte((i - 'a') + 10)
|
2016-12-06 04:41:05 +02:00
|
|
|
}
|
|
|
|
for i := 'A'; i <= 'F'; i++ {
|
2017-01-15 16:50:31 +02:00
|
|
|
hexDigits[i] = byte((i - 'A') + 10)
|
2016-12-06 04:41:05 +02:00
|
|
|
}
|
2016-12-10 08:34:36 +02:00
|
|
|
valueTypes = make([]ValueType, 256)
|
|
|
|
for i := 0; i < len(valueTypes); i++ {
|
|
|
|
valueTypes[i] = Invalid
|
|
|
|
}
|
2017-01-05 15:23:08 +02:00
|
|
|
valueTypes['"'] = String
|
|
|
|
valueTypes['-'] = Number
|
|
|
|
valueTypes['0'] = Number
|
|
|
|
valueTypes['1'] = Number
|
|
|
|
valueTypes['2'] = Number
|
|
|
|
valueTypes['3'] = Number
|
|
|
|
valueTypes['4'] = Number
|
|
|
|
valueTypes['5'] = Number
|
|
|
|
valueTypes['6'] = Number
|
|
|
|
valueTypes['7'] = Number
|
|
|
|
valueTypes['8'] = Number
|
|
|
|
valueTypes['9'] = Number
|
|
|
|
valueTypes['t'] = Bool
|
|
|
|
valueTypes['f'] = Bool
|
2017-01-21 10:09:38 +02:00
|
|
|
valueTypes['n'] = Nil
|
2017-01-05 15:23:08 +02:00
|
|
|
valueTypes['['] = Array
|
|
|
|
valueTypes['{'] = Object
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iterator is a fast and flexible JSON parser
|
2016-11-30 18:56:25 +02:00
|
|
|
type Iterator struct {
|
2017-06-13 12:49:35 +02:00
|
|
|
cfg *frozenConfig
|
2016-12-01 04:35:38 +02:00
|
|
|
reader io.Reader
|
|
|
|
buf []byte
|
|
|
|
head int
|
|
|
|
tail int
|
2017-06-18 10:28:43 +02:00
|
|
|
captureStartedAt int
|
|
|
|
captured []byte
|
2016-12-01 04:35:38 +02:00
|
|
|
Error error
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// Create creates an empty Iterator instance
|
2017-06-13 12:49:35 +02:00
|
|
|
func NewIterator(cfg *frozenConfig) *Iterator {
|
2017-01-05 07:53:38 +02:00
|
|
|
return &Iterator{
|
2017-06-13 10:58:53 +02:00
|
|
|
cfg: cfg,
|
2017-01-05 07:53:38 +02:00
|
|
|
reader: nil,
|
2017-01-05 15:23:08 +02:00
|
|
|
buf: nil,
|
|
|
|
head: 0,
|
|
|
|
tail: 0,
|
2017-01-05 07:53:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// Parse parses a json buffer in io.Reader into an Iterator instance
|
2017-06-13 12:49:35 +02:00
|
|
|
func Parse(cfg *frozenConfig, reader io.Reader, bufSize int) *Iterator {
|
2017-01-05 07:53:38 +02:00
|
|
|
return &Iterator{
|
2017-06-13 10:58:53 +02:00
|
|
|
cfg: cfg,
|
2016-12-01 04:35:38 +02:00
|
|
|
reader: reader,
|
2017-01-05 15:23:08 +02:00
|
|
|
buf: make([]byte, bufSize),
|
|
|
|
head: 0,
|
|
|
|
tail: 0,
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// ParseBytes parses a json byte slice into an Iterator instance
|
2017-06-13 12:49:35 +02:00
|
|
|
func ParseBytes(cfg *frozenConfig, input []byte) *Iterator {
|
2017-01-05 07:53:38 +02:00
|
|
|
return &Iterator{
|
2017-06-13 10:58:53 +02:00
|
|
|
cfg: cfg,
|
2016-12-01 04:35:38 +02:00
|
|
|
reader: nil,
|
2017-01-05 15:23:08 +02:00
|
|
|
buf: input,
|
|
|
|
head: 0,
|
|
|
|
tail: len(input),
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// ParseString parses a json string into an Iterator instance
|
2017-06-13 12:49:35 +02:00
|
|
|
func ParseString(cfg *frozenConfig, input string) *Iterator {
|
2017-06-13 10:58:53 +02:00
|
|
|
return ParseBytes(cfg, []byte(input))
|
2016-12-10 08:34:36 +02:00
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// Reset can reset an Iterator instance for another json buffer in io.Reader
|
2016-12-10 08:34:36 +02:00
|
|
|
func (iter *Iterator) Reset(reader io.Reader) *Iterator {
|
|
|
|
iter.reader = reader
|
|
|
|
iter.head = 0
|
|
|
|
iter.tail = 0
|
|
|
|
return iter
|
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// ResetBytes can reset an Iterator instance for another json byte slice
|
2016-12-10 08:34:36 +02:00
|
|
|
func (iter *Iterator) ResetBytes(input []byte) *Iterator {
|
2016-12-06 07:48:03 +02:00
|
|
|
iter.reader = nil
|
|
|
|
iter.Error = nil
|
|
|
|
iter.buf = input
|
|
|
|
iter.head = 0
|
|
|
|
iter.tail = len(input)
|
|
|
|
return iter
|
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// WhatIsNext gets ValueType of relatively next json object
|
2016-12-10 08:34:36 +02:00
|
|
|
func (iter *Iterator) WhatIsNext() ValueType {
|
2017-01-05 15:23:08 +02:00
|
|
|
valueType := valueTypes[iter.nextToken()]
|
|
|
|
iter.unreadByte()
|
|
|
|
return valueType
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
|
|
|
|
2016-12-09 07:08:14 +02:00
|
|
|
func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
|
|
|
|
for i := iter.head; i < iter.tail; i++ {
|
|
|
|
c := iter.buf[i]
|
|
|
|
switch c {
|
|
|
|
case ' ', '\n', '\t', '\r':
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
iter.head = i
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2016-12-06 14:09:19 +02:00
|
|
|
func (iter *Iterator) nextToken() byte {
|
2016-12-06 17:51:29 +02:00
|
|
|
// a variation of skip whitespaces, returning the next non-whitespace token
|
2016-12-06 14:09:19 +02:00
|
|
|
for {
|
|
|
|
for i := iter.head; i < iter.tail; i++ {
|
|
|
|
c := iter.buf[i]
|
|
|
|
switch c {
|
2016-12-15 18:25:35 +02:00
|
|
|
case ' ', '\n', '\t', '\r':
|
2016-12-06 14:09:19 +02:00
|
|
|
continue
|
|
|
|
}
|
2017-01-05 15:23:08 +02:00
|
|
|
iter.head = i + 1
|
2016-12-06 14:09:19 +02:00
|
|
|
return c
|
|
|
|
}
|
|
|
|
if !iter.loadMore() {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
func (iter *Iterator) reportError(operation string, msg string) {
|
2016-12-06 05:08:36 +02:00
|
|
|
if iter.Error != nil {
|
2017-01-23 02:33:43 +02:00
|
|
|
if iter.Error != io.EOF {
|
|
|
|
return
|
|
|
|
}
|
2016-12-06 05:08:36 +02:00
|
|
|
}
|
2016-12-02 05:22:20 +02:00
|
|
|
peekStart := iter.head - 10
|
|
|
|
if peekStart < 0 {
|
|
|
|
peekStart = 0
|
|
|
|
}
|
|
|
|
iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
|
2017-01-05 15:23:08 +02:00
|
|
|
string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
|
|
|
|
2017-01-05 15:23:08 +02:00
|
|
|
// CurrentBuffer gets current buffer as string
|
2016-12-05 04:43:42 +02:00
|
|
|
func (iter *Iterator) CurrentBuffer() string {
|
|
|
|
peekStart := iter.head - 10
|
|
|
|
if peekStart < 0 {
|
|
|
|
peekStart = 0
|
|
|
|
}
|
2016-12-09 07:08:14 +02:00
|
|
|
return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
|
2017-01-05 15:23:08 +02:00
|
|
|
string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
|
2016-12-05 04:43:42 +02:00
|
|
|
}
|
|
|
|
|
2016-12-01 04:35:38 +02:00
|
|
|
func (iter *Iterator) readByte() (ret byte) {
|
|
|
|
if iter.head == iter.tail {
|
2016-12-06 14:01:22 +02:00
|
|
|
if iter.loadMore() {
|
|
|
|
ret = iter.buf[iter.head]
|
|
|
|
iter.head++
|
|
|
|
return ret
|
2016-12-01 04:35:38 +02:00
|
|
|
}
|
2017-01-05 15:23:08 +02:00
|
|
|
return 0
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
2016-12-01 04:35:38 +02:00
|
|
|
ret = iter.buf[iter.head]
|
2016-12-05 08:18:16 +02:00
|
|
|
iter.head++
|
2016-12-01 04:35:38 +02:00
|
|
|
return ret
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
|
|
|
|
2016-12-06 14:01:22 +02:00
|
|
|
func (iter *Iterator) loadMore() bool {
|
|
|
|
if iter.reader == nil {
|
2017-01-24 16:36:16 +02:00
|
|
|
if iter.Error == nil {
|
|
|
|
iter.Error = io.EOF
|
|
|
|
}
|
2016-12-06 14:01:22 +02:00
|
|
|
return false
|
|
|
|
}
|
2017-06-18 10:28:43 +02:00
|
|
|
if iter.captureStartedAt != -1 {
|
|
|
|
iter.captured = append(iter.captured,
|
|
|
|
iter.buf[iter.captureStartedAt:iter.tail]...)
|
|
|
|
iter.captureStartedAt = 0
|
|
|
|
}
|
2016-12-06 14:01:22 +02:00
|
|
|
for {
|
|
|
|
n, err := iter.reader.Read(iter.buf)
|
|
|
|
if n == 0 {
|
|
|
|
if err != nil {
|
2017-01-24 16:36:16 +02:00
|
|
|
if iter.Error == nil {
|
|
|
|
iter.Error = err
|
|
|
|
}
|
2016-12-06 14:01:22 +02:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
iter.head = 0
|
|
|
|
iter.tail = n
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-01 04:35:38 +02:00
|
|
|
func (iter *Iterator) unreadByte() {
|
|
|
|
if iter.head == 0 {
|
2017-01-05 15:23:08 +02:00
|
|
|
iter.reportError("unreadByte", "unread too many bytes")
|
2016-11-30 18:56:25 +02:00
|
|
|
return
|
|
|
|
}
|
2017-01-05 15:23:08 +02:00
|
|
|
iter.head--
|
2016-12-01 04:35:38 +02:00
|
|
|
return
|
2016-11-30 18:56:25 +02:00
|
|
|
}
|
|
|
|
|
2017-01-21 10:09:38 +02:00
|
|
|
func (iter *Iterator) Read() interface{} {
|
|
|
|
valueType := iter.WhatIsNext()
|
|
|
|
switch valueType {
|
|
|
|
case String:
|
|
|
|
return iter.ReadString()
|
|
|
|
case Number:
|
|
|
|
return iter.ReadFloat64()
|
|
|
|
case Nil:
|
|
|
|
iter.skipFixedBytes(4) // null
|
|
|
|
return nil
|
|
|
|
case Bool:
|
|
|
|
return iter.ReadBool()
|
|
|
|
case Array:
|
|
|
|
arr := []interface{}{}
|
|
|
|
iter.ReadArrayCB(func(iter *Iterator) bool {
|
|
|
|
arr = append(arr, iter.Read())
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
return arr
|
|
|
|
case Object:
|
|
|
|
obj := map[string]interface{}{}
|
|
|
|
iter.ReadObjectCB(func(Iter *Iterator, field string) bool {
|
|
|
|
obj[field] = iter.Read()
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
return obj
|
|
|
|
default:
|
|
|
|
iter.reportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|