1
0
mirror of https://github.com/json-iterator/go.git synced 2024-11-27 08:30:57 +02:00
json-iterator/feature_iter.go

292 lines
5.8 KiB
Go
Raw Normal View History

2017-06-11 09:32:58 +02:00
//
// Besides, jsoniter.Iterator provides a different set of interfaces that
// iterate over the given bytes/string/reader and yield the parsed elements
// one by one. This set of interfaces reads input only as required and gives
// better performance.
package jsoniter
import (
"encoding/base64"
2016-11-30 18:56:25 +02:00
"fmt"
"io"
2016-11-30 18:56:25 +02:00
)
2016-12-10 08:34:36 +02:00
// ValueType identifies the kind of JSON value that starts at the current
// position of an Iterator, as reported by Iterator.WhatIsNext.
type ValueType int

// The possible ValueType values. The numeric values are assigned by iota in
// declaration order, starting with Invalid at zero.
const (
	// Invalid is the zero ValueType; it is reported for any byte that cannot
	// start a JSON value.
	Invalid ValueType = iota
	// String is reported when the next value starts with '"'.
	String
	// Number is reported when the next value starts with '-' or a decimal digit.
	Number
	// Nil is reported when the next value starts with 'n' (JSON null).
	Nil
	// Bool is reported when the next value starts with 't' or 'f'.
	Bool
	// Array is reported when the next value starts with '['.
	Array
	// Object is reported when the next value starts with '{'.
	Object
)
2017-01-15 16:50:31 +02:00
var hexDigits []byte
2016-12-10 08:34:36 +02:00
var valueTypes []ValueType
2016-12-06 04:41:05 +02:00
func init() {
2017-01-15 16:50:31 +02:00
hexDigits = make([]byte, 256)
for i := 0; i < len(hexDigits); i++ {
hexDigits[i] = 255
2016-12-06 04:41:05 +02:00
}
for i := '0'; i <= '9'; i++ {
2017-01-15 16:50:31 +02:00
hexDigits[i] = byte(i - '0')
2016-12-06 04:41:05 +02:00
}
for i := 'a'; i <= 'f'; i++ {
2017-01-15 16:50:31 +02:00
hexDigits[i] = byte((i - 'a') + 10)
2016-12-06 04:41:05 +02:00
}
for i := 'A'; i <= 'F'; i++ {
2017-01-15 16:50:31 +02:00
hexDigits[i] = byte((i - 'A') + 10)
2016-12-06 04:41:05 +02:00
}
2016-12-10 08:34:36 +02:00
valueTypes = make([]ValueType, 256)
for i := 0; i < len(valueTypes); i++ {
valueTypes[i] = Invalid
}
valueTypes['"'] = String
valueTypes['-'] = Number
valueTypes['0'] = Number
valueTypes['1'] = Number
valueTypes['2'] = Number
valueTypes['3'] = Number
valueTypes['4'] = Number
valueTypes['5'] = Number
valueTypes['6'] = Number
valueTypes['7'] = Number
valueTypes['8'] = Number
valueTypes['9'] = Number
valueTypes['t'] = Bool
valueTypes['f'] = Bool
2017-01-21 10:09:38 +02:00
valueTypes['n'] = Nil
valueTypes['['] = Array
valueTypes['{'] = Object
}
// Iterator is a fast and flexible JSON parser
2016-11-30 18:56:25 +02:00
type Iterator struct {
2017-06-13 10:58:53 +02:00
cfg *Config
2016-12-01 04:35:38 +02:00
reader io.Reader
buf []byte
head int
tail int
Error error
2016-11-30 18:56:25 +02:00
}
// Create creates an empty Iterator instance
2017-06-13 10:58:53 +02:00
func NewIterator(cfg *Config) *Iterator {
2017-06-13 11:47:40 +02:00
cfg.init()
return &Iterator{
2017-06-13 10:58:53 +02:00
cfg: cfg,
reader: nil,
buf: nil,
head: 0,
tail: 0,
}
}
// Parse parses a json buffer in io.Reader into an Iterator instance
2017-06-13 10:58:53 +02:00
func Parse(cfg *Config, reader io.Reader, bufSize int) *Iterator {
2017-06-13 11:47:40 +02:00
cfg.init()
return &Iterator{
2017-06-13 10:58:53 +02:00
cfg: cfg,
2016-12-01 04:35:38 +02:00
reader: reader,
buf: make([]byte, bufSize),
head: 0,
tail: 0,
2016-11-30 18:56:25 +02:00
}
}
// ParseBytes parses a json byte slice into an Iterator instance
2017-06-13 10:58:53 +02:00
func ParseBytes(cfg *Config, input []byte) *Iterator {
2017-06-13 11:47:40 +02:00
cfg.init()
return &Iterator{
2017-06-13 10:58:53 +02:00
cfg: cfg,
2016-12-01 04:35:38 +02:00
reader: nil,
buf: input,
head: 0,
tail: len(input),
2016-11-30 18:56:25 +02:00
}
}
// ParseString parses a json string into an Iterator instance
2017-06-13 10:58:53 +02:00
func ParseString(cfg *Config, input string) *Iterator {
return ParseBytes(cfg, []byte(input))
2016-12-10 08:34:36 +02:00
}
// Reset can reset an Iterator instance for another json buffer in io.Reader
2016-12-10 08:34:36 +02:00
func (iter *Iterator) Reset(reader io.Reader) *Iterator {
iter.reader = reader
iter.head = 0
iter.tail = 0
return iter
}
// ResetBytes can reset an Iterator instance for another json byte slice
2016-12-10 08:34:36 +02:00
func (iter *Iterator) ResetBytes(input []byte) *Iterator {
2016-12-06 07:48:03 +02:00
iter.reader = nil
iter.Error = nil
iter.buf = input
iter.head = 0
iter.tail = len(input)
return iter
}
// WhatIsNext gets ValueType of relatively next json object
2016-12-10 08:34:36 +02:00
func (iter *Iterator) WhatIsNext() ValueType {
valueType := valueTypes[iter.nextToken()]
iter.unreadByte()
return valueType
2016-11-30 18:56:25 +02:00
}
2016-12-09 07:08:14 +02:00
// skipWhitespacesWithoutLoadMore advances head to the first non-whitespace
// byte of the currently buffered window and returns false. If only
// whitespace (or nothing) remains in the window it returns true and leaves
// head untouched; it never asks the reader for more input.
func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
	for pos := iter.head; pos < iter.tail; pos++ {
		b := iter.buf[pos]
		if b != ' ' && b != '\n' && b != '\t' && b != '\r' {
			iter.head = pos
			return false
		}
	}
	return true
}
2016-12-06 14:09:19 +02:00
func (iter *Iterator) nextToken() byte {
2016-12-06 17:51:29 +02:00
// a variation of skip whitespaces, returning the next non-whitespace token
2016-12-06 14:09:19 +02:00
for {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
switch c {
2016-12-15 18:25:35 +02:00
case ' ', '\n', '\t', '\r':
2016-12-06 14:09:19 +02:00
continue
}
iter.head = i + 1
2016-12-06 14:09:19 +02:00
return c
}
if !iter.loadMore() {
return 0
}
}
}
func (iter *Iterator) reportError(operation string, msg string) {
2016-12-06 05:08:36 +02:00
if iter.Error != nil {
2017-01-23 02:33:43 +02:00
if iter.Error != io.EOF {
return
}
2016-12-06 05:08:36 +02:00
}
2016-12-02 05:22:20 +02:00
peekStart := iter.head - 10
if peekStart < 0 {
peekStart = 0
}
iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
2016-11-30 18:56:25 +02:00
}
// CurrentBuffer gets current buffer as string
func (iter *Iterator) CurrentBuffer() string {
peekStart := iter.head - 10
if peekStart < 0 {
peekStart = 0
}
2016-12-09 07:08:14 +02:00
return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
}
2016-12-01 04:35:38 +02:00
func (iter *Iterator) readByte() (ret byte) {
if iter.head == iter.tail {
2016-12-06 14:01:22 +02:00
if iter.loadMore() {
ret = iter.buf[iter.head]
iter.head++
return ret
2016-12-01 04:35:38 +02:00
}
return 0
2016-11-30 18:56:25 +02:00
}
2016-12-01 04:35:38 +02:00
ret = iter.buf[iter.head]
iter.head++
2016-12-01 04:35:38 +02:00
return ret
2016-11-30 18:56:25 +02:00
}
2016-12-06 14:01:22 +02:00
func (iter *Iterator) loadMore() bool {
if iter.reader == nil {
2017-01-24 16:36:16 +02:00
if iter.Error == nil {
iter.Error = io.EOF
}
2016-12-06 14:01:22 +02:00
return false
}
for {
n, err := iter.reader.Read(iter.buf)
if n == 0 {
if err != nil {
2017-01-24 16:36:16 +02:00
if iter.Error == nil {
iter.Error = err
}
2016-12-06 14:01:22 +02:00
return false
}
} else {
iter.head = 0
iter.tail = n
return true
}
}
}
2016-12-01 04:35:38 +02:00
func (iter *Iterator) unreadByte() {
if iter.head == 0 {
iter.reportError("unreadByte", "unread too many bytes")
2016-11-30 18:56:25 +02:00
return
}
iter.head--
2016-12-01 04:35:38 +02:00
return
2016-11-30 18:56:25 +02:00
}
2017-01-21 10:09:38 +02:00
// Read decodes the next JSON value into a generic Go value, chosen by what
// WhatIsNext reports: strings via ReadString, numbers via ReadFloat64,
// booleans via ReadBool, null as nil, arrays as []interface{} and objects as
// map[string]interface{}, with nested values decoded recursively. For any
// other input an error is reported and nil is returned.
func (iter *Iterator) Read() interface{} {
	switch next := iter.WhatIsNext(); next {
	case Nil:
		iter.skipFixedBytes(4) // null
		return nil
	case Bool:
		return iter.ReadBool()
	case Number:
		return iter.ReadFloat64()
	case String:
		return iter.ReadString()
	case Array:
		elems := []interface{}{}
		collect := func(elemIter *Iterator) bool {
			elems = append(elems, elemIter.Read())
			return true
		}
		iter.ReadArrayCB(collect)
		return elems
	case Object:
		fields := map[string]interface{}{}
		// The value is read through the receiver; the iterator handed to the
		// callback is not used.
		collect := func(_ *Iterator, field string) bool {
			fields[field] = iter.Read()
			return true
		}
		iter.ReadObjectCB(collect)
		return fields
	default:
		iter.reportError("Read", fmt.Sprintf("unexpected value type: %v", next))
		return nil
	}
}
2016-12-02 05:22:20 +02:00
// ReadBase64 reads a json object as Base64 in byte slice
2016-12-08 07:29:54 +02:00
func (iter *Iterator) ReadBase64() (ret []byte) {
src := iter.ReadStringAsSlice()
2016-12-08 07:29:54 +02:00
if iter.Error != nil {
return
}
b64 := base64.StdEncoding
ret = make([]byte, b64.DecodedLen(len(src)))
n, err := b64.Decode(ret, src)
if err != nil {
iter.Error = err
return
}
return ret[:n]
}