From 2d647f04cade929c7ac73481621850b58774a785 Mon Sep 17 00:00:00 2001 From: Tao Wen Date: Tue, 24 Jan 2017 00:23:07 +0800 Subject: [PATCH] implement lazy object three iteration way --- feature_any.go | 70 +++++++++++-- feature_any_bool.go | 10 ++ feature_any_float.go | 5 + feature_any_int.go | 5 + feature_any_invalid.go | 5 + feature_any_nil.go | 5 + feature_any_object.go | 218 ++++++++++++++++++++++++++++++++++++++++ feature_any_string.go | 5 + feature_iter_skip.go | 2 + jsoniter_object_test.go | 97 +++++++++++++++--- 10 files changed, 398 insertions(+), 24 deletions(-) create mode 100644 feature_any_object.go diff --git a/feature_any.go b/feature_any.go index c131f24..189aa55 100644 --- a/feature_any.go +++ b/feature_any.go @@ -11,31 +11,50 @@ type Any interface { ToFloat32() float32 ToFloat64() float64 ToString() string + Get(path ...interface{}) Any + Keys() []string + IterateObject() (func() (string, Any, bool), bool) +} + +type baseAny struct {} + +func (any *baseAny) Keys() []string { + return []string{} +} + +func (any *baseAny) IterateObject() (func() (string, Any, bool), bool) { + return nil, false } func (iter *Iterator) ReadAny() Any { + return iter.readAny(nil) +} + +func (iter *Iterator) readAny(reusableIter *Iterator) Any { c := iter.nextToken() switch c { case '"': - return iter.readStringAny() + return iter.readStringAny(reusableIter) case 'n': iter.skipFixedBytes(3) // null return &nilAny{} case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': iter.unreadByte() - return iter.readNumberAny() + return iter.readNumberAny(reusableIter) case 't': iter.skipFixedBytes(3) // true return &trueAny{} case 'f': iter.skipFixedBytes(4) // false return &falseAny{} + case '{': + return iter.readObjectAny(reusableIter) } iter.reportError("ReadAny", fmt.Sprintf("unexpected character: %v", c)) return &invalidAny{} } -func (iter *Iterator) readNumberAny() Any { +func (iter *Iterator) readNumberAny(reusableIter *Iterator) Any { dotFound := false var lazyBuf 
[]byte for { @@ -50,9 +69,9 @@ func (iter *Iterator) readNumberAny() Any { lazyBuf = append(lazyBuf, iter.buf[iter.head:i]...) iter.head = i if dotFound { - return &floatLazyAny{lazyBuf, nil, nil, 0} + return &floatLazyAny{baseAny{},lazyBuf, reusableIter, nil, 0} } else { - return &intLazyAny{lazyBuf, nil, nil, 0} + return &intLazyAny{baseAny{}, lazyBuf, reusableIter, nil, 0} } } } @@ -60,15 +79,15 @@ func (iter *Iterator) readNumberAny() Any { if !iter.loadMore() { iter.head = iter.tail if dotFound { - return &floatLazyAny{lazyBuf, nil, nil, 0} + return &floatLazyAny{baseAny{}, lazyBuf, reusableIter, nil, 0} } else { - return &intLazyAny{lazyBuf, nil, nil, 0} + return &intLazyAny{baseAny{}, lazyBuf, reusableIter, nil, 0} } } } } -func (iter *Iterator) readStringAny() Any { +func (iter *Iterator) readStringAny(reusableIter *Iterator) Any { lazyBuf := make([]byte, 1, 8) lazyBuf[0] = '"' for { @@ -85,7 +104,40 @@ func (iter *Iterator) readStringAny() Any { } else { lazyBuf = append(lazyBuf, iter.buf[iter.head:end]...) iter.head = end - return &stringLazyAny{lazyBuf, nil, nil, ""} + return &stringLazyAny{baseAny{}, lazyBuf, reusableIter, nil, ""} + } + } +} + +func (iter *Iterator) readObjectAny(reusableIter *Iterator) Any { + level := 1 + lazyBuf := make([]byte, 1, 32) + lazyBuf[0] = '{' + for { + start := iter.head + for i := iter.head; i < iter.tail; i++ { + switch iter.buf[i] { + case '"': // If inside string, skip it + iter.head = i + 1 + iter.skipString() + i = iter.head - 1 // it will be i++ soon + case '{': // If open symbol, increase level + level++ + case '}': // If close symbol, decrease level + level-- + + // If we have returned to the original level, we're done + if level == 0 { + iter.head = i + 1 + lazyBuf = append(lazyBuf, iter.buf[start:iter.head]...) + return &objectLazyAny{lazyBuf, reusableIter, nil, nil, lazyBuf} + } + } + } + lazyBuf = append(lazyBuf, iter.buf[iter.head:iter.tail]...) 
+ if !iter.loadMore() { + iter.reportError("skipObject", "incomplete object") + return &invalidAny{} } } } diff --git a/feature_any_bool.go b/feature_any_bool.go index 63c056b..0c56c52 100644 --- a/feature_any_bool.go +++ b/feature_any_bool.go @@ -1,6 +1,7 @@ package jsoniter type trueAny struct { + baseAny } func (any *trueAny) LastError() error { @@ -35,7 +36,12 @@ func (any *trueAny) ToString() string { return "true" } +func (any *trueAny) Get(path ...interface{}) Any { + return &invalidAny{} +} + type falseAny struct { + baseAny } func (any *falseAny) LastError() error { @@ -69,3 +75,7 @@ func (any *falseAny) ToFloat64() float64 { func (any *falseAny) ToString() string { return "false" } + +func (any *falseAny) Get(path ...interface{}) Any { + return &invalidAny{} +} \ No newline at end of file diff --git a/feature_any_float.go b/feature_any_float.go index 1684cef..0f7f399 100644 --- a/feature_any_float.go +++ b/feature_any_float.go @@ -6,6 +6,7 @@ import ( ) type floatLazyAny struct { + baseAny buf []byte iter *Iterator err error @@ -63,4 +64,8 @@ func (any *floatLazyAny) ToFloat64() float64 { func (any *floatLazyAny) ToString() string { return *(*string)(unsafe.Pointer(&any.buf)) +} + +func (any *floatLazyAny) Get(path ...interface{}) Any { + return &invalidAny{} } \ No newline at end of file diff --git a/feature_any_int.go b/feature_any_int.go index b2708f4..45a8394 100644 --- a/feature_any_int.go +++ b/feature_any_int.go @@ -6,6 +6,7 @@ import ( ) type intLazyAny struct { + baseAny buf []byte iter *Iterator err error @@ -63,4 +64,8 @@ func (any *intLazyAny) ToFloat64() float64 { func (any *intLazyAny) ToString() string { return *(*string)(unsafe.Pointer(&any.buf)) +} + +func (any *intLazyAny) Get(path ...interface{}) Any { + return &invalidAny{} } \ No newline at end of file diff --git a/feature_any_invalid.go b/feature_any_invalid.go index 2e05d7d..cb03597 100644 --- a/feature_any_invalid.go +++ b/feature_any_invalid.go @@ -1,6 +1,7 @@ package jsoniter 
type invalidAny struct { + baseAny } func (any *invalidAny) LastError() error { @@ -34,3 +35,7 @@ func (any *invalidAny) ToFloat64() float64 { func (any *invalidAny) ToString() string { return "" } + +func (any *invalidAny) Get(path ...interface{}) Any { + return any +} diff --git a/feature_any_nil.go b/feature_any_nil.go index cd3067a..4d5f8a2 100644 --- a/feature_any_nil.go +++ b/feature_any_nil.go @@ -1,6 +1,7 @@ package jsoniter type nilAny struct { + baseAny } func (any *nilAny) LastError() error { @@ -34,3 +35,7 @@ func (any *nilAny) ToFloat64() float64 { func (any *nilAny) ToString() string { return "" } + +func (any *nilAny) Get(path ...interface{}) Any { + return &invalidAny{} +} diff --git a/feature_any_object.go b/feature_any_object.go new file mode 100644 index 0000000..70fc998 --- /dev/null +++ b/feature_any_object.go @@ -0,0 +1,218 @@ +package jsoniter + +import ( + "unsafe" +) + +type objectLazyAny struct { + buf []byte + iter *Iterator + err error + cache map[string]Any + remaining []byte +} + +func (any *objectLazyAny) parse() *Iterator { + iter := any.iter + if iter == nil { + iter = NewIterator() + any.iter = iter + } + iter.ResetBytes(any.remaining) + return iter +} + +func (any *objectLazyAny) fillCacheUntil(target string) Any { + if any.remaining == nil { + return any.cache[target] + } + if any.cache == nil { + any.cache = map[string]Any{} + } + val := any.cache[target] + if val != nil { + return val + } + iter := any.parse() + if len(any.remaining) == len(any.buf) { + iter.head++ + c := iter.nextToken() + if c != '}' { + iter.unreadByte() + k := string(iter.readObjectFieldAsBytes()) + v := iter.readAny(iter) + any.cache[k] = v + if target == k { + any.remaining = iter.buf[iter.head:] + return v + } + } else { + any.remaining = nil + return nil + } + } + for iter.nextToken() == ',' { + k := string(iter.readObjectFieldAsBytes()) + v := iter.readAny(iter) + any.cache[k] = v + if target == k { + any.remaining = iter.buf[iter.head:] + return v + } 
+ } + any.remaining = nil + return nil +} + +func (any *objectLazyAny) fillCache() { + if any.remaining == nil { + return + } + if any.cache == nil { + any.cache = map[string]Any{} + } + iter := any.parse() + if len(any.remaining) == len(any.buf) { + iter.head++ + c := iter.nextToken() + if c != '}' { + iter.unreadByte() + k := string(iter.readObjectFieldAsBytes()) + v := iter.readAny(iter) + any.cache[k] = v + } else { + any.remaining = nil + return + } + } + for iter.nextToken() == ',' { + k := string(iter.readObjectFieldAsBytes()) + v := iter.readAny(iter) + any.cache[k] = v + } + any.remaining = nil + return +} + +func (any *objectLazyAny) LastError() error { + return nil +} + +func (any *objectLazyAny) ToBool() bool { + return false +} + +func (any *objectLazyAny) ToInt() int { + return 0 +} + +func (any *objectLazyAny) ToInt32() int32 { + return 0 +} + +func (any *objectLazyAny) ToInt64() int64 { + return 0 +} + +func (any *objectLazyAny) ToFloat32() float32 { + return 0 +} + +func (any *objectLazyAny) ToFloat64() float64 { + return 0 +} + +func (any *objectLazyAny) ToString() string { + if len(any.remaining) == len(any.buf) { + // nothing has been parsed yet + return *(*string)(unsafe.Pointer(&any.buf)) + } else { + // TODO: serialize the cache + return "" + } +} + +func (any *objectLazyAny) Get(path ...interface{}) Any { + key := path[0].(string) + return any.fillCacheUntil(key) +} + +func (any *objectLazyAny) Keys() []string { + any.fillCache() + keys := make([]string, 0, len(any.cache)) + for key := range any.cache { + keys = append(keys, key) + } + return keys +} +func (any *objectLazyAny) IterateObject() (func() (string, Any, bool), bool) { + if any.cache == nil { + any.cache = map[string]Any{} + } + remaining := any.remaining + if len(remaining) == len(any.buf) { + iter := any.parse() + iter.head++ + c := iter.nextToken() + if c != '}' { + iter.unreadByte() + k := string(iter.readObjectFieldAsBytes()) + v := iter.readAny(iter) + any.cache[k] = v + 
remaining = iter.buf[iter.head:] + any.remaining = remaining + } else { + remaining = nil + any.remaining = nil + return nil, false + } + } + if len(any.cache) == 0 { + return nil, false + } + keys := make([]string, 0, len(any.cache)) + values := make([]Any, 0, len(any.cache)) + for key, value := range any.cache { + keys = append(keys, key) + values = append(values, value) + } + nextKey := keys[0] + nextValue := values[0] + i := 1 + return func() (string, Any, bool) { + key := nextKey + value := nextValue + if i < len(keys) { + // read from cache + nextKey = keys[i] + nextValue = values[i] + i++ + return key, value, true + } else { + if remaining == nil { + return "", nil, false + } + // read from buffer + iter := any.iter + if iter == nil { + iter = NewIterator() + any.iter = iter + } + iter.ResetBytes(remaining) + c := iter.nextToken() + if c == ',' { + nextKey = string(iter.readObjectFieldAsBytes()) + nextValue = iter.readAny(iter) + any.cache[nextKey] = nextValue + remaining = iter.buf[iter.head:] + any.remaining = remaining + return key, value, true + } else { + remaining = nil + any.remaining = nil + return key, value, false + } + } + }, true +} + diff --git a/feature_any_string.go b/feature_any_string.go index 45ae405..f478349 100644 --- a/feature_any_string.go +++ b/feature_any_string.go @@ -5,6 +5,7 @@ import ( ) type stringLazyAny struct{ + baseAny buf []byte iter *Iterator err error @@ -95,4 +96,8 @@ func (any *stringLazyAny) ToFloat64() float64 { func (any *stringLazyAny) ToString() string { any.fillCache() return any.cache +} + +func (any *stringLazyAny) Get(path ...interface{}) Any { + return &invalidAny{} } \ No newline at end of file diff --git a/feature_iter_skip.go b/feature_iter_skip.go index 582098f..ec37e72 100644 --- a/feature_iter_skip.go +++ b/feature_iter_skip.go @@ -141,6 +141,7 @@ func (iter *Iterator) skipArray() { } } if !iter.loadMore() { + iter.reportError("skipObject", "incomplete array") return } } @@ -168,6 +169,7 @@ func (iter 
*Iterator) skipObject() { } } if !iter.loadMore() { + iter.reportError("skipObject", "incomplete object") return } } diff --git a/jsoniter_object_test.go b/jsoniter_object_test.go index 6d13ccd..b421cf2 100644 --- a/jsoniter_object_test.go +++ b/jsoniter_object_test.go @@ -36,27 +36,18 @@ func Test_one_field(t *testing.T) { } func Test_two_field(t *testing.T) { + should := require.New(t) iter := ParseString(`{ "a": "b" , "c": "d" }`) field := iter.ReadObject() - if field != "a" { - t.Fatal(field) - } + should.Equal("a", field) value := iter.ReadString() - if value != "b" { - t.Fatal(field) - } + should.Equal("b", value) field = iter.ReadObject() - if field != "c" { - t.Fatal(field) - } + should.Equal("c", field) value = iter.ReadString() - if value != "d" { - t.Fatal(field) - } + should.Equal("d", value) field = iter.ReadObject() - if field != "" { - t.Fatal(field) - } + should.Equal("", field) iter = ParseString(`{"field1": "1", "field2": 2}`) for field := iter.ReadObject(); field != ""; field = iter.ReadObject() { switch field { @@ -70,6 +61,82 @@ func Test_two_field(t *testing.T) { } } +func Test_read_object_as_any(t *testing.T) { + should := require.New(t) + any, err := UnmarshalAnyFromString(`{"a":"b","c":"d"}`) + should.Nil(err) + should.Equal(`{"a":"b","c":"d"}`, any.ToString()) + // partial parse + should.Equal("b", any.Get("a").ToString()) + should.Equal("d", any.Get("c").ToString()) + should.Equal(2, len(any.Keys())) + any, err = UnmarshalAnyFromString(`{"a":"b","c":"d"}`) + // full parse + should.Equal(2, len(any.Keys())) +} + +func Test_object_any_lazy_iterator(t *testing.T) { + should := require.New(t) + any, err := UnmarshalAnyFromString(`{"a":"b","c":"d"}`) + should.Nil(err) + // iterator parse + vals := map[string]string{} + var k string + var v Any + next, hasNext := any.IterateObject() + should.True(hasNext) + + k, v, hasNext = next() + should.True(hasNext) + vals[k] = v.ToString() + + // trigger full parse + should.Equal(2, len(any.Keys())) + + 
k, v, hasNext = next() + should.False(hasNext) + vals[k] = v.ToString() + + should.Equal(map[string]string{"a":"b", "c":"d"}, vals) + vals = map[string]string{} + for next, hasNext := any.IterateObject(); hasNext; k, v, hasNext = next() { + vals[k] = v.ToString() + } + should.Equal(map[string]string{"a":"b", "c":"d"}, vals) +} + + +func Test_object_any_with_two_lazy_iterators(t *testing.T) { + should := require.New(t) + any, err := UnmarshalAnyFromString(`{"a":"b","c":"d","e":"f"}`) + should.Nil(err) + var k string + var v Any + next1, hasNext1 := any.IterateObject() + next2, hasNext2 := any.IterateObject() + should.True(hasNext1) + k, v, hasNext1 = next1() + should.True(hasNext1) + should.Equal("a", k) + should.Equal("b", v.ToString()) + + should.True(hasNext2) + k, v, hasNext2 = next2() + should.True(hasNext2) + should.Equal("a", k) + should.Equal("b", v.ToString()) + + k, v, hasNext1 = next1() + should.True(hasNext1) + should.Equal("c", k) + should.Equal("d", v.ToString()) + + k, v, hasNext2 = next2() + should.True(hasNext2) + should.Equal("c", k) + should.Equal("d", v.ToString()) +} + func Test_write_object(t *testing.T) { should := require.New(t) buf := &bytes.Buffer{}