diff --git a/feature_iter_object.go b/feature_iter_object.go index 242d414..d1e1fd5 100644 --- a/feature_iter_object.go +++ b/feature_iter_object.go @@ -55,7 +55,7 @@ func (iter *Iterator) readObjectStart() bool { } func (iter *Iterator) readObjectField() (ret string) { - str := iter.readStringAsBytes() + str := iter.ReadStringAsSlice() if iter.skipWhitespacesWithoutLoadMore() { if ret == "" { ret = string(str) diff --git a/feature_iter_string.go b/feature_iter_string.go index 748c5a9..da96a26 100644 --- a/feature_iter_string.go +++ b/feature_iter_string.go @@ -27,6 +27,15 @@ func (iter *Iterator) ReadString() (ret string) { copied[j] = c j++ } + if i == iter.tail { + if iter.loadMore() { + i = iter.head + continue + } else { + iter.reportError("ReadString", "incomplete string") + return + } + } iter.head = i if j == len(copied) { newBuf := make([]byte, len(copied) * 2) @@ -108,22 +117,23 @@ func (iter *Iterator) readStringSlowPath(str []byte) (ret string) { return } -func (iter *Iterator) readStringAsBytes() (ret []byte) { +func (iter *Iterator) ReadStringAsSlice() (ret []byte) { c := iter.nextToken() if c == '"' { for i := iter.head; i < iter.tail; i++ { - c := iter.buf[i] - if c == '"' { + // require ascii string and no escape + // for: field name, base64, number + if iter.buf[i] == '"' { // fast path: reuse the underlying buffer ret = iter.buf[iter.head : i] iter.head = i + 1 return ret } } - iter.head = iter.tail readLen := iter.tail - iter.head copied := make([]byte, readLen, readLen * 2) copy(copied, iter.buf[iter.head:iter.tail]) + iter.head = iter.tail for iter.Error == nil { c := iter.readByte() if c == '"' { @@ -131,7 +141,7 @@ func (iter *Iterator) readStringAsBytes() (ret []byte) { } copied = append(copied, c) } - return + return copied } if c == 'n' { iter.skipUntilBreak() diff --git a/iterator.go b/iterator.go index 6fcfa56..39be666 100644 --- a/iterator.go +++ b/iterator.go @@ -275,7 +275,7 @@ func (iter *Iterator) ReadBool() (ret bool) { // ReadBase64 reads a json object as Base64 in byte slice func (iter *Iterator) ReadBase64() (ret []byte) { - src := iter.readStringAsBytes() + src := iter.ReadStringAsSlice() if iter.Error != nil { return } diff --git a/jsoniter_string_test.go b/jsoniter_string_test.go index e139b5c..6834e36 100644 --- a/jsoniter_string_test.go +++ b/jsoniter_string_test.go @@ -5,89 +5,57 @@ import ( "encoding/json" "testing" "github.com/json-iterator/go/require" + "fmt" ) -func Test_read_large_string(t *testing.T) { - should := require.New(t) - iter := ParseString(`"0123456789012345678901234567890123456789"`) - should.Equal("0123456789012345678901234567890123456789", iter.ReadString()) -} - -func Test_decode_string_empty(t *testing.T) { - iter := Parse(bytes.NewBufferString(`""`), 4096) - val := iter.ReadString() - if iter.Error != nil { - t.Fatal(iter.Error) +func Test_read_normal_string(t *testing.T) { + cases := map[string]string{ + `"0123456789012345678901234567890123456789"`: `0123456789012345678901234567890123456789`, + `""`: ``, + `"hello"`: `hello`, } - if val != "" { - t.Fatal(val) + for input, output := range cases { + t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { + should := require.New(t) + iter := ParseString(input) + should.Equal(output, iter.ReadString()) + }) + t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { + should := require.New(t) + iter := Parse(bytes.NewBufferString(input), 2) + should.Equal(output, iter.ReadString()) + }) + t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { + should := require.New(t) + iter := ParseString(input) + should.Equal(output, string(iter.ReadStringAsSlice())) + }) + t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { + should := require.New(t) + iter := Parse(bytes.NewBufferString(input), 2) + should.Equal(output, string(iter.ReadStringAsSlice())) + }) } } -func Test_decode_string_hello(t *testing.T) { - iter := Parse(bytes.NewBufferString(`"hello"`), 4096) - val := iter.ReadString() - if iter.Error != nil { - t.Fatal(iter.Error) +func Test_read_exotic_string(t *testing.T) { + cases := map[string]string{ + `"hel\"lo"`: `hel"lo`, + `"hel\nlo"`: "hel\nlo", + `"\u4e2d\u6587"`: "中文", + `"\ud83d\udc4a"`: "\xf0\x9f\x91\x8a", // surrogate } - if val != "hello" { - t.Fatal(val) - } -} - -func Test_decode_string_escape_quote(t *testing.T) { - iter := Parse(bytes.NewBufferString(`"hel\"lo"`), 4096) - val := iter.ReadString() - if iter.Error != nil { - t.Fatal(iter.Error) - } - if val != `hel"lo` { - t.Fatal(val) - } -} - -func Test_decode_string_escape_newline(t *testing.T) { - iter := Parse(bytes.NewBufferString(`"hel\nlo"`), 4096) - val := iter.ReadString() - if iter.Error != nil { - t.Fatal(iter.Error) - } - if val != "hel\nlo" { - t.Fatal(val) - } -} - -func Test_decode_string_escape_unicode(t *testing.T) { - iter := Parse(bytes.NewBufferString(`"\u4e2d\u6587"`), 4096) - val := iter.ReadString() - if iter.Error != nil { - t.Fatal(iter.Error) - } - if val != "中文" { - t.Fatal(val) - } -} - -func Test_decode_string_escape_unicode_with_surrogate(t *testing.T) { - iter := Parse(bytes.NewBufferString(`"\ud83d\udc4a"`), 4096) - val := iter.ReadString() - if iter.Error != nil { - t.Fatal(iter.Error) - } - if val != "\xf0\x9f\x91\x8a" { - t.Fatal(val) - } -} - -func Test_decode_string_as_bytes(t *testing.T) { - iter := Parse(bytes.NewBufferString(`"hello""world"`), 4096) - val := string(iter.readStringAsBytes()) - if val != "hello" { - t.Fatal(val) - } - val = string(iter.readStringAsBytes()) - if val != "world" { - t.Fatal(val) + for input, output := range cases { + t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { + should := require.New(t) + iter := ParseString(input) + should.Equal(output, iter.ReadString()) + }) + t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { + should := require.New(t) + iter := Parse(bytes.NewBufferString(input), 2) + should.Equal(output, iter.ReadString()) + }) } } @@ -133,7 +101,7 @@ func Benchmark_jsoniter_string_as_bytes(b *testing.B) { b.ResetTimer() for n := 0; n < b.N; n++ { iter.ResetBytes(iter.buf) - iter.readStringAsBytes() + iter.ReadStringAsSlice() } }