From c44e7c496a1687a442222e937b2755ea8d1f7af9 Mon Sep 17 00:00:00 2001 From: Tao Wen Date: Tue, 6 Dec 2016 14:23:59 +0800 Subject: [PATCH] optimize read string --- jsoniter.go | 47 ++++++++++++++++++++++++++++++++++++++--- jsoniter_object_test.go | 2 ++ jsoniter_string_test.go | 25 +++++++++++++++++++++- 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/jsoniter.go b/jsoniter.go index 7644515..b1d5ce3 100644 --- a/jsoniter.go +++ b/jsoniter.go @@ -271,7 +271,40 @@ func (iter *Iterator) ReadInt64() (ret int64) { } func (iter *Iterator) ReadString() (ret string) { - str := make([]byte, 0, 8) + return string(iter.ReadStringAsBytes()) +} + +// Tries to find the end of string +// Support if string contains escaped quote symbols. +func stringEnd(data []byte) (int, bool) { + escaped := false + for i, c := range data { + if c == '"' { + if !escaped { + return i + 1, false + } else { + j := i - 1 + for { + if j < 0 || data[j] != '\\' { + return i + 1, true // even number of backslashes + } + j-- + if j < 0 || data[j] != '\\' { + break // odd number of backslashes + } + j-- + + } + } + } else if c == '\\' { + escaped = true + } + } + + return -1, escaped +} + +func (iter *Iterator) ReadStringAsBytes() (ret []byte) { c := iter.readByte() if c == 'n' { iter.skipNull() @@ -281,10 +314,17 @@ func (iter *Iterator) ReadString() (ret string) { iter.ReportError("ReadString", `expects " or n`) return } + end, escaped := stringEnd(iter.buf[iter.head:]) + if end != -1 && !escaped { + ret = iter.buf[iter.head:iter.head+end-1] + iter.head += end + return ret + } + str := make([]byte, 0, 8) for iter.Error == nil { c = iter.readByte() if c == '"' { - return string(str) + return str } if c == '\\' { c = iter.readByte() @@ -543,7 +583,8 @@ func (iter *Iterator) ReadObject() (ret string) { } func (iter *Iterator) readObjectField() (ret string) { - field := iter.ReadString() + str := iter.ReadStringAsBytes() + field := *(*string)(unsafe.Pointer(&str)) if iter.Error != nil { return } diff --git a/jsoniter_object_test.go b/jsoniter_object_test.go index 6b6f9e7..f5db2a5 100644 --- a/jsoniter_object_test.go +++ b/jsoniter_object_test.go @@ -3,6 +3,7 @@ package jsoniter import ( "testing" "encoding/json" + "fmt" ) func Test_empty_object(t *testing.T) { @@ -17,6 +18,7 @@ func Test_one_field(t *testing.T) { iter := ParseString(`{"a": "b"}`) field := iter.ReadObject() if field != "a" { + fmt.Println(iter.Error) t.Fatal(field) } value := iter.ReadString() diff --git a/jsoniter_string_test.go b/jsoniter_string_test.go index f7adefb..bde8f09 100644 --- a/jsoniter_string_test.go +++ b/jsoniter_string_test.go @@ -72,6 +72,18 @@ func Test_string_escape_unicode_with_surrogate(t *testing.T) { } } +func Test_string_as_bytes(t *testing.T) { + iter := Parse(bytes.NewBufferString(`"hello""world"`), 4096) + val := string(iter.ReadStringAsBytes()) + if val != "hello" { + t.Fatal(val) + } + val = string(iter.ReadStringAsBytes()) + if val != "world" { + t.Fatal(val) + } +} + func Benchmark_jsoniter_unicode(b *testing.B) { for n := 0; n < b.N; n++ { iter := ParseString(`"\ud83d\udc4a"`) @@ -80,12 +92,23 @@ func Benchmark_jsoniter_unicode(b *testing.B) { } func Benchmark_jsoniter_ascii(b *testing.B) { + iter := ParseString(`"hello, world!"`) + b.ResetTimer() for n := 0; n < b.N; n++ { - iter := ParseString(`"hello"`) + iter.Reuse(iter.buf) iter.ReadString() } } +func Benchmark_jsoniter_string_as_bytes(b *testing.B) { + iter := ParseString(`"hello, world!"`) + b.ResetTimer() + for n := 0; n < b.N; n++ { + iter.Reuse(iter.buf) + iter.ReadStringAsBytes() + } +} + func Benchmark_json_unicode(b *testing.B) { for n := 0; n < b.N; n++ { result := ""