From ea435e34738757d174dd0991ff385fcbef9a0bf8 Mon Sep 17 00:00:00 2001 From: Tao Wen Date: Tue, 6 Dec 2016 23:31:11 +0800 Subject: [PATCH] optimize skip --- jsoniter.go | 179 +++++++++++++++----------------------- jsoniter_find_end_test.go | 138 +++++++++++++++++++++++++++++ jsoniter_skip_test.go | 49 +++++++---- 3 files changed, 236 insertions(+), 130 deletions(-) create mode 100644 jsoniter_find_end_test.go diff --git a/jsoniter.go b/jsoniter.go index 9655ed7..92f5a2f 100644 --- a/jsoniter.go +++ b/jsoniter.go @@ -303,50 +303,89 @@ func (iter *Iterator) ReadString() (ret string) { return string(iter.ReadStringAsBytes()) } +// adapted from: https://github.com/buger/jsonparser/blob/master/parser.go // Tries to find the end of string // Support if string contains escaped quote symbols. -func stringEnd(data []byte) (int, bool) { +func (iter *Iterator) findStringEnd() (int, bool) { escaped := false - for i, c := range data { + for i := iter.head; i < iter.tail; i++ { + c := iter.buf[i] if c == '"' { if !escaped { return i + 1, false } else { j := i - 1 for { - if j < 0 || data[j] != '\\' { - return i + 1, true // even number of backslashes + if j < iter.head || iter.buf[j] != '\\' { + // even number of backslashes + // either end of buffer, or " found + return i + 1, true } j-- - if j < 0 || data[j] != '\\' { - break // odd number of backslashes + if j < iter.head || iter.buf[j] != '\\' { + // odd number of backslashes + // it is \" or \\\" + break } j-- - } } } else if c == '\\' { escaped = true } } + j := iter.tail - 1 + for { + if j < iter.head || iter.buf[j] != '\\' { + // even number of backslashes + // either end of buffer, or " found + return -1, false // do not end with \ + } + j-- + if j < iter.head || iter.buf[j] != '\\' { + // odd number of backslashes + // it is \" or \\\" + break + } + j-- - return -1, escaped + } + return -1, true // end with \ +} + + +func (iter *Iterator) skipUntilBreak() { + // true, false, null, number + for { + for i := iter.head; i < iter.tail; i++ { + c := iter.buf[i] + switch c { + case ' ', '\n', '\r', '\t', ',', '}', ']': + iter.head = i + return + } + } + if (!iter.loadMore()) { + return + } + } } func (iter *Iterator) ReadStringAsBytes() (ret []byte) { c := iter.readByte() if c == 'n' { - iter.skipNull() + iter.skipUntilBreak() return } if c != '"' { iter.ReportError("ReadString", `expects " or n`) return } - end, escaped := stringEnd(iter.buf[iter.head:iter.tail]) + end, escaped := iter.findStringEnd() if end != -1 && !escaped { - ret = iter.buf[iter.head:iter.head+end-1] - iter.head += end + // fast path: reuse the underlying buffer + ret = iter.buf[iter.head:end-1] + iter.head = end return ret } str := make([]byte, 0, 8) @@ -506,7 +545,7 @@ func (iter *Iterator) ReadArray() (ret bool) { } switch c { case 'n': { - iter.skipNull() + iter.skipUntilBreak() return false // null } case '[': { @@ -534,7 +573,7 @@ func (iter *Iterator) ReadArray() (ret bool) { func (iter *Iterator) ReadArrayCB(cb func()) { c := iter.nextToken() if c == 'n' { - iter.skipNull() + iter.skipUntilBreak() return // null } if c != '[' { @@ -567,7 +606,7 @@ func (iter *Iterator) ReadArrayCB(cb func()) { func (iter *Iterator) ReadObjectCB(cb func(string)) { c := iter.nextToken() if c == 'n' { - iter.skipNull() + iter.skipUntilBreak() return // null } if c != '{' { @@ -605,7 +644,7 @@ func (iter *Iterator) ReadObject() (ret string) { } switch c { case 'n': { - iter.skipNull() + iter.skipUntilBreak() if iter.Error != nil { return } @@ -703,13 +742,13 @@ func (iter *Iterator) ReadBool() (ret bool) { } switch c { case 't': - iter.skipTrue() + iter.skipUntilBreak() if iter.Error != nil { return } return true case 'f': - iter.skipFalse() + iter.skipUntilBreak() if iter.Error != nil { return } @@ -720,84 +759,27 @@ func (iter *Iterator) ReadBool() (ret bool) { } } -func (iter *Iterator) skipTrue() { - for { - for i := iter.head; i < iter.tail; i++ { - c := iter.buf[i] - switch c { - case 'r', 'u', 'e': - continue - } - iter.head = i - return - } - if !iter.loadMore() { - return - } - } -} - -func (iter *Iterator) skipFalse() { - for { - for i := iter.head; i < iter.tail; i++ { - c := iter.buf[i] - switch c { - case 'a', 'l', 's', 'e': - continue - } - iter.head = i - return - } - if !iter.loadMore() { - return - } - } -} - func (iter *Iterator) ReadNull() (ret bool) { c := iter.readByte() if c == 'n' { - iter.skipNull() + iter.skipUntilBreak() return true } iter.unreadByte() return false } -func (iter *Iterator) skipNull() { - for { - for i := iter.head; i < iter.tail; i++ { - c := iter.buf[i] - switch c { - case 'u', 'l': - continue - } - iter.head = i - return - } - if !iter.loadMore() { - return - } - } -} - func (iter *Iterator) Skip() { c := iter.readByte() switch c { case '"': iter.skipString() - case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - iter.skipNumber() + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n': + iter.skipUntilBreak() case '[': iter.skipArray() case '{': iter.skipObject() - case 't': - iter.skipTrue() - case 'f': - iter.skipFalse() - case 'n': - iter.skipNull() default: iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c)) return @@ -805,42 +787,17 @@ func (iter *Iterator) Skip() { } func (iter *Iterator) skipString() { - escaped := false for { - for i := iter.head; i < iter.tail; i++ { - c := iter.buf[i] - switch c { - case '"': - if escaped { - escaped = false - } else { - iter.head = i+1 - return - } - case '\\': - escaped = !escaped - default: - escaped= false + end, escaped := iter.findStringEnd() + if end == -1 { + if !iter.loadMore() { + return } - } - if !iter.loadMore() { - return - } - } -} - -func (iter *Iterator) skipNumber() { - for { - for i := iter.head; i < iter.tail; i++ { - c := iter.buf[i] - switch c { - case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - continue + if escaped { + iter.head = 1 // skip the first char as last char read is \ } - iter.head = i - return - } - if !iter.loadMore() { + } else { + iter.head = end return } } diff --git a/jsoniter_find_end_test.go b/jsoniter_find_end_test.go new file mode 100644 index 0000000..a7e328a --- /dev/null +++ b/jsoniter_find_end_test.go @@ -0,0 +1,138 @@ +package jsoniter + +import ( + "testing" + "io" +) + +func Test_string_end(t *testing.T) { + end, escaped := ParseString(`abc"`).findStringEnd() + if end != 4 { + t.Fatal(end) + } + if escaped != false { + t.Fatal(escaped) + } + end, escaped = ParseString(`abc\\"`).findStringEnd() + if end != 6 { + t.Fatal(end) + } + if escaped != true { + t.Fatal(escaped) + } + end, escaped = ParseString(`abc\\\\"`).findStringEnd() + if end != 8 { + t.Fatal(end) + } + if escaped != true { + t.Fatal(escaped) + } + end, escaped = ParseString(`abc\"`).findStringEnd() + if end != -1 { + t.Fatal(end) + } + if escaped != false { + t.Fatal(escaped) + } + end, escaped = ParseString(`abc\`).findStringEnd() + if end != -1 { + t.Fatal(end) + } + if escaped != true { + t.Fatal(escaped) + } + end, escaped = ParseString(`abc\\`).findStringEnd() + if end != -1 { + t.Fatal(end) + } + if escaped != false { + t.Fatal(escaped) + } + end, escaped = ParseString(`\\`).findStringEnd() + if end != -1 { + t.Fatal(end) + } + if escaped != false { + t.Fatal(escaped) + } + end, escaped = ParseString(`\`).findStringEnd() + if end != -1 { + t.Fatal(end) + } + if escaped != true { + t.Fatal(escaped) + } +} + +type StagedReader struct { + r1 string + r2 string + r3 string + r int +} + +func (reader *StagedReader) Read(p []byte) (n int, err error) { + reader.r++ + switch reader.r { + case 1: + copy(p, []byte(reader.r1)) + return len(reader.r1), nil + case 2: + copy(p, []byte(reader.r2)) + return len(reader.r2), nil + case 3: + copy(p, []byte(reader.r3)) + return len(reader.r3), nil + default: + return 0, io.EOF + } +} + +func Test_skip_string(t *testing.T) { + iter := ParseString(`"abc`) + iter.skipString() + if iter.head != 1 { + t.Fatal(iter.head) + } + iter = ParseString(`\""abc`) + iter.skipString() + if iter.head != 3 { + t.Fatal(iter.head) + } + reader := &StagedReader{ + r1: `abc`, + r2: `"`, + } + iter = Parse(reader, 4096) + iter.skipString() + if iter.head != 1 { + t.Fatal(iter.head) + } + reader = &StagedReader{ + r1: `abc`, + r2: `1"`, + } + iter = Parse(reader, 4096) + iter.skipString() + if iter.head != 2 { + t.Fatal(iter.head) + } + reader = &StagedReader{ + r1: `abc\`, + r2: `"`, + } + iter = Parse(reader, 4096) + iter.skipString() + if iter.Error != io.EOF { + t.Fatal(iter.Error) + } + reader = &StagedReader{ + r1: `abc\`, + r2: `""`, + } + iter = Parse(reader, 4096) + iter.skipString() + if iter.head != 2 { + t.Fatal(iter.head) + } +} diff --git a/jsoniter_skip_test.go b/jsoniter_skip_test.go index b5eb8cd..4070cc2 100644 --- a/jsoniter_skip_test.go +++ b/jsoniter_skip_test.go @@ -5,25 +5,6 @@ import ( "encoding/json" ) -func Test_skip_string(t *testing.T) { - iter := ParseString(`["a", "b"]`) - iter.ReadArray() - iter.Skip() - iter.ReadArray() - if iter.ReadString() != "b" { - t.FailNow() - } -} - -func Test_skip_string_with_escape(t *testing.T) { - iter := ParseString(`["a\"", "b"]`) - iter.ReadArray() - iter.Skip() - iter.ReadArray() - if iter.ReadString() != "b" { - t.FailNow() - } -} func Test_skip_number(t *testing.T) { iter := ParseString(`[-0.12, "b"]`) @@ -35,6 +16,36 @@ func Test_skip_number(t *testing.T) { } } +func Test_skip_null(t *testing.T) { + iter := ParseString(`[null , "b"]`) + iter.ReadArray() + iter.Skip() + iter.ReadArray() + if iter.ReadString() != "b" { + t.FailNow() + } +} + +func Test_skip_true(t *testing.T) { + iter := ParseString(`[true , "b"]`) + iter.ReadArray() + iter.Skip() + iter.ReadArray() + if iter.ReadString() != "b" { + t.FailNow() + } +} + +func Test_skip_false(t *testing.T) { + iter := ParseString(`[false , "b"]`) + iter.ReadArray() + iter.Skip() + iter.ReadArray() + if iter.ReadString() != "b" { + t.FailNow() + } +} + func Test_skip_array(t *testing.T) { iter := ParseString(`[[1, [2, [3], 4]], "b"]`) iter.ReadArray()