1
0
mirror of https://github.com/json-iterator/go.git synced 2025-03-20 20:54:55 +02:00

optimize skip

This commit is contained in:
Tao Wen 2016-12-06 23:31:11 +08:00
parent 4863661c51
commit ea435e3473
3 changed files with 236 additions and 130 deletions

View File

@ -303,50 +303,89 @@ func (iter *Iterator) ReadString() (ret string) {
return string(iter.ReadStringAsBytes())
}
// adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
// Tries to find the end of a string in the bytes that are currently buffered.
// Supports strings that contain escaped quote symbols.
//
// Two results are returned: an end position and a flag.
//   - When a closing quote is found, the position is the index just past that
//     quote (i + 1). The flag is false if no backslash was seen before it and
//     true otherwise.
//   - When no closing quote is found, the position is -1.
//
// NOTE(review): this span looks like a rendered diff in which the removed
// slice-based stringEnd(data) lines and the added iter.buf-based
// findStringEnd lines are interleaved; it is not valid Go as shown. Confirm
// which lines are live before relying on the details below.
func stringEnd(data []byte) (int, bool) {
func (iter *Iterator) findStringEnd() (int, bool) {
// escaped becomes true once any backslash has been seen and is never reset.
escaped := false
for i, c := range data {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
if c == '"' {
if !escaped {
// no backslash seen so far, so this quote ends the string
return i + 1, false
} else {
// walk backwards from the quote, two bytes per round, to learn whether
// the run of backslashes directly before it has even or odd length
j := i - 1
for {
if j < 0 || data[j] != '\\' {
return i + 1, true // even number of backslashes
if j < iter.head || iter.buf[j] != '\\' {
// even number of backslashes
// either end of buffer, or " found
return i + 1, true
}
j--
if j < 0 || data[j] != '\\' {
break // odd number of backslashes
if j < iter.head || iter.buf[j] != '\\' {
// odd number of backslashes
// it is \" or \\\"
break
}
j--
}
}
} else if c == '\\' {
escaped = true
}
}
// no closing quote was returned by the scan above: inspect the run of
// backslashes at the very end of the scanned range, again two bytes per round
j := iter.tail - 1
for {
if j < iter.head || iter.buf[j] != '\\' {
// even number of trailing backslashes (possibly zero)
return -1, false // do not end with \
}
j--
if j < iter.head || iter.buf[j] != '\\' {
// odd number of trailing backslashes
break
}
j--
return -1, escaped
}
return -1, true // end with \
}
// skipUntilBreak moves iter.head forward to the first byte that is one of
// space, '\n', '\r', '\t', ',', '}' or ']' and leaves head pointing at that
// byte. It is used for true, false, null and numbers.
//
// When the buffered range [head, tail) holds no such byte, loadMore is called
// and the scan restarts from the new head. If loadMore returns false the
// method returns without assigning iter.head itself.
func (iter *Iterator) skipUntilBreak() {
	for {
		for pos := iter.head; pos < iter.tail; pos++ {
			b := iter.buf[pos]
			if b == ' ' || b == '\n' || b == '\r' || b == '\t' || b == ',' || b == '}' || b == ']' {
				iter.head = pos
				return
			}
		}
		if !iter.loadMore() {
			return
		}
	}
}
func (iter *Iterator) ReadStringAsBytes() (ret []byte) {
c := iter.readByte()
if c == 'n' {
iter.skipNull()
iter.skipUntilBreak()
return
}
if c != '"' {
iter.ReportError("ReadString", `expects " or n`)
return
}
end, escaped := stringEnd(iter.buf[iter.head:iter.tail])
end, escaped := iter.findStringEnd()
if end != -1 && !escaped {
ret = iter.buf[iter.head:iter.head+end-1]
iter.head += end
// fast path: reuse the underlying buffer
ret = iter.buf[iter.head:end-1]
iter.head = end
return ret
}
str := make([]byte, 0, 8)
@ -506,7 +545,7 @@ func (iter *Iterator) ReadArray() (ret bool) {
}
switch c {
case 'n': {
iter.skipNull()
iter.skipUntilBreak()
return false // null
}
case '[': {
@ -534,7 +573,7 @@ func (iter *Iterator) ReadArray() (ret bool) {
func (iter *Iterator) ReadArrayCB(cb func()) {
c := iter.nextToken()
if c == 'n' {
iter.skipNull()
iter.skipUntilBreak()
return // null
}
if c != '[' {
@ -567,7 +606,7 @@ func (iter *Iterator) ReadArrayCB(cb func()) {
func (iter *Iterator) ReadObjectCB(cb func(string)) {
c := iter.nextToken()
if c == 'n' {
iter.skipNull()
iter.skipUntilBreak()
return // null
}
if c != '{' {
@ -605,7 +644,7 @@ func (iter *Iterator) ReadObject() (ret string) {
}
switch c {
case 'n': {
iter.skipNull()
iter.skipUntilBreak()
if iter.Error != nil {
return
}
@ -703,13 +742,13 @@ func (iter *Iterator) ReadBool() (ret bool) {
}
switch c {
case 't':
iter.skipTrue()
iter.skipUntilBreak()
if iter.Error != nil {
return
}
return true
case 'f':
iter.skipFalse()
iter.skipUntilBreak()
if iter.Error != nil {
return
}
@ -720,84 +759,27 @@ func (iter *Iterator) ReadBool() (ret bool) {
}
}
// skipTrue moves iter.head forward past any run of the bytes 'r', 'u' and 'e'
// and leaves head on the first byte that is none of them. The order of the
// letters is not checked.
//
// When every byte in [head, tail) is one of those letters, loadMore is called
// and the scan restarts from the new head. If loadMore returns false the
// method returns without assigning iter.head itself.
func (iter *Iterator) skipTrue() {
	for {
		for pos := iter.head; pos < iter.tail; pos++ {
			if b := iter.buf[pos]; b != 'r' && b != 'u' && b != 'e' {
				iter.head = pos
				return
			}
		}
		if !iter.loadMore() {
			return
		}
	}
}
// skipFalse moves iter.head forward past any run of the bytes 'a', 'l', 's'
// and 'e' and leaves head on the first byte that is none of them. The order
// of the letters is not checked.
//
// When every byte in [head, tail) is one of those letters, loadMore is called
// and the scan restarts from the new head. If loadMore returns false the
// method returns without assigning iter.head itself.
func (iter *Iterator) skipFalse() {
	for {
		for pos := iter.head; pos < iter.tail; pos++ {
			switch iter.buf[pos] {
			case 'a', 'l', 's', 'e':
				// still inside the literal, keep scanning
			default:
				iter.head = pos
				return
			}
		}
		if !iter.loadMore() {
			return
		}
	}
}
// ReadNull reads one byte with readByte and reports whether it is 'n'.
// When it is, the skip helpers below are called and true is returned.
// For any other byte, unreadByte is called and false is returned.
//
// NOTE(review): both skipNull and skipUntilBreak are called here. This span
// looks like a rendered diff in which skipNull is the removed line and
// skipUntilBreak its replacement — confirm which call is intended.
func (iter *Iterator) ReadNull() (ret bool) {
c := iter.readByte()
if c == 'n' {
iter.skipNull()
iter.skipUntilBreak()
return true
}
iter.unreadByte()
return false
}
// skipNull moves iter.head forward past any run of the bytes 'u' and 'l' and
// leaves head on the first byte that is neither. The order of the letters is
// not checked.
//
// When every byte in [head, tail) is one of those letters, loadMore is called
// and the scan restarts from the new head. If loadMore returns false the
// method returns without assigning iter.head itself.
func (iter *Iterator) skipNull() {
	for {
		pos := iter.head
		for pos < iter.tail && (iter.buf[pos] == 'u' || iter.buf[pos] == 'l') {
			pos++
		}
		if pos < iter.tail {
			iter.head = pos
			return
		}
		if !iter.loadMore() {
			return
		}
	}
}
func (iter *Iterator) Skip() {
c := iter.readByte()
switch c {
case '"':
iter.skipString()
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
iter.skipNumber()
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n':
iter.skipUntilBreak()
case '[':
iter.skipArray()
case '{':
iter.skipObject()
case 't':
iter.skipTrue()
case 'f':
iter.skipFalse()
case 'n':
iter.skipNull()
default:
iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
return
@ -805,42 +787,17 @@ func (iter *Iterator) Skip() {
}
func (iter *Iterator) skipString() {
escaped := false
for {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
switch c {
case '"':
if escaped {
escaped = false
} else {
iter.head = i+1
return
}
case '\\':
escaped = !escaped
default:
escaped= false
end, escaped := iter.findStringEnd()
if end == -1 {
if !iter.loadMore() {
return
}
}
if !iter.loadMore() {
return
}
}
}
func (iter *Iterator) skipNumber() {
for {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
switch c {
case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
continue
if escaped {
iter.head = 1 // skip the first char as last char read is \
}
iter.head = i
return
}
if !iter.loadMore() {
} else {
iter.head = end
return
}
}

138
jsoniter_find_end_test.go Normal file
View File

@ -0,0 +1,138 @@
package jsoniter
import (
"testing"
"io"
)
// Test_string_end runs findStringEnd over a fixed list of inputs and checks
// both returned values. The inputs cover a plain closing quote, a quote after
// an even number of backslashes, a quote after a single backslash, and inputs
// without a closing quote that end with zero, one or two backslashes.
// The first mismatch stops the test and reports the offending value.
func Test_string_end(t *testing.T) {
	cases := []struct {
		input       string
		wantEnd     int
		wantEscaped bool
	}{
		{`abc"`, 4, false},
		{`abc\\"`, 6, true},
		{`abc\\\\"`, 8, true},
		{`abc\"`, -1, false},
		{`abc\`, -1, true},
		{`abc\\`, -1, false},
		{`\\`, -1, false},
		{`\`, -1, true},
	}
	for _, tc := range cases {
		end, escaped := ParseString(tc.input).findStringEnd()
		if end != tc.wantEnd {
			t.Fatal(end)
		}
		if escaped != tc.wantEscaped {
			t.Fatal(escaped)
		}
	}
}
// StagedReader is a test helper implementing io.Reader. It hands out up to
// three strings (r1, r2, r3) one stage at a time, so a test can control
// exactly where the boundaries between successive reads fall.
type StagedReader struct {
	r1 string // data for the first stage
	r2 string // data for the second stage
	r3 string // data for the third stage
	r  int    // number of stages that have been fully delivered
}

// Read copies the pending bytes of the current stage into p and returns the
// number of bytes actually copied, which is never more than len(p).
//
// When p can hold the whole stage, the stage is delivered in one call and the
// next call moves on to the following stage. When p is too small, the
// undelivered remainder is kept (the stage field is trimmed in place) and is
// returned by later calls before the next stage starts. An empty stage
// yields (0, nil) once. After all three stages are done, Read returns
// (0, io.EOF).
func (reader *StagedReader) Read(p []byte) (n int, err error) {
	stages := [...]*string{&reader.r1, &reader.r2, &reader.r3}
	if reader.r < 0 || reader.r >= len(stages) {
		return 0, io.EOF
	}
	stage := stages[reader.r]
	// copy reports how many bytes fit; returning len(*stage) instead would
	// claim more bytes than were written whenever p is shorter than the stage.
	n = copy(p, *stage)
	*stage = (*stage)[n:]
	if *stage == "" {
		reader.r++
	}
	return n, nil
}
// Test_skip_string calls skipString on several inputs and checks the
// resulting iter.head (or iter.Error for one case). Two inputs are parsed
// from a single string; the others come from a StagedReader so that the
// content arrives in separate reads, including a stage that ends with a
// backslash.
func Test_skip_string(t *testing.T) {
	wantHead := func(got, want int) {
		if got != want {
			t.Fatal(got)
		}
	}

	// whole input available in one buffer
	iter := ParseString(`"abc`)
	iter.skipString()
	wantHead(iter.head, 1)

	iter = ParseString(`\""abc`)
	iter.skipString()
	wantHead(iter.head, 3)

	// closing quote is the only byte of the second stage
	staged := &StagedReader{r1: `abc`, r2: `"`}
	iter = Parse(staged, 4096)
	iter.skipString()
	wantHead(iter.head, 1)

	// closing quote is the second byte of the second stage
	staged = &StagedReader{r1: `abc`, r2: `1"`}
	iter = Parse(staged, 4096)
	iter.skipString()
	wantHead(iter.head, 2)

	// first stage ends with a backslash and the second holds a single quote
	staged = &StagedReader{r1: `abc\`, r2: `"`}
	iter = Parse(staged, 4096)
	iter.skipString()
	if iter.Error != io.EOF {
		t.Fatal(iter.Error)
	}

	// first stage ends with a backslash and the second holds two quotes
	staged = &StagedReader{r1: `abc\`, r2: `""`}
	iter = Parse(staged, 4096)
	iter.skipString()
	wantHead(iter.head, 2)
}

View File

@ -5,25 +5,6 @@ import (
"encoding/json"
)
// Test_skip_string parses `["a", "b"]`, then calls ReadArray, Skip and
// ReadArray in that order. It fails unless the following ReadString call
// returns "b".
func Test_skip_string(t *testing.T) {
	parser := ParseString(`["a", "b"]`)
	parser.ReadArray()
	parser.Skip()
	parser.ReadArray()
	if got := parser.ReadString(); got != "b" {
		t.FailNow()
	}
}
// Test_skip_string_with_escape parses `["a\"", "b"]`, whose first element
// contains an escaped quote, then calls ReadArray, Skip and ReadArray in
// that order. It fails unless the following ReadString call returns "b".
func Test_skip_string_with_escape(t *testing.T) {
	parser := ParseString(`["a\"", "b"]`)
	parser.ReadArray()
	parser.Skip()
	parser.ReadArray()
	if got := parser.ReadString(); got != "b" {
		t.FailNow()
	}
}
func Test_skip_number(t *testing.T) {
iter := ParseString(`[-0.12, "b"]`)
@ -35,6 +16,36 @@ func Test_skip_number(t *testing.T) {
}
}
// Test_skip_null parses `[null , "b"]`, where null is followed by a space,
// then calls ReadArray, Skip and ReadArray in that order. It fails unless the
// following ReadString call returns "b".
func Test_skip_null(t *testing.T) {
	parser := ParseString(`[null , "b"]`)
	parser.ReadArray()
	parser.Skip()
	parser.ReadArray()
	if got := parser.ReadString(); got != "b" {
		t.FailNow()
	}
}
// Test_skip_true parses `[true , "b"]`, where true is followed by a space,
// then calls ReadArray, Skip and ReadArray in that order. It fails unless the
// following ReadString call returns "b".
func Test_skip_true(t *testing.T) {
	parser := ParseString(`[true , "b"]`)
	parser.ReadArray()
	parser.Skip()
	parser.ReadArray()
	if got := parser.ReadString(); got != "b" {
		t.FailNow()
	}
}
// Test_skip_false parses `[false , "b"]`, where false is followed by a
// space, then calls ReadArray, Skip and ReadArray in that order. It fails
// unless the following ReadString call returns "b".
func Test_skip_false(t *testing.T) {
	parser := ParseString(`[false , "b"]`)
	parser.ReadArray()
	parser.Skip()
	parser.ReadArray()
	if got := parser.ReadString(); got != "b" {
		t.FailNow()
	}
}
func Test_skip_array(t *testing.T) {
iter := ParseString(`[[1, [2, [3], 4]], "b"]`)
iter.ReadArray()