You've already forked json-iterator
mirror of
https://github.com/json-iterator/go.git
synced 2025-07-15 23:54:21 +02:00
#137 fix unicode surrogate incompatibility
This commit is contained in:
@ -42,58 +42,7 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
|
|||||||
}
|
}
|
||||||
if c == '\\' {
|
if c == '\\' {
|
||||||
c = iter.readByte()
|
c = iter.readByte()
|
||||||
switch c {
|
str = iter.readEscapedChar(c, str)
|
||||||
case 'u', 'U':
|
|
||||||
r := iter.readU4()
|
|
||||||
if utf16.IsSurrogate(r) {
|
|
||||||
c = iter.readByte()
|
|
||||||
if iter.Error != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if c != '\\' {
|
|
||||||
iter.ReportError("ReadString",
|
|
||||||
`expects \u after utf16 surrogate, but \ not found`)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
c = iter.readByte()
|
|
||||||
if iter.Error != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if c != 'u' && c != 'U' {
|
|
||||||
iter.ReportError("ReadString",
|
|
||||||
`expects \u after utf16 surrogate, but \u not found`)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
r2 := iter.readU4()
|
|
||||||
if iter.Error != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
combined := utf16.DecodeRune(r, r2)
|
|
||||||
str = appendRune(str, combined)
|
|
||||||
} else {
|
|
||||||
str = appendRune(str, r)
|
|
||||||
}
|
|
||||||
case '"':
|
|
||||||
str = append(str, '"')
|
|
||||||
case '\\':
|
|
||||||
str = append(str, '\\')
|
|
||||||
case '/':
|
|
||||||
str = append(str, '/')
|
|
||||||
case 'b':
|
|
||||||
str = append(str, '\b')
|
|
||||||
case 'f':
|
|
||||||
str = append(str, '\f')
|
|
||||||
case 'n':
|
|
||||||
str = append(str, '\n')
|
|
||||||
case 'r':
|
|
||||||
str = append(str, '\r')
|
|
||||||
case 't':
|
|
||||||
str = append(str, '\t')
|
|
||||||
default:
|
|
||||||
iter.ReportError("ReadString",
|
|
||||||
`invalid escape char after \`)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
str = append(str, c)
|
str = append(str, c)
|
||||||
}
|
}
|
||||||
@ -102,6 +51,66 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
|
||||||
|
switch c {
|
||||||
|
case 'u':
|
||||||
|
r := iter.readU4()
|
||||||
|
if utf16.IsSurrogate(r) {
|
||||||
|
c = iter.readByte()
|
||||||
|
if iter.Error != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if c != '\\' {
|
||||||
|
iter.unreadByte()
|
||||||
|
str = appendRune(str, r)
|
||||||
|
return str
|
||||||
|
}
|
||||||
|
c = iter.readByte()
|
||||||
|
if iter.Error != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if c != 'u' {
|
||||||
|
str = appendRune(str, r)
|
||||||
|
return iter.readEscapedChar(c, str)
|
||||||
|
}
|
||||||
|
r2 := iter.readU4()
|
||||||
|
if iter.Error != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
combined := utf16.DecodeRune(r, r2)
|
||||||
|
if combined == '\uFFFD' {
|
||||||
|
str = appendRune(str, r)
|
||||||
|
str = appendRune(str, r2)
|
||||||
|
} else {
|
||||||
|
str = appendRune(str, combined)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
str = appendRune(str, r)
|
||||||
|
}
|
||||||
|
case '"':
|
||||||
|
str = append(str, '"')
|
||||||
|
case '\\':
|
||||||
|
str = append(str, '\\')
|
||||||
|
case '/':
|
||||||
|
str = append(str, '/')
|
||||||
|
case 'b':
|
||||||
|
str = append(str, '\b')
|
||||||
|
case 'f':
|
||||||
|
str = append(str, '\f')
|
||||||
|
case 'n':
|
||||||
|
str = append(str, '\n')
|
||||||
|
case 'r':
|
||||||
|
str = append(str, '\r')
|
||||||
|
case 't':
|
||||||
|
str = append(str, '\t')
|
||||||
|
default:
|
||||||
|
iter.ReportError("ReadString",
|
||||||
|
`invalid escape char after \`)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return str
|
||||||
|
}
|
||||||
|
|
||||||
// ReadStringAsSlice reads a string from the iterator without copying it into string form.
|
// ReadStringAsSlice reads a string from the iterator without copying it into string form.
|
||||||
// The []byte cannot be kept, as it will change after the next iterator call.
|
// The []byte cannot be kept, as it will change after the next iterator call.
|
||||||
func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
|
func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
|
||||||
|
@ -19,6 +19,8 @@ func Test_read_string(t *testing.T) {
|
|||||||
`"\"`,
|
`"\"`,
|
||||||
`"\\\"`,
|
`"\\\"`,
|
||||||
"\"\n\"",
|
"\"\n\"",
|
||||||
|
`"\U0001f64f"`,
|
||||||
|
`"\uD83D\u00"`,
|
||||||
}
|
}
|
||||||
for i := 0; i < 32; i++ {
|
for i := 0; i < 32; i++ {
|
||||||
// control characters are invalid
|
// control characters are invalid
|
||||||
@ -39,6 +41,11 @@ func Test_read_string(t *testing.T) {
|
|||||||
{`"a"`, "a"},
|
{`"a"`, "a"},
|
||||||
{`null`, ""},
|
{`null`, ""},
|
||||||
{`"Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"`, "Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"},
|
{`"Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"`, "Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"},
|
||||||
|
{`"\uD83D"`, string([]byte{239, 191, 189})},
|
||||||
|
{`"\uD83D\\"`, string([]byte{239, 191, 189, '\\'})},
|
||||||
|
{`"\uD83D\ub000"`, string([]byte{239, 191, 189, 235, 128, 128})},
|
||||||
|
{`"\uD83D\ude04"`, "😄"},
|
||||||
|
{`"\uDEADBEEF"`, string([]byte{239, 191, 189, 66, 69, 69, 70})},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range goodInputs {
|
for _, tc := range goodInputs {
|
||||||
@ -111,7 +118,9 @@ func Test_read_exotic_string(t *testing.T) {
|
|||||||
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
|
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
|
||||||
should := require.New(t)
|
should := require.New(t)
|
||||||
iter := ParseString(ConfigDefault, input)
|
iter := ParseString(ConfigDefault, input)
|
||||||
should.Equal(output, iter.ReadString())
|
var v string
|
||||||
|
should.Nil(json.Unmarshal([]byte(input), &v))
|
||||||
|
should.Equal(v, iter.ReadString())
|
||||||
})
|
})
|
||||||
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
|
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
|
||||||
should := require.New(t)
|
should := require.New(t)
|
||||||
|
Reference in New Issue
Block a user