You've already forked json-iterator
mirror of
https://github.com/json-iterator/go.git
synced 2025-07-15 23:54:21 +02:00
#137 fix unicode surrogate incompatibility
This commit is contained in:
@ -42,58 +42,7 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
|
||||
}
|
||||
if c == '\\' {
|
||||
c = iter.readByte()
|
||||
switch c {
|
||||
case 'u', 'U':
|
||||
r := iter.readU4()
|
||||
if utf16.IsSurrogate(r) {
|
||||
c = iter.readByte()
|
||||
if iter.Error != nil {
|
||||
return
|
||||
}
|
||||
if c != '\\' {
|
||||
iter.ReportError("ReadString",
|
||||
`expects \u after utf16 surrogate, but \ not found`)
|
||||
return
|
||||
}
|
||||
c = iter.readByte()
|
||||
if iter.Error != nil {
|
||||
return
|
||||
}
|
||||
if c != 'u' && c != 'U' {
|
||||
iter.ReportError("ReadString",
|
||||
`expects \u after utf16 surrogate, but \u not found`)
|
||||
return
|
||||
}
|
||||
r2 := iter.readU4()
|
||||
if iter.Error != nil {
|
||||
return
|
||||
}
|
||||
combined := utf16.DecodeRune(r, r2)
|
||||
str = appendRune(str, combined)
|
||||
} else {
|
||||
str = appendRune(str, r)
|
||||
}
|
||||
case '"':
|
||||
str = append(str, '"')
|
||||
case '\\':
|
||||
str = append(str, '\\')
|
||||
case '/':
|
||||
str = append(str, '/')
|
||||
case 'b':
|
||||
str = append(str, '\b')
|
||||
case 'f':
|
||||
str = append(str, '\f')
|
||||
case 'n':
|
||||
str = append(str, '\n')
|
||||
case 'r':
|
||||
str = append(str, '\r')
|
||||
case 't':
|
||||
str = append(str, '\t')
|
||||
default:
|
||||
iter.ReportError("ReadString",
|
||||
`invalid escape char after \`)
|
||||
return
|
||||
}
|
||||
str = iter.readEscapedChar(c, str)
|
||||
} else {
|
||||
str = append(str, c)
|
||||
}
|
||||
@ -102,6 +51,66 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
|
||||
return
|
||||
}
|
||||
|
||||
func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
|
||||
switch c {
|
||||
case 'u':
|
||||
r := iter.readU4()
|
||||
if utf16.IsSurrogate(r) {
|
||||
c = iter.readByte()
|
||||
if iter.Error != nil {
|
||||
return nil
|
||||
}
|
||||
if c != '\\' {
|
||||
iter.unreadByte()
|
||||
str = appendRune(str, r)
|
||||
return str
|
||||
}
|
||||
c = iter.readByte()
|
||||
if iter.Error != nil {
|
||||
return nil
|
||||
}
|
||||
if c != 'u' {
|
||||
str = appendRune(str, r)
|
||||
return iter.readEscapedChar(c, str)
|
||||
}
|
||||
r2 := iter.readU4()
|
||||
if iter.Error != nil {
|
||||
return nil
|
||||
}
|
||||
combined := utf16.DecodeRune(r, r2)
|
||||
if combined == '\uFFFD' {
|
||||
str = appendRune(str, r)
|
||||
str = appendRune(str, r2)
|
||||
} else {
|
||||
str = appendRune(str, combined)
|
||||
}
|
||||
} else {
|
||||
str = appendRune(str, r)
|
||||
}
|
||||
case '"':
|
||||
str = append(str, '"')
|
||||
case '\\':
|
||||
str = append(str, '\\')
|
||||
case '/':
|
||||
str = append(str, '/')
|
||||
case 'b':
|
||||
str = append(str, '\b')
|
||||
case 'f':
|
||||
str = append(str, '\f')
|
||||
case 'n':
|
||||
str = append(str, '\n')
|
||||
case 'r':
|
||||
str = append(str, '\r')
|
||||
case 't':
|
||||
str = append(str, '\t')
|
||||
default:
|
||||
iter.ReportError("ReadString",
|
||||
`invalid escape char after \`)
|
||||
return nil
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
// ReadStringAsSlice reads a string from the iterator without copying it into string form.
|
||||
// The returned []byte must not be retained, as it will change after the next iterator call.
|
||||
func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
|
||||
|
@ -19,6 +19,8 @@ func Test_read_string(t *testing.T) {
|
||||
`"\"`,
|
||||
`"\\\"`,
|
||||
"\"\n\"",
|
||||
`"\U0001f64f"`,
|
||||
`"\uD83D\u00"`,
|
||||
}
|
||||
for i := 0; i < 32; i++ {
|
||||
// control characters are invalid
|
||||
@ -39,6 +41,11 @@ func Test_read_string(t *testing.T) {
|
||||
{`"a"`, "a"},
|
||||
{`null`, ""},
|
||||
{`"Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"`, "Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"},
|
||||
{`"\uD83D"`, string([]byte{239, 191, 189})},
|
||||
{`"\uD83D\\"`, string([]byte{239, 191, 189, '\\'})},
|
||||
{`"\uD83D\ub000"`, string([]byte{239, 191, 189, 235, 128, 128})},
|
||||
{`"\uD83D\ude04"`, "😄"},
|
||||
{`"\uDEADBEEF"`, string([]byte{239, 191, 189, 66, 69, 69, 70})},
|
||||
}
|
||||
|
||||
for _, tc := range goodInputs {
|
||||
@ -111,7 +118,9 @@ func Test_read_exotic_string(t *testing.T) {
|
||||
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
|
||||
should := require.New(t)
|
||||
iter := ParseString(ConfigDefault, input)
|
||||
should.Equal(output, iter.ReadString())
|
||||
var v string
|
||||
should.Nil(json.Unmarshal([]byte(input), &v))
|
||||
should.Equal(v, iter.ReadString())
|
||||
})
|
||||
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
|
||||
should := require.New(t)
|
||||
|
Reference in New Issue
Block a user