1
0
mirror of https://github.com/json-iterator/go.git synced 2025-07-15 23:54:21 +02:00

#137 fix unicode surrogate incompatibility

This commit is contained in:
Tao Wen
2017-07-18 23:17:52 +08:00
parent 6b6938829d
commit 156284b028
2 changed files with 71 additions and 53 deletions

View File

@ -42,34 +42,48 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
}
if c == '\\' {
c = iter.readByte()
str = iter.readEscapedChar(c, str)
} else {
str = append(str, c)
}
}
iter.ReportError("ReadString", "unexpected end of input")
return
}
func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
switch c {
case 'u', 'U':
case 'u':
r := iter.readU4()
if utf16.IsSurrogate(r) {
c = iter.readByte()
if iter.Error != nil {
return
return nil
}
if c != '\\' {
iter.ReportError("ReadString",
`expects \u after utf16 surrogate, but \ not found`)
return
iter.unreadByte()
str = appendRune(str, r)
return str
}
c = iter.readByte()
if iter.Error != nil {
return
return nil
}
if c != 'u' && c != 'U' {
iter.ReportError("ReadString",
`expects \u after utf16 surrogate, but \u not found`)
return
if c != 'u' {
str = appendRune(str, r)
return iter.readEscapedChar(c, str)
}
r2 := iter.readU4()
if iter.Error != nil {
return
return nil
}
combined := utf16.DecodeRune(r, r2)
if combined == '\uFFFD' {
str = appendRune(str, r)
str = appendRune(str, r2)
} else {
str = appendRune(str, combined)
}
} else {
str = appendRune(str, r)
}
@ -92,14 +106,9 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
default:
iter.ReportError("ReadString",
`invalid escape char after \`)
return
return nil
}
} else {
str = append(str, c)
}
}
iter.ReportError("ReadString", "unexpected end of input")
return
return str
}
// ReadStringAsSlice reads a string from the iterator without copying it into string form.

View File

@ -19,6 +19,8 @@ func Test_read_string(t *testing.T) {
`"\"`,
`"\\\"`,
"\"\n\"",
`"\U0001f64f"`,
`"\uD83D\u00"`,
}
for i := 0; i < 32; i++ {
// control characters are invalid
@ -39,6 +41,11 @@ func Test_read_string(t *testing.T) {
{`"a"`, "a"},
{`null`, ""},
{`"Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"`, "Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"},
{`"\uD83D"`, string([]byte{239, 191, 189})},
{`"\uD83D\\"`, string([]byte{239, 191, 189, '\\'})},
{`"\uD83D\ub000"`, string([]byte{239, 191, 189, 235, 128, 128})},
{`"\uD83D\ude04"`, "😄"},
{`"\uDEADBEEF"`, string([]byte{239, 191, 189, 66, 69, 69, 70})},
}
for _, tc := range goodInputs {
@ -111,7 +118,9 @@ func Test_read_exotic_string(t *testing.T) {
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
should := require.New(t)
iter := ParseString(ConfigDefault, input)
should.Equal(output, iter.ReadString())
var v string
should.Nil(json.Unmarshal([]byte(input), &v))
should.Equal(v, iter.ReadString())
})
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
should := require.New(t)