1
0
mirror of https://github.com/json-iterator/go.git synced 2025-07-15 23:54:21 +02:00

#137 fix unicode surrogate incompatibility

This commit is contained in:
Tao Wen
2017-07-18 23:17:52 +08:00
parent 6b6938829d
commit 156284b028
2 changed files with 71 additions and 53 deletions

View File

@ -42,58 +42,7 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
}
if c == '\\' {
c = iter.readByte()
switch c {
case 'u', 'U':
r := iter.readU4()
if utf16.IsSurrogate(r) {
c = iter.readByte()
if iter.Error != nil {
return
}
if c != '\\' {
iter.ReportError("ReadString",
`expects \u after utf16 surrogate, but \ not found`)
return
}
c = iter.readByte()
if iter.Error != nil {
return
}
if c != 'u' && c != 'U' {
iter.ReportError("ReadString",
`expects \u after utf16 surrogate, but \u not found`)
return
}
r2 := iter.readU4()
if iter.Error != nil {
return
}
combined := utf16.DecodeRune(r, r2)
str = appendRune(str, combined)
} else {
str = appendRune(str, r)
}
case '"':
str = append(str, '"')
case '\\':
str = append(str, '\\')
case '/':
str = append(str, '/')
case 'b':
str = append(str, '\b')
case 'f':
str = append(str, '\f')
case 'n':
str = append(str, '\n')
case 'r':
str = append(str, '\r')
case 't':
str = append(str, '\t')
default:
iter.ReportError("ReadString",
`invalid escape char after \`)
return
}
str = iter.readEscapedChar(c, str)
} else {
str = append(str, c)
}
@ -102,6 +51,66 @@ func (iter *Iterator) readStringSlowPath() (ret string) {
return
}
// readEscapedChar decodes a single escape sequence whose leading backslash has
// already been consumed by the caller. c is the byte that followed the
// backslash and str is the output accumulated so far; the decoded bytes are
// appended to str and the extended slice is returned.
//
// nil is returned when iter.Error is set while reading the continuation of a
// surrogate escape, or when c is not a recognised escape character (in which
// case an error is reported through ReportError).
//
// For \u escapes, a UTF-16 surrogate is combined with an immediately following
// \uXXXX escape when the two form a valid pair. When they do not, only the
// first code unit is emitted on its own and the second one is re-examined as
// the possible start of a new pair, so that input such as \uD83D\uD83D\uDE04
// still decodes its trailing valid pair, matching encoding/json.
// NOTE(review): the tests suggest appendRune writes a lone surrogate as
// U+FFFD; confirm against appendRune.
func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
	switch c {
	case 'u':
		r := iter.readU4()
		// Each iteration handles one pending surrogate code unit held in r.
		for utf16.IsSurrogate(r) {
			c = iter.readByte()
			if iter.Error != nil {
				return nil
			}
			if c != '\\' {
				// Not an escape: put the byte back and emit the lone surrogate.
				iter.unreadByte()
				break
			}
			c = iter.readByte()
			if iter.Error != nil {
				return nil
			}
			if c != 'u' {
				// A different escape follows: emit the lone surrogate, then
				// decode that escape normally.
				str = appendRune(str, r)
				return iter.readEscapedChar(c, str)
			}
			r2 := iter.readU4()
			if iter.Error != nil {
				return nil
			}
			if combined := utf16.DecodeRune(r, r2); combined != '\uFFFD' {
				// Valid pair: emit the combined code point below.
				r = combined
				break
			}
			// Invalid pair: emit the first unit alone and let the second one
			// be considered as the start of a new pair.
			str = appendRune(str, r)
			r = r2
		}
		str = appendRune(str, r)
	case '"':
		str = append(str, '"')
	case '\\':
		str = append(str, '\\')
	case '/':
		str = append(str, '/')
	case 'b':
		str = append(str, '\b')
	case 'f':
		str = append(str, '\f')
	case 'n':
		str = append(str, '\n')
	case 'r':
		str = append(str, '\r')
	case 't':
		str = append(str, '\t')
	default:
		iter.ReportError("ReadString",
			`invalid escape char after \`)
		return nil
	}
	return str
}
// ReadStringAsSlice reads a string from the iterator without copying it into string form.
// The returned []byte must not be kept, as it will change after the next iterator call.
func (iter *Iterator) ReadStringAsSlice() (ret []byte) {

View File

@ -19,6 +19,8 @@ func Test_read_string(t *testing.T) {
`"\"`,
`"\\\"`,
"\"\n\"",
`"\U0001f64f"`,
`"\uD83D\u00"`,
}
for i := 0; i < 32; i++ {
// control characters are invalid
@ -39,6 +41,11 @@ func Test_read_string(t *testing.T) {
{`"a"`, "a"},
{`null`, ""},
{`"Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"`, "Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"},
{`"\uD83D"`, string([]byte{239, 191, 189})},
{`"\uD83D\\"`, string([]byte{239, 191, 189, '\\'})},
{`"\uD83D\ub000"`, string([]byte{239, 191, 189, 235, 128, 128})},
{`"\uD83D\ude04"`, "😄"},
{`"\uDEADBEEF"`, string([]byte{239, 191, 189, 66, 69, 69, 70})},
}
for _, tc := range goodInputs {
@ -111,7 +118,9 @@ func Test_read_exotic_string(t *testing.T) {
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
should := require.New(t)
iter := ParseString(ConfigDefault, input)
should.Equal(output, iter.ReadString())
var v string
should.Nil(json.Unmarshal([]byte(input), &v))
should.Equal(v, iter.ReadString())
})
t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) {
should := require.New(t)