1
0
mirror of https://github.com/json-iterator/go.git synced 2025-06-15 22:50:24 +02:00

2 Commits

Author SHA1 Message Date
ebed7df895 fix unicode and escape 2017-06-22 16:00:47 +08:00
caaa04195e #69 fix unicode support 2017-06-21 18:25:37 +08:00
3 changed files with 109 additions and 39 deletions

View File

@ -49,6 +49,7 @@ var ConfigCompatibleWithStandardLibrary = Config{
}.Froze() }.Froze()
var ConfigFastest = Config{ var ConfigFastest = Config{
EscapeHtml: false,
MarshalFloatWith6Digits: true, MarshalFloatWith6Digits: true,
}.Froze() }.Froze()

View File

@ -232,7 +232,7 @@ func (stream *Stream) WriteStringWithHtmlEscaped(s string) {
i := 0 i := 0
for ; i < toWriteLen; i++ { for ; i < toWriteLen; i++ {
c := s[i] c := s[i]
if c <= utf8.RuneSelf && htmlSafeSet[c] { if c < utf8.RuneSelf && htmlSafeSet[c] {
stream.buf[n] = c stream.buf[n] = c
n++ n++
} else { } else {
@ -246,47 +246,15 @@ func (stream *Stream) WriteStringWithHtmlEscaped(s string) {
return return
} }
stream.n = n stream.n = n
writeStringSlowPath(stream, htmlSafeSet, i, s, valLen) writeStringSlowPathWithHtmlEscaped(stream, i, s, valLen)
} }
func (stream *Stream) WriteString(s string) { func writeStringSlowPathWithHtmlEscaped(stream *Stream, i int, s string, valLen int) {
stream.ensure(32)
valLen := len(s)
toWriteLen := valLen
bufLengthMinusTwo := len(stream.buf) - 2 // make room for the quotes
if stream.n+toWriteLen > bufLengthMinusTwo {
toWriteLen = bufLengthMinusTwo - stream.n
}
n := stream.n
stream.buf[n] = '"'
n++
// write string, the fast path, without utf8 and escape support
i := 0
for ; i < toWriteLen; i++ {
c := s[i]
if c > 31 && c != '"' && c != '\\' {
stream.buf[n] = c
n++
} else {
break
}
}
if i == valLen {
stream.buf[n] = '"'
n++
stream.n = n
return
}
stream.n = n
writeStringSlowPath(stream, safeSet, i, s, valLen)
}
func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s string, valLen int) {
start := i start := i
// for the remaining parts, we process them char by char // for the remaining parts, we process them char by char
for ; i < valLen; i++ { for ; i < valLen; {
if b := s[i]; b < utf8.RuneSelf { if b := s[i]; b < utf8.RuneSelf {
if safeSet[b] { if htmlSafeSet[b] {
i++ i++
continue continue
} }
@ -320,8 +288,6 @@ func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s s
if start < i { if start < i {
stream.WriteRaw(s[start:i]) stream.WriteRaw(s[start:i])
} }
stream.WriteRaw(`\ufffd`)
i += size
start = i start = i
continue continue
} }
@ -349,3 +315,78 @@ func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s s
} }
stream.writeByte('"') stream.writeByte('"')
} }
// WriteString writes s to the stream as a double-quoted JSON string.
//
// It first copies bytes straight into the stream buffer for as long as they
// need no escaping (anything above 0x1f other than '"' and '\\') and there is
// room left in the buffer. If the whole string is consumed that way the
// closing quote is appended and the call returns; otherwise the remainder is
// delegated to writeStringSlowPath, which also emits the closing quote.
func (stream *Stream) WriteString(s string) {
	stream.ensure(32)
	total := len(s)

	// Limit the fast path to what fits in the buffer while leaving two
	// bytes for the surrounding quotes.
	limit := total
	if room := len(stream.buf) - 2 - stream.n; limit > room {
		limit = room
	}

	pos := stream.n
	stream.buf[pos] = '"'
	pos++

	// Fast path: raw byte copy, no UTF-8 decoding and no escaping.
	copied := 0
	for copied < limit {
		ch := s[copied]
		if ch <= 31 || ch == '"' || ch == '\\' {
			break
		}
		stream.buf[pos] = ch
		pos++
		copied++
	}

	if copied != total {
		// Either an escape-worthy byte was hit or the buffer limit was
		// reached; commit what was written and finish on the slow path.
		stream.n = pos
		writeStringSlowPath(stream, copied, s, total)
		return
	}

	stream.buf[pos] = '"'
	stream.n = pos + 1
}
// writeStringSlowPath finishes writing s as a JSON string starting at byte
// offset i, then writes the closing quote. The opening quote and s[:i] are
// expected to have been written by the caller already.
//
// Bytes at or above utf8.RuneSelf are never inspected and are emitted
// unchanged. ASCII bytes flagged in safeSet are emitted unchanged as well;
// every other ASCII byte is replaced by an escape sequence.
func writeStringSlowPath(stream *Stream, i int, s string, valLen int) {
	// pending marks the beginning of the run of bytes that have been
	// scanned but not yet flushed to the stream.
	pending := i
	for i < valLen {
		b := s[i]
		// The short-circuit keeps safeSet from being indexed with a
		// non-ASCII byte.
		if b >= utf8.RuneSelf || safeSet[b] {
			i++
			continue
		}

		// Flush the untouched run preceding the byte to be escaped.
		if pending < i {
			stream.WriteRaw(s[pending:i])
		}

		switch b {
		case '\t':
			stream.writeTwoBytes('\\', 't')
		case '\r':
			stream.writeTwoBytes('\\', 'r')
		case '\n':
			stream.writeTwoBytes('\\', 'n')
		case '"', '\\':
			stream.writeTwoBytes('\\', b)
		default:
			// Any other ASCII byte not marked safe is written as a
			// \u00XX escape using its two hex digits.
			stream.WriteRaw(`\u00`)
			stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
		}

		i++
		pending = i
	}

	// Flush whatever trails the last escaped byte.
	if pending < len(s) {
		stream.WriteRaw(s[pending:])
	}
	stream.writeByte('"')
}

View File

@ -136,6 +136,34 @@ func Test_string_encode_with_std_without_html_escape(t *testing.T) {
} }
} }
// Test_unicode checks that multi-byte UTF-8 text inside a map value is
// emitted verbatim, both through the package-level MarshalToString and
// through a config frozen with EscapeHtml disabled.
func Test_unicode(t *testing.T) {
	should := require.New(t)

	output, err := MarshalToString(map[string]interface{}{"a": "数字山谷"})
	// Assert the error instead of discarding it, so a marshal failure is
	// reported as such rather than as a confusing string mismatch.
	should.Nil(err)
	should.Equal(`{"a":"数字山谷"}`, output)

	output, err = Config{EscapeHtml: false}.Froze().MarshalToString(map[string]interface{}{"a": "数字山谷"})
	should.Nil(err)
	should.Equal(`{"a":"数字山谷"}`, output)
}
// Test_unicode_and_escape checks that a string mixing quote characters with
// multi-byte UTF-8 text has its quotes backslash-escaped while the UTF-8
// text is left untouched, for both the package-level MarshalToString and
// ConfigFastest.
func Test_unicode_and_escape(t *testing.T) {
	const (
		input = `"数字山谷"`
		want  = `"\"数字山谷\""`
	)
	should := require.New(t)

	got, err := MarshalToString(input)
	should.Nil(err)
	should.Equal(want, got)

	got, err = ConfigFastest.MarshalToString(input)
	should.Nil(err)
	should.Equal(want, got)
}
// Test_unsafe_unicode pins how U+2029 and U+2028 are encoded: the
// package-level MarshalToString is expected to emit them as literal \uXXXX
// escape sequences, while ConfigFastest is expected to emit the characters
// themselves.
func Test_unsafe_unicode(t *testing.T) {
	const (
		input       = "he\u2029\u2028he"
		wantEscaped = `"he\u2029\u2028he"`
		wantRaw     = "\"he\u2029\u2028he\""
	)
	should := require.New(t)

	got, err := MarshalToString(input)
	should.Nil(err)
	should.Equal(wantEscaped, got)

	got, err = ConfigFastest.MarshalToString(input)
	should.Nil(err)
	should.Equal(wantRaw, got)
}
func Benchmark_jsoniter_unicode(b *testing.B) { func Benchmark_jsoniter_unicode(b *testing.B) {
for n := 0; n < b.N; n++ { for n := 0; n < b.N; n++ {
iter := ParseString(ConfigDefault, `"\ud83d\udc4a"`) iter := ParseString(ConfigDefault, `"\ud83d\udc4a"`)