1
0
mirror of https://github.com/json-iterator/go.git synced 2025-03-23 21:09:11 +02:00

#69 fix unicode support

This commit is contained in:
Tao Wen 2017-06-21 18:25:37 +08:00
parent ff3c624fa9
commit caaa04195e
3 changed files with 90 additions and 37 deletions

View File

@ -49,6 +49,7 @@ var ConfigCompatibleWithStandardLibrary = Config{
}.Froze()
// ConfigFastest is a frozen Config with EscapeHtml turned off and
// MarshalFloatWith6Digits turned on.
// NOTE(review): presumably trades float precision for encoding speed, as the
// field name suggests — confirm against the Config definition.
var ConfigFastest = Config{
EscapeHtml: false,
MarshalFloatWith6Digits: true,
}.Froze()

View File

@ -246,47 +246,15 @@ func (stream *Stream) WriteStringWithHtmlEscaped(s string) {
return
}
stream.n = n
writeStringSlowPath(stream, htmlSafeSet, i, s, valLen)
writeStringSlowPathWithHtmlEscaped(stream, i, s, valLen)
}
// WriteString writes s to the stream as a double-quoted JSON string.
//
// A fast loop copies bytes straight into stream.buf for as long as they need
// no escaping (anything above 31 other than '"' and '\\'; bytes of multi-byte
// UTF-8 sequences are copied verbatim). If the whole string is consumed that
// way the closing quote is appended and the call returns; otherwise the rest
// of the string is handed to writeStringSlowPath together with safeSet.
func (stream *Stream) WriteString(s string) {
	stream.ensure(32)
	valLen := len(s)
	// The fast loop may copy only as many bytes as fit in the buffer while
	// still leaving two spare bytes for the opening and closing quotes.
	limit := valLen
	if available := len(stream.buf) - 2; stream.n+limit > available {
		limit = available - stream.n
	}
	out := stream.n
	stream.buf[out] = '"'
	out++
	// Fast path: plain byte copy, no UTF-8 decoding and no escaping.
	pos := 0
	for pos < limit {
		ch := s[pos]
		if ch <= 31 || ch == '"' || ch == '\\' {
			break
		}
		stream.buf[out] = ch
		out++
		pos++
	}
	if pos != valLen {
		// Stopped early (escape needed or buffer full): publish what was
		// copied and let the slow path finish the string.
		stream.n = out
		writeStringSlowPath(stream, safeSet, pos, s, valLen)
		return
	}
	stream.buf[out] = '"'
	stream.n = out + 1
}
func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s string, valLen int) {
func writeStringSlowPathWithHtmlEscaped(stream *Stream, i int, s string, valLen int) {
start := i
// for the remaining parts, we process them char by char
for ; i < valLen; i++ {
if b := s[i]; b < utf8.RuneSelf {
if safeSet[b] {
if htmlSafeSet[b] {
i++
continue
}
@ -320,8 +288,6 @@ func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s s
if start < i {
stream.WriteRaw(s[start:i])
}
stream.WriteRaw(`\ufffd`)
i += size
start = i
continue
}
@ -349,3 +315,81 @@ func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s s
}
stream.writeByte('"')
}
// WriteString encodes s as a quoted JSON string without HTML escaping.
//
// It first tries a fast path that copies bytes directly into the stream
// buffer, stopping at the first control character (< 0x20), double quote or
// backslash, or when the buffer has no more room. Whatever is left over is
// delegated to writeStringSlowPath, which also emits the closing quote.
func (stream *Stream) WriteString(s string) {
	stream.ensure(32)
	valLen := len(s)
	// Reserve two bytes of the buffer for the surrounding quotes.
	room := len(stream.buf) - 2 - stream.n
	fastLen := valLen
	if fastLen > room {
		fastLen = room
	}
	n := stream.n
	stream.buf[n] = '"'
	n++
	// Fast path: no UTF-8 decoding, no escape handling.
	i := 0
	for ; i < fastLen; i++ {
		c := s[i]
		if c < ' ' || c == '"' || c == '\\' {
			break
		}
		stream.buf[n] = c
		n++
	}
	if i == valLen {
		// Entire string copied verbatim; close it and publish the new length.
		stream.buf[n] = '"'
		stream.n = n + 1
		return
	}
	stream.n = n
	writeStringSlowPath(stream, i, s, valLen)
}
// writeStringSlowPath finishes writing the JSON string s, starting at byte
// offset i, and then emits the closing quote. The opening quote and s[:i] are
// expected to have been written by the caller already.
//
// ASCII bytes that are not marked in safeSet are escaped: '"' and '\\' get a
// backslash prefix, '\n', '\r' and '\t' use their short escapes, and any other
// such byte is written as \u00XX. All remaining bytes, including the bytes of
// multi-byte UTF-8 sequences, are copied through unchanged.
//
// valLen is the number of bytes of s to scan; callers pass len(s).
func writeStringSlowPath(stream *Stream, i int, s string, valLen int) {
	// start marks the beginning of the pending run of bytes that can be
	// written verbatim once an escape (or the end of the string) is reached.
	start := i
	// The loop advances i exactly once per byte. The previous form combined a
	// for-clause post statement with explicit increments in the body, which
	// skipped every byte following an ASCII byte and let characters such as a
	// second consecutive '"' through unescaped.
	for i < valLen {
		b := s[i]
		// The RuneSelf check must come first: safeSet only has entries for
		// ASCII bytes.
		if b >= utf8.RuneSelf || safeSet[b] {
			i++
			continue
		}
		if start < i {
			stream.WriteRaw(s[start:i])
		}
		switch b {
		case '\\', '"':
			stream.writeTwoBytes('\\', b)
		case '\n':
			stream.writeTwoBytes('\\', 'n')
		case '\r':
			stream.writeTwoBytes('\\', 'r')
		case '\t':
			stream.writeTwoBytes('\\', 't')
		default:
			// Remaining unsafe bytes (control characters below 0x20 other
			// than \t, \n and \r) are written as a \u00XX escape.
			stream.WriteRaw(`\u00`)
			stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
		}
		i++
		start = i
	}
	if start < len(s) {
		stream.WriteRaw(s[start:])
	}
	stream.writeByte('"')
}

View File

@ -136,6 +136,14 @@ func Test_string_encode_with_std_without_html_escape(t *testing.T) {
}
}
// Test_unicode checks that a non-ASCII string value is marshalled verbatim,
// both through the package-level MarshalToString and through a Config with
// EscapeHtml disabled. The error results are asserted rather than discarded so
// that an encoding failure is reported directly instead of as an output
// mismatch.
func Test_unicode(t *testing.T) {
	should := require.New(t)
	output, err := MarshalToString(map[string]interface{}{"a": "数字山谷"})
	should.Nil(err)
	should.Equal(`{"a":"数字山谷"}`, output)
	output, err = Config{EscapeHtml: false}.Froze().MarshalToString(map[string]interface{}{"a": "数字山谷"})
	should.Nil(err)
	should.Equal(`{"a":"数字山谷"}`, output)
}
func Benchmark_jsoniter_unicode(b *testing.B) {
for n := 0; n < b.N; n++ {
iter := ParseString(ConfigDefault, `"\ud83d\udc4a"`)