From caaa04195e7fe0d7ebdfdf60614060e88453e864 Mon Sep 17 00:00:00 2001 From: Tao Wen Date: Wed, 21 Jun 2017 18:25:37 +0800 Subject: [PATCH] #69 fix unicode support --- feature_config.go | 1 + feature_stream_string.go | 118 +++++++++++++++++++++++++++------------ jsoniter_string_test.go | 8 +++ 3 files changed, 90 insertions(+), 37 deletions(-) diff --git a/feature_config.go b/feature_config.go index 270a290..217c956 100644 --- a/feature_config.go +++ b/feature_config.go @@ -49,6 +49,7 @@ var ConfigCompatibleWithStandardLibrary = Config{ }.Froze() var ConfigFastest = Config{ + EscapeHtml: false, MarshalFloatWith6Digits: true, }.Froze() diff --git a/feature_stream_string.go b/feature_stream_string.go index ffce47b..0e052ae 100644 --- a/feature_stream_string.go +++ b/feature_stream_string.go @@ -246,47 +246,15 @@ func (stream *Stream) WriteStringWithHtmlEscaped(s string) { return } stream.n = n - writeStringSlowPath(stream, htmlSafeSet, i, s, valLen) + writeStringSlowPathWithHtmlEscaped(stream, i, s, valLen) } -func (stream *Stream) WriteString(s string) { - stream.ensure(32) - valLen := len(s) - toWriteLen := valLen - bufLengthMinusTwo := len(stream.buf) - 2 // make room for the quotes - if stream.n+toWriteLen > bufLengthMinusTwo { - toWriteLen = bufLengthMinusTwo - stream.n - } - n := stream.n - stream.buf[n] = '"' - n++ - // write string, the fast path, without utf8 and escape support - i := 0 - for ; i < toWriteLen; i++ { - c := s[i] - if c > 31 && c != '"' && c != '\\' { - stream.buf[n] = c - n++ - } else { - break - } - } - if i == valLen { - stream.buf[n] = '"' - n++ - stream.n = n - return - } - stream.n = n - writeStringSlowPath(stream, safeSet, i, s, valLen) -} - -func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s string, valLen int) { +func writeStringSlowPathWithHtmlEscaped(stream *Stream, i int, s string, valLen int) { start := i // for the remaining parts, we process them char by char for ; i < valLen; i++ { if b := s[i]; b < utf8.RuneSelf { - if safeSet[b] { + if htmlSafeSet[b] { i++ continue } @@ -320,8 +288,6 @@ func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s s if start < i { stream.WriteRaw(s[start:i]) } - stream.WriteRaw(`\ufffd`) - i += size start = i continue } @@ -349,3 +315,81 @@ func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s s } stream.writeByte('"') } + +func (stream *Stream) WriteString(s string) { + stream.ensure(32) + valLen := len(s) + toWriteLen := valLen + bufLengthMinusTwo := len(stream.buf) - 2 // make room for the quotes + if stream.n+toWriteLen > bufLengthMinusTwo { + toWriteLen = bufLengthMinusTwo - stream.n + } + n := stream.n + stream.buf[n] = '"' + n++ + // write string, the fast path, without utf8 and escape support + i := 0 + for ; i < toWriteLen; i++ { + c := s[i] + if c > 31 && c != '"' && c != '\\' { + stream.buf[n] = c + n++ + } else { + break + } + } + if i == valLen { + stream.buf[n] = '"' + n++ + stream.n = n + return + } + stream.n = n + writeStringSlowPath(stream, i, s, valLen) +} + +func writeStringSlowPath(stream *Stream, i int, s string, valLen int) { + start := i + // for the remaining parts, we process them char by char + for ; i < valLen; i++ { + if b := s[i]; b < utf8.RuneSelf { + if safeSet[b] { + i++ + continue + } + if start < i { + stream.WriteRaw(s[start:i]) + } + switch b { + case '\\', '"': + stream.writeTwoBytes('\\', b) + case '\n': + stream.writeTwoBytes('\\', 'n') + case '\r': + stream.writeTwoBytes('\\', 'r') + case '\t': + stream.writeTwoBytes('\\', 't') + default: + // This encodes bytes < 0x20 except for \t, \n and \r. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + stream.WriteRaw(`\u00`) + stream.writeTwoBytes(hex[b>>4], hex[b&0xF]) + } + i++ + start = i + continue + } + if start < i { + stream.WriteRaw(s[start:i]) + } + start = i + continue + } + if start < len(s) { + stream.WriteRaw(s[start:]) + } + stream.writeByte('"') +} diff --git a/jsoniter_string_test.go b/jsoniter_string_test.go index 523fa06..22ae441 100644 --- a/jsoniter_string_test.go +++ b/jsoniter_string_test.go @@ -136,6 +136,14 @@ func Test_string_encode_with_std_without_html_escape(t *testing.T) { } } +func Test_unicode(t *testing.T) { + should := require.New(t) + output , _ := MarshalToString(map[string]interface{}{"a": "数字山谷"}) + should.Equal(`{"a":"数字山谷"}`, output) + output , _ = Config{EscapeHtml: false}.Froze().MarshalToString(map[string]interface{}{"a": "数字山谷"}) + should.Equal(`{"a":"数字山谷"}`, output) +} + func Benchmark_jsoniter_unicode(b *testing.B) { for n := 0; n < b.N; n++ { iter := ParseString(ConfigDefault, `"\ud83d\udc4a"`)