You've already forked json-iterator
							
							
				mirror of
				https://github.com/json-iterator/go.git
				synced 2025-10-31 00:07:40 +02:00 
			
		
		
		
	#137 fix unicode surrogate incompatibility
This commit is contained in:
		| @@ -42,58 +42,7 @@ func (iter *Iterator) readStringSlowPath() (ret string) { | ||||
| 		} | ||||
| 		if c == '\\' { | ||||
| 			c = iter.readByte() | ||||
| 			switch c { | ||||
| 			case 'u', 'U': | ||||
| 				r := iter.readU4() | ||||
| 				if utf16.IsSurrogate(r) { | ||||
| 					c = iter.readByte() | ||||
| 					if iter.Error != nil { | ||||
| 						return | ||||
| 					} | ||||
| 					if c != '\\' { | ||||
| 						iter.ReportError("ReadString", | ||||
| 							`expects \u after utf16 surrogate, but \ not found`) | ||||
| 						return | ||||
| 					} | ||||
| 					c = iter.readByte() | ||||
| 					if iter.Error != nil { | ||||
| 						return | ||||
| 					} | ||||
| 					if c != 'u' && c != 'U' { | ||||
| 						iter.ReportError("ReadString", | ||||
| 							`expects \u after utf16 surrogate, but \u not found`) | ||||
| 						return | ||||
| 					} | ||||
| 					r2 := iter.readU4() | ||||
| 					if iter.Error != nil { | ||||
| 						return | ||||
| 					} | ||||
| 					combined := utf16.DecodeRune(r, r2) | ||||
| 					str = appendRune(str, combined) | ||||
| 				} else { | ||||
| 					str = appendRune(str, r) | ||||
| 				} | ||||
| 			case '"': | ||||
| 				str = append(str, '"') | ||||
| 			case '\\': | ||||
| 				str = append(str, '\\') | ||||
| 			case '/': | ||||
| 				str = append(str, '/') | ||||
| 			case 'b': | ||||
| 				str = append(str, '\b') | ||||
| 			case 'f': | ||||
| 				str = append(str, '\f') | ||||
| 			case 'n': | ||||
| 				str = append(str, '\n') | ||||
| 			case 'r': | ||||
| 				str = append(str, '\r') | ||||
| 			case 't': | ||||
| 				str = append(str, '\t') | ||||
| 			default: | ||||
| 				iter.ReportError("ReadString", | ||||
| 					`invalid escape char after \`) | ||||
| 				return | ||||
| 			} | ||||
| 			str = iter.readEscapedChar(c, str) | ||||
| 		} else { | ||||
| 			str = append(str, c) | ||||
| 		} | ||||
| @@ -102,6 +51,66 @@ func (iter *Iterator) readStringSlowPath() (ret string) { | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte { | ||||
| 	switch c { | ||||
| 	case 'u': | ||||
| 		r := iter.readU4() | ||||
| 		if utf16.IsSurrogate(r) { | ||||
| 			c = iter.readByte() | ||||
| 			if iter.Error != nil { | ||||
| 				return nil | ||||
| 			} | ||||
| 			if c != '\\' { | ||||
| 				iter.unreadByte() | ||||
| 				str = appendRune(str, r) | ||||
| 				return str | ||||
| 			} | ||||
| 			c = iter.readByte() | ||||
| 			if iter.Error != nil { | ||||
| 				return nil | ||||
| 			} | ||||
| 			if c != 'u' { | ||||
| 				str = appendRune(str, r) | ||||
| 				return iter.readEscapedChar(c, str) | ||||
| 			} | ||||
| 			r2 := iter.readU4() | ||||
| 			if iter.Error != nil { | ||||
| 				return nil | ||||
| 			} | ||||
| 			combined := utf16.DecodeRune(r, r2) | ||||
| 			if combined == '\uFFFD' { | ||||
| 				str = appendRune(str, r) | ||||
| 				str = appendRune(str, r2) | ||||
| 			} else { | ||||
| 				str = appendRune(str, combined) | ||||
| 			} | ||||
| 		} else { | ||||
| 			str = appendRune(str, r) | ||||
| 		} | ||||
| 	case '"': | ||||
| 		str = append(str, '"') | ||||
| 	case '\\': | ||||
| 		str = append(str, '\\') | ||||
| 	case '/': | ||||
| 		str = append(str, '/') | ||||
| 	case 'b': | ||||
| 		str = append(str, '\b') | ||||
| 	case 'f': | ||||
| 		str = append(str, '\f') | ||||
| 	case 'n': | ||||
| 		str = append(str, '\n') | ||||
| 	case 'r': | ||||
| 		str = append(str, '\r') | ||||
| 	case 't': | ||||
| 		str = append(str, '\t') | ||||
| 	default: | ||||
| 		iter.ReportError("ReadString", | ||||
| 			`invalid escape char after \`) | ||||
| 		return nil | ||||
| 	} | ||||
| 	return str | ||||
| } | ||||
|  | ||||
| // ReadStringAsSlice reads a string from the iterator without copying it into string form. | ||||
| // The returned []byte must not be retained, as it will change after the next iterator call. | ||||
| func (iter *Iterator) ReadStringAsSlice() (ret []byte) { | ||||
|   | ||||
| @@ -19,6 +19,8 @@ func Test_read_string(t *testing.T) { | ||||
| 		`"\"`, | ||||
| 		`"\\\"`, | ||||
| 		"\"\n\"", | ||||
| 		`"\U0001f64f"`, | ||||
| 		`"\uD83D\u00"`, | ||||
| 	} | ||||
| 	for i := 0; i < 32; i++ { | ||||
| 		// control characters are invalid | ||||
| @@ -39,6 +41,11 @@ func Test_read_string(t *testing.T) { | ||||
| 		{`"a"`, "a"}, | ||||
| 		{`null`, ""}, | ||||
| 		{`"Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"`, "Iñtërnâtiônàlizætiøn,💝🐹🌇⛔"}, | ||||
| 		{`"\uD83D"`, string([]byte{239, 191, 189})}, | ||||
| 		{`"\uD83D\\"`, string([]byte{239, 191, 189, '\\'})}, | ||||
| 		{`"\uD83D\ub000"`, string([]byte{239, 191, 189, 235, 128, 128})}, | ||||
| 		{`"\uD83D\ude04"`, "😄"}, | ||||
| 		{`"\uDEADBEEF"`, string([]byte{239, 191, 189, 66, 69, 69, 70})}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tc := range goodInputs { | ||||
| @@ -111,7 +118,9 @@ func Test_read_exotic_string(t *testing.T) { | ||||
| 		t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { | ||||
| 			should := require.New(t) | ||||
| 			iter := ParseString(ConfigDefault, input) | ||||
| 			should.Equal(output, iter.ReadString()) | ||||
| 			var v string | ||||
| 			should.Nil(json.Unmarshal([]byte(input), &v)) | ||||
| 			should.Equal(v, iter.ReadString()) | ||||
| 		}) | ||||
| 		t.Run(fmt.Sprintf("%v:%v", input, output), func(t *testing.T) { | ||||
| 			should := require.New(t) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user