mirror of
https://github.com/json-iterator/go.git
synced 2025-03-23 21:09:11 +02:00
277 lines
5.2 KiB
Go
277 lines
5.2 KiB
Go
package jsoniter
|
|
|
|
import (
	"errors"
	"fmt"
	"io"
	"unicode/utf16"
	"unicode/utf8"
)
|
|
|
|
// Lexer is a byte-oriented tokenizer. It consumes input either from an
// io.Reader through an internal buffer, or directly from a byte slice
// supplied by the caller.
type Lexer struct {
	reader     io.Reader // source used to refill buf; nil when lexing a fixed slice
	buf        []byte    // storage for the bytes currently available
	head, tail int       // buf[head:tail] is the part of buf not yet consumed
}
|
|
|
|
func NewLexer(reader io.Reader, bufSize int) *Lexer {
|
|
return &Lexer{
|
|
reader: reader,
|
|
buf: make([]byte, bufSize),
|
|
head: 0,
|
|
tail: 0,
|
|
}
|
|
}
|
|
|
|
func NewLexerWithArray(input []byte) *Lexer {
|
|
return &Lexer{
|
|
reader: nil,
|
|
buf: input,
|
|
head: 0,
|
|
tail: len(input),
|
|
}
|
|
}
|
|
|
|
func (lexer *Lexer) readByte() (byte, error) {
|
|
if lexer.head == lexer.tail {
|
|
if lexer.reader == nil {
|
|
return 0, io.EOF
|
|
}
|
|
n, err := lexer.reader.Read(lexer.buf)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if n == 0 {
|
|
return 0, io.EOF
|
|
}
|
|
lexer.head = 0
|
|
lexer.tail = n
|
|
}
|
|
b := lexer.buf[lexer.head]
|
|
lexer.head += 1
|
|
return b, nil
|
|
}
|
|
|
|
func (lexer *Lexer) unreadByte() error {
|
|
if lexer.head == 0 {
|
|
return errors.New("unread too many bytes")
|
|
}
|
|
lexer.head -= 1
|
|
return nil
|
|
}
|
|
|
|
// Limits used for overflow detection while accumulating decimal digits.
//
// Each cutoff is the smallest value that no longer fits in its type after
// being multiplied by 10.
const (
	maxUint64    = 1<<64 - 1
	cutoffUint64 = maxUint64/10 + 1
	maxUint32    = 1<<32 - 1
	cutoffUint32 = maxUint32/10 + 1
)
|
|
|
|
func (lexer *Lexer) LexUin64() (uint64, error) {
|
|
var n uint64
|
|
c, err := lexer.readByte()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
/* a single zero, or a series of integers */
|
|
if c == '0' {
|
|
c, err = lexer.readByte()
|
|
if err != nil && err != io.EOF {
|
|
return 0, err
|
|
}
|
|
} else if c >= '1' && c <= '9' {
|
|
for c >= '0' && c <= '9' {
|
|
var v byte
|
|
v = c - '0'
|
|
if n >= cutoffUint64 {
|
|
return 0, errors.New("overflow")
|
|
}
|
|
n = n * uint64(10) + uint64(v)
|
|
c, err = lexer.readByte()
|
|
if err != nil && err != io.EOF {
|
|
return 0, err
|
|
}
|
|
}
|
|
lexer.unreadByte()
|
|
} else {
|
|
lexer.unreadByte()
|
|
return 0, errors.New("unexpected")
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
func (lexer *Lexer) LexInt64() (int64, error) {
|
|
c, err := lexer.readByte()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
/* optional leading minus */
|
|
if c == '-' {
|
|
n, err := lexer.LexUin64()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return -int64(n), nil
|
|
} else {
|
|
lexer.unreadByte()
|
|
n, err := lexer.LexUin64()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return int64(n), nil
|
|
}
|
|
}
|
|
|
|
func (lexer *Lexer) LexString() (string, error) {
|
|
str := make([]byte, 0, 10)
|
|
c, err := lexer.readByte()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if c != '"' {
|
|
return "", errors.New("unexpected")
|
|
}
|
|
for {
|
|
c, err = lexer.readByte()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
switch c {
|
|
case '\\':
|
|
c, err = lexer.readByte()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
switch c {
|
|
case 'u':
|
|
r, err := lexer.readU4()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if utf16.IsSurrogate(r) {
|
|
c, err = lexer.readByte()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if c != '\\' {
|
|
return "", fmt.Errorf("unexpected: %v", c)
|
|
}
|
|
c, err = lexer.readByte()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if c != 'u' {
|
|
return "", fmt.Errorf("unexpected: %v", c)
|
|
}
|
|
r2, err := lexer.readU4()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
combined := utf16.DecodeRune(r, r2)
|
|
str = appendRune(str, combined)
|
|
} else {
|
|
str = appendRune(str, r)
|
|
}
|
|
case '"':
|
|
str = append(str, '"')
|
|
case '\\':
|
|
str = append(str, '\\')
|
|
case '/':
|
|
str = append(str, '/')
|
|
case 'b':
|
|
str = append(str, '\b')
|
|
case 'f':
|
|
str = append(str, '\f')
|
|
case 'n':
|
|
str = append(str, '\n')
|
|
case 'r':
|
|
str = append(str, '\r')
|
|
case 't':
|
|
str = append(str, '\t')
|
|
default:
|
|
return "", errors.New("unexpected")
|
|
}
|
|
case '"':
|
|
return string(str), nil
|
|
default:
|
|
str = append(str, c)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (lexer *Lexer) readU4() (rune, error) {
|
|
var u4 rune
|
|
for i := 0; i < 4; i++ {
|
|
c, err := lexer.readByte()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if (c >= '0' && c <= '9') {
|
|
if u4 >= cutoffUint32 {
|
|
return 0, errors.New("overflow")
|
|
}
|
|
u4 = u4 * 16 + rune(c - '0')
|
|
} else if ((c >= 'a' && c <= 'f') ) {
|
|
if u4 >= cutoffUint32 {
|
|
return 0, errors.New("overflow")
|
|
}
|
|
u4 = u4 * 16 + rune(c - 'a' + 10)
|
|
} else {
|
|
return 0, fmt.Errorf("unexpected: %v", c)
|
|
}
|
|
}
|
|
return u4, nil
|
|
}
|
|
|
|
// Lead-byte and continuation-byte bit patterns used when encoding UTF-8.
const (
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000
	t2 = 0xC0 // 1100 0000
	t3 = 0xE0 // 1110 0000
	t4 = 0xF0 // 1111 0000
	t5 = 0xF8 // 1111 1000
)

// Masks selecting the payload bits of a UTF-8 byte.
const (
	maskx = 0x3F // 0011 1111
	mask2 = 0x1F // 0001 1111
	mask3 = 0x0F // 0000 1111
	mask4 = 0x07 // 0000 0111
)

// Largest code points that fit in a one-, two- and three-byte encoding.
const (
	rune1Max = 1<<7 - 1
	rune2Max = 1<<11 - 1
	rune3Max = 1<<16 - 1
)

// Bounds (inclusive) of the UTF-16 surrogate range.
const (
	surrogateMin = 0xD800
	surrogateMax = 0xDFFF
)

const (
	// MaxRune is the maximum valid Unicode code point.
	MaxRune = '\U0010FFFF'
	// RuneError is the "error" rune, the Unicode replacement character.
	RuneError = '\uFFFD'
)
|
|
|
|
// appendRune appends the UTF-8 encoding of r to p and returns the extended
// slice.
//
// Values that are not valid code points (negative, above U+10FFFF, or inside
// the surrogate range) are encoded as U+FFFD, the replacement character.
func appendRune(p []byte, r rune) []byte {
	// Encode into a stack buffer with the standard library rather than
	// duplicating its bit manipulation here.
	var scratch [utf8.UTFMax]byte
	n := utf8.EncodeRune(scratch[:], r)
	return append(p, scratch[:n]...)
}
|
|
|