gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

decode.go (8820B)


      1 // Copyright 2018 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package json
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	"io"
     11 	"regexp"
     12 	"unicode/utf8"
     13 
     14 	"google.golang.org/protobuf/internal/errors"
     15 )
     16 
     17 // call specifies which Decoder method was invoked.
     18 type call uint8
     19 
     20 const (
     21 	readCall call = iota
     22 	peekCall
     23 )
     24 
     25 const unexpectedFmt = "unexpected token %s"
     26 
     27 // ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
     28 var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
     29 
     30 // Decoder is a token-based JSON decoder.
     31 type Decoder struct {
     32 	// lastCall is last method called, either readCall or peekCall.
     33 	// Initial value is readCall.
     34 	lastCall call
     35 
     36 	// lastToken contains the last read token.
     37 	lastToken Token
     38 
     39 	// lastErr contains the last read error.
     40 	lastErr error
     41 
     42 	// openStack is a stack containing ObjectOpen and ArrayOpen values. The
     43 	// top of stack represents the object or the array the current value is
     44 	// directly located in.
     45 	openStack []Kind
     46 
     47 	// orig is used in reporting line and column.
     48 	orig []byte
     49 	// in contains the unconsumed input.
     50 	in []byte
     51 }
     52 
     53 // NewDecoder returns a Decoder to read the given []byte.
     54 func NewDecoder(b []byte) *Decoder {
     55 	return &Decoder{orig: b, in: b}
     56 }
     57 
     58 // Peek looks ahead and returns the next token kind without advancing a read.
     59 func (d *Decoder) Peek() (Token, error) {
     60 	defer func() { d.lastCall = peekCall }()
     61 	if d.lastCall == readCall {
     62 		d.lastToken, d.lastErr = d.Read()
     63 	}
     64 	return d.lastToken, d.lastErr
     65 }
     66 
     67 // Read returns the next JSON token.
     68 // It will return an error if there is no valid token.
     69 func (d *Decoder) Read() (Token, error) {
     70 	const scalar = Null | Bool | Number | String
     71 
     72 	defer func() { d.lastCall = readCall }()
     73 	if d.lastCall == peekCall {
     74 		return d.lastToken, d.lastErr
     75 	}
     76 
     77 	tok, err := d.parseNext()
     78 	if err != nil {
     79 		return Token{}, err
     80 	}
     81 
     82 	switch tok.kind {
     83 	case EOF:
     84 		if len(d.openStack) != 0 ||
     85 			d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 {
     86 			return Token{}, ErrUnexpectedEOF
     87 		}
     88 
     89 	case Null:
     90 		if !d.isValueNext() {
     91 			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
     92 		}
     93 
     94 	case Bool, Number:
     95 		if !d.isValueNext() {
     96 			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
     97 		}
     98 
     99 	case String:
    100 		if d.isValueNext() {
    101 			break
    102 		}
    103 		// This string token should only be for a field name.
    104 		if d.lastToken.kind&(ObjectOpen|comma) == 0 {
    105 			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
    106 		}
    107 		if len(d.in) == 0 {
    108 			return Token{}, ErrUnexpectedEOF
    109 		}
    110 		if c := d.in[0]; c != ':' {
    111 			return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c))
    112 		}
    113 		tok.kind = Name
    114 		d.consume(1)
    115 
    116 	case ObjectOpen, ArrayOpen:
    117 		if !d.isValueNext() {
    118 			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
    119 		}
    120 		d.openStack = append(d.openStack, tok.kind)
    121 
    122 	case ObjectClose:
    123 		if len(d.openStack) == 0 ||
    124 			d.lastToken.kind == comma ||
    125 			d.openStack[len(d.openStack)-1] != ObjectOpen {
    126 			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
    127 		}
    128 		d.openStack = d.openStack[:len(d.openStack)-1]
    129 
    130 	case ArrayClose:
    131 		if len(d.openStack) == 0 ||
    132 			d.lastToken.kind == comma ||
    133 			d.openStack[len(d.openStack)-1] != ArrayOpen {
    134 			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
    135 		}
    136 		d.openStack = d.openStack[:len(d.openStack)-1]
    137 
    138 	case comma:
    139 		if len(d.openStack) == 0 ||
    140 			d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 {
    141 			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
    142 		}
    143 	}
    144 
    145 	// Update d.lastToken only after validating token to be in the right sequence.
    146 	d.lastToken = tok
    147 
    148 	if d.lastToken.kind == comma {
    149 		return d.Read()
    150 	}
    151 	return tok, nil
    152 }
    153 
    154 // Any sequence that looks like a non-delimiter (for error reporting).
    155 var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
    156 
    157 // parseNext parses for the next JSON token. It returns a Token object for
    158 // different types, except for Name. It does not handle whether the next token
    159 // is in a valid sequence or not.
    160 func (d *Decoder) parseNext() (Token, error) {
    161 	// Trim leading spaces.
    162 	d.consume(0)
    163 
    164 	in := d.in
    165 	if len(in) == 0 {
    166 		return d.consumeToken(EOF, 0), nil
    167 	}
    168 
    169 	switch in[0] {
    170 	case 'n':
    171 		if n := matchWithDelim("null", in); n != 0 {
    172 			return d.consumeToken(Null, n), nil
    173 		}
    174 
    175 	case 't':
    176 		if n := matchWithDelim("true", in); n != 0 {
    177 			return d.consumeBoolToken(true, n), nil
    178 		}
    179 
    180 	case 'f':
    181 		if n := matchWithDelim("false", in); n != 0 {
    182 			return d.consumeBoolToken(false, n), nil
    183 		}
    184 
    185 	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    186 		if n, ok := parseNumber(in); ok {
    187 			return d.consumeToken(Number, n), nil
    188 		}
    189 
    190 	case '"':
    191 		s, n, err := d.parseString(in)
    192 		if err != nil {
    193 			return Token{}, err
    194 		}
    195 		return d.consumeStringToken(s, n), nil
    196 
    197 	case '{':
    198 		return d.consumeToken(ObjectOpen, 1), nil
    199 
    200 	case '}':
    201 		return d.consumeToken(ObjectClose, 1), nil
    202 
    203 	case '[':
    204 		return d.consumeToken(ArrayOpen, 1), nil
    205 
    206 	case ']':
    207 		return d.consumeToken(ArrayClose, 1), nil
    208 
    209 	case ',':
    210 		return d.consumeToken(comma, 1), nil
    211 	}
    212 	return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in))
    213 }
    214 
    215 // newSyntaxError returns an error with line and column information useful for
    216 // syntax errors.
    217 func (d *Decoder) newSyntaxError(pos int, f string, x ...interface{}) error {
    218 	e := errors.New(f, x...)
    219 	line, column := d.Position(pos)
    220 	return errors.New("syntax error (line %d:%d): %v", line, column, e)
    221 }
    222 
    223 // Position returns line and column number of given index of the original input.
    224 // It will panic if index is out of range.
    225 func (d *Decoder) Position(idx int) (line int, column int) {
    226 	b := d.orig[:idx]
    227 	line = bytes.Count(b, []byte("\n")) + 1
    228 	if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
    229 		b = b[i+1:]
    230 	}
    231 	column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
    232 	return line, column
    233 }
    234 
    235 // currPos returns the current index position of d.in from d.orig.
    236 func (d *Decoder) currPos() int {
    237 	return len(d.orig) - len(d.in)
    238 }
    239 
    240 // matchWithDelim matches s with the input b and verifies that the match
    241 // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
    242 // As a special case, EOF is considered a delimiter. It returns the length of s
    243 // if there is a match, else 0.
    244 func matchWithDelim(s string, b []byte) int {
    245 	if !bytes.HasPrefix(b, []byte(s)) {
    246 		return 0
    247 	}
    248 
    249 	n := len(s)
    250 	if n < len(b) && isNotDelim(b[n]) {
    251 		return 0
    252 	}
    253 	return n
    254 }
    255 
    256 // isNotDelim returns true if given byte is a not delimiter character.
    257 func isNotDelim(c byte) bool {
    258 	return (c == '-' || c == '+' || c == '.' || c == '_' ||
    259 		('a' <= c && c <= 'z') ||
    260 		('A' <= c && c <= 'Z') ||
    261 		('0' <= c && c <= '9'))
    262 }
    263 
    264 // consume consumes n bytes of input and any subsequent whitespace.
    265 func (d *Decoder) consume(n int) {
    266 	d.in = d.in[n:]
    267 	for len(d.in) > 0 {
    268 		switch d.in[0] {
    269 		case ' ', '\n', '\r', '\t':
    270 			d.in = d.in[1:]
    271 		default:
    272 			return
    273 		}
    274 	}
    275 }
    276 
    277 // isValueNext returns true if next type should be a JSON value: Null,
    278 // Number, String or Bool.
    279 func (d *Decoder) isValueNext() bool {
    280 	if len(d.openStack) == 0 {
    281 		return d.lastToken.kind == 0
    282 	}
    283 
    284 	start := d.openStack[len(d.openStack)-1]
    285 	switch start {
    286 	case ObjectOpen:
    287 		return d.lastToken.kind&Name != 0
    288 	case ArrayOpen:
    289 		return d.lastToken.kind&(ArrayOpen|comma) != 0
    290 	}
    291 	panic(fmt.Sprintf(
    292 		"unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v",
    293 		d.lastToken.kind, start))
    294 }
    295 
    296 // consumeToken constructs a Token for given Kind with raw value derived from
    297 // current d.in and given size, and consumes the given size-length of it.
    298 func (d *Decoder) consumeToken(kind Kind, size int) Token {
    299 	tok := Token{
    300 		kind: kind,
    301 		raw:  d.in[:size],
    302 		pos:  len(d.orig) - len(d.in),
    303 	}
    304 	d.consume(size)
    305 	return tok
    306 }
    307 
    308 // consumeBoolToken constructs a Token for a Bool kind with raw value derived from
    309 // current d.in and given size.
    310 func (d *Decoder) consumeBoolToken(b bool, size int) Token {
    311 	tok := Token{
    312 		kind: Bool,
    313 		raw:  d.in[:size],
    314 		pos:  len(d.orig) - len(d.in),
    315 		boo:  b,
    316 	}
    317 	d.consume(size)
    318 	return tok
    319 }
    320 
    321 // consumeStringToken constructs a Token for a String kind with raw value derived
    322 // from current d.in and given size.
    323 func (d *Decoder) consumeStringToken(s string, size int) Token {
    324 	tok := Token{
    325 		kind: String,
    326 		raw:  d.in[:size],
    327 		pos:  len(d.orig) - len(d.in),
    328 		str:  s,
    329 	}
    330 	d.consume(size)
    331 	return tok
    332 }
    333 
    334 // Clone returns a copy of the Decoder for use in reading ahead the next JSON
    335 // object, array or other values without affecting current Decoder.
    336 func (d *Decoder) Clone() *Decoder {
    337 	ret := *d
    338 	ret.openStack = append([]Kind(nil), ret.openStack...)
    339 	return &ret
    340 }