stream.go (11888B)
1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 import ( 8 "bytes" 9 "errors" 10 "io" 11 ) 12 13 // A Decoder reads and decodes JSON objects from an input stream. 14 type Decoder struct { 15 r io.Reader 16 buf []byte 17 d decodeState 18 scanp int // start of unread data in buf 19 scan scanner 20 err error 21 22 tokenState int 23 tokenStack []int 24 } 25 26 // NewDecoder returns a new decoder that reads from r. 27 // 28 // The decoder introduces its own buffering and may 29 // read data from r beyond the JSON values requested. 30 func NewDecoder(r io.Reader) *Decoder { 31 return &Decoder{r: r} 32 } 33 34 // Deprecated: Use `SetNumberType` instead 35 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a 36 // Number instead of as a float64. 37 func (dec *Decoder) UseNumber() { dec.d.numberType = UnmarshalJSONNumber } 38 39 // SetNumberType causes the Decoder to unmarshal a number into an interface{} as a 40 // Number, float64 or int64 depending on `t` enum value. 41 func (dec *Decoder) SetNumberType(t NumberUnmarshalType) { dec.d.numberType = t } 42 43 // Decode reads the next JSON-encoded value from its 44 // input and stores it in the value pointed to by v. 45 // 46 // See the documentation for Unmarshal for details about 47 // the conversion of JSON into a Go value. 48 func (dec *Decoder) Decode(v interface{}) error { 49 if dec.err != nil { 50 return dec.err 51 } 52 53 if err := dec.tokenPrepareForDecode(); err != nil { 54 return err 55 } 56 57 if !dec.tokenValueAllowed() { 58 return &SyntaxError{msg: "not at beginning of value"} 59 } 60 61 // Read whole value into buffer. 62 n, err := dec.readValue() 63 if err != nil { 64 return err 65 } 66 dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) 67 dec.scanp += n 68 69 // Don't save err from unmarshal into dec.err: 70 // the connection is still usable since we read a complete JSON 71 // object from it before the error happened. 72 err = dec.d.unmarshal(v) 73 74 // fixup token streaming state 75 dec.tokenValueEnd() 76 77 return err 78 } 79 80 // Buffered returns a reader of the data remaining in the Decoder's 81 // buffer. The reader is valid until the next call to Decode. 82 func (dec *Decoder) Buffered() io.Reader { 83 return bytes.NewReader(dec.buf[dec.scanp:]) 84 } 85 86 // readValue reads a JSON value into dec.buf. 87 // It returns the length of the encoding. 88 func (dec *Decoder) readValue() (int, error) { 89 dec.scan.reset() 90 91 scanp := dec.scanp 92 var err error 93 Input: 94 for { 95 // Look in the buffer for a new value. 96 for i, c := range dec.buf[scanp:] { 97 dec.scan.bytes++ 98 v := dec.scan.step(&dec.scan, c) 99 if v == scanEnd { 100 scanp += i 101 break Input 102 } 103 // scanEnd is delayed one byte. 104 // We might block trying to get that byte from src, 105 // so instead invent a space byte. 106 if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd { 107 scanp += i + 1 108 break Input 109 } 110 if v == scanError { 111 dec.err = dec.scan.err 112 return 0, dec.scan.err 113 } 114 } 115 scanp = len(dec.buf) 116 117 // Did the last read have an error? 118 // Delayed until now to allow buffer scan. 119 if err != nil { 120 if err == io.EOF { 121 if dec.scan.step(&dec.scan, ' ') == scanEnd { 122 break Input 123 } 124 if nonSpace(dec.buf) { 125 err = io.ErrUnexpectedEOF 126 } 127 } 128 dec.err = err 129 return 0, err 130 } 131 132 n := scanp - dec.scanp 133 err = dec.refill() 134 scanp = dec.scanp + n 135 } 136 return scanp - dec.scanp, nil 137 } 138 139 func (dec *Decoder) refill() error { 140 // Make room to read more into the buffer. 141 // First slide down data already consumed. 142 if dec.scanp > 0 { 143 n := copy(dec.buf, dec.buf[dec.scanp:]) 144 dec.buf = dec.buf[:n] 145 dec.scanp = 0 146 } 147 148 // Grow buffer if not large enough. 149 const minRead = 512 150 if cap(dec.buf)-len(dec.buf) < minRead { 151 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) 152 copy(newBuf, dec.buf) 153 dec.buf = newBuf 154 } 155 156 // Read. Delay error for next iteration (after scan). 157 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) 158 dec.buf = dec.buf[0 : len(dec.buf)+n] 159 160 return err 161 } 162 163 func nonSpace(b []byte) bool { 164 for _, c := range b { 165 if !isSpace(c) { 166 return true 167 } 168 } 169 return false 170 } 171 172 // An Encoder writes JSON objects to an output stream. 173 type Encoder struct { 174 w io.Writer 175 err error 176 } 177 178 // NewEncoder returns a new encoder that writes to w. 179 func NewEncoder(w io.Writer) *Encoder { 180 return &Encoder{w: w} 181 } 182 183 // Encode writes the JSON encoding of v to the stream, 184 // followed by a newline character. 185 // 186 // See the documentation for Marshal for details about the 187 // conversion of Go values to JSON. 188 func (enc *Encoder) Encode(v interface{}) error { 189 if enc.err != nil { 190 return enc.err 191 } 192 e := newEncodeState() 193 err := e.marshal(v) 194 if err != nil { 195 return err 196 } 197 198 // Terminate each value with a newline. 199 // This makes the output look a little nicer 200 // when debugging, and some kind of space 201 // is required if the encoded value was a number, 202 // so that the reader knows there aren't more 203 // digits coming. 204 e.WriteByte('\n') 205 206 if _, err = enc.w.Write(e.Bytes()); err != nil { 207 enc.err = err 208 } 209 encodeStatePool.Put(e) 210 return err 211 } 212 213 // RawMessage is a raw encoded JSON object. 214 // It implements Marshaler and Unmarshaler and can 215 // be used to delay JSON decoding or precompute a JSON encoding. 216 type RawMessage []byte 217 218 // MarshalJSON returns *m as the JSON encoding of m. 219 func (m *RawMessage) MarshalJSON() ([]byte, error) { 220 return *m, nil 221 } 222 223 // UnmarshalJSON sets *m to a copy of data. 224 func (m *RawMessage) UnmarshalJSON(data []byte) error { 225 if m == nil { 226 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 227 } 228 *m = append((*m)[0:0], data...) 229 return nil 230 } 231 232 var _ Marshaler = (*RawMessage)(nil) 233 var _ Unmarshaler = (*RawMessage)(nil) 234 235 // A Token holds a value of one of these types: 236 // 237 // Delim, for the four JSON delimiters [ ] { } 238 // bool, for JSON booleans 239 // float64, for JSON numbers 240 // Number, for JSON numbers 241 // string, for JSON string literals 242 // nil, for JSON null 243 // 244 type Token interface{} 245 246 const ( 247 tokenTopValue = iota 248 tokenArrayStart 249 tokenArrayValue 250 tokenArrayComma 251 tokenObjectStart 252 tokenObjectKey 253 tokenObjectColon 254 tokenObjectValue 255 tokenObjectComma 256 ) 257 258 // advance tokenstate from a separator state to a value state 259 func (dec *Decoder) tokenPrepareForDecode() error { 260 // Note: Not calling peek before switch, to avoid 261 // putting peek into the standard Decode path. 262 // peek is only called when using the Token API. 263 switch dec.tokenState { 264 case tokenArrayComma: 265 c, err := dec.peek() 266 if err != nil { 267 return err 268 } 269 if c != ',' { 270 return &SyntaxError{"expected comma after array element", 0} 271 } 272 dec.scanp++ 273 dec.tokenState = tokenArrayValue 274 case tokenObjectColon: 275 c, err := dec.peek() 276 if err != nil { 277 return err 278 } 279 if c != ':' { 280 return &SyntaxError{"expected colon after object key", 0} 281 } 282 dec.scanp++ 283 dec.tokenState = tokenObjectValue 284 } 285 return nil 286 } 287 288 func (dec *Decoder) tokenValueAllowed() bool { 289 switch dec.tokenState { 290 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 291 return true 292 } 293 return false 294 } 295 296 func (dec *Decoder) tokenValueEnd() { 297 switch dec.tokenState { 298 case tokenArrayStart, tokenArrayValue: 299 dec.tokenState = tokenArrayComma 300 case tokenObjectValue: 301 dec.tokenState = tokenObjectComma 302 } 303 } 304 305 // A Delim is a JSON array or object delimiter, one of [ ] { or }. 306 type Delim rune 307 308 func (d Delim) String() string { 309 return string(d) 310 } 311 312 // Token returns the next JSON token in the input stream. 313 // At the end of the input stream, Token returns nil, io.EOF. 314 // 315 // Token guarantees that the delimiters [ ] { } it returns are 316 // properly nested and matched: if Token encounters an unexpected 317 // delimiter in the input, it will return an error. 318 // 319 // The input stream consists of basic JSON values—bool, string, 320 // number, and null—along with delimiters [ ] { } of type Delim 321 // to mark the start and end of arrays and objects. 322 // Commas and colons are elided. 323 func (dec *Decoder) Token() (Token, error) { 324 for { 325 c, err := dec.peek() 326 if err != nil { 327 return nil, err 328 } 329 switch c { 330 case '[': 331 if !dec.tokenValueAllowed() { 332 return dec.tokenError(c) 333 } 334 dec.scanp++ 335 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 336 dec.tokenState = tokenArrayStart 337 return Delim('['), nil 338 339 case ']': 340 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 341 return dec.tokenError(c) 342 } 343 dec.scanp++ 344 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 345 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 346 dec.tokenValueEnd() 347 return Delim(']'), nil 348 349 case '{': 350 if !dec.tokenValueAllowed() { 351 return dec.tokenError(c) 352 } 353 dec.scanp++ 354 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 355 dec.tokenState = tokenObjectStart 356 return Delim('{'), nil 357 358 case '}': 359 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 360 return dec.tokenError(c) 361 } 362 dec.scanp++ 363 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 364 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 365 dec.tokenValueEnd() 366 return Delim('}'), nil 367 368 case ':': 369 if dec.tokenState != tokenObjectColon { 370 return dec.tokenError(c) 371 } 372 dec.scanp++ 373 dec.tokenState = tokenObjectValue 374 continue 375 376 case ',': 377 if dec.tokenState == tokenArrayComma { 378 dec.scanp++ 379 dec.tokenState = tokenArrayValue 380 continue 381 } 382 if dec.tokenState == tokenObjectComma { 383 dec.scanp++ 384 dec.tokenState = tokenObjectKey 385 continue 386 } 387 return dec.tokenError(c) 388 389 case '"': 390 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 391 var x string 392 old := dec.tokenState 393 dec.tokenState = tokenTopValue 394 err := dec.Decode(&x) 395 dec.tokenState = old 396 if err != nil { 397 clearOffset(err) 398 return nil, err 399 } 400 dec.tokenState = tokenObjectColon 401 return x, nil 402 } 403 fallthrough 404 405 default: 406 if !dec.tokenValueAllowed() { 407 return dec.tokenError(c) 408 } 409 var x interface{} 410 if err := dec.Decode(&x); err != nil { 411 clearOffset(err) 412 return nil, err 413 } 414 return x, nil 415 } 416 } 417 } 418 419 func clearOffset(err error) { 420 if s, ok := err.(*SyntaxError); ok { 421 s.Offset = 0 422 } 423 } 424 425 func (dec *Decoder) tokenError(c byte) (Token, error) { 426 var context string 427 switch dec.tokenState { 428 case tokenTopValue: 429 context = " looking for beginning of value" 430 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 431 context = " looking for beginning of value" 432 case tokenArrayComma: 433 context = " after array element" 434 case tokenObjectKey: 435 context = " looking for beginning of object key string" 436 case tokenObjectColon: 437 context = " after object key" 438 case tokenObjectComma: 439 context = " after object key:value pair" 440 } 441 return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0} 442 } 443 444 // More reports whether there is another element in the 445 // current array or object being parsed. 446 func (dec *Decoder) More() bool { 447 c, err := dec.peek() 448 return err == nil && c != ']' && c != '}' 449 } 450 451 func (dec *Decoder) peek() (byte, error) { 452 var err error 453 for { 454 for i := dec.scanp; i < len(dec.buf); i++ { 455 c := dec.buf[i] 456 if isSpace(c) { 457 continue 458 } 459 dec.scanp = i 460 return c, nil 461 } 462 // buffer has been scanned, now report any error 463 if err != nil { 464 return 0, err 465 } 466 err = dec.refill() 467 } 468 } 469 470 /* 471 TODO 472 473 // EncodeToken writes the given JSON token to the stream. 474 // It returns an error if the delimiters [ ] { } are not properly used. 475 // 476 // EncodeToken does not call Flush, because usually it is part of 477 // a larger operation such as Encode, and those will call Flush when finished. 478 // Callers that create an Encoder and then invoke EncodeToken directly, 479 // without using Encode, need to call Flush when finished to ensure that 480 // the JSON is written to the underlying writer. 481 func (e *Encoder) EncodeToken(t Token) error { 482 ... 483 } 484 485 */