reader.go (13198B)
// Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a MIT license found in the LICENSE file.

package codec

import (
	"bufio"
	"bytes"
	"io"
	"strings"
)

// decReader abstracts the reading source, allowing implementations that can
// read from an io.Reader or directly off a byte slice with zero-copying.
type decReader interface {
	// readx will return a view of the []byte if decoding from a []byte, OR
	// read into the implementation scratch buffer if possible i.e. n < len(scratchbuf), OR
	// create a new []byte and read into that
	readx(n uint) []byte

	// readb fills the passed slice completely from the source.
	readb([]byte)

	// readn{1,2,3,4,8} read exactly that many bytes and return them by value.
	readn1() byte
	readn2() [2]byte
	readn3() [3]byte
	readn4() [4]byte
	readn8() [8]byte
	// readn1eof() (v uint8, eof bool)

	// // read up to 8 bytes at a time
	// readn(num uint8) (v [8]byte)

	numread() uint // number of bytes read

	// skip any whitespace characters, and return the first non-matching byte
	skipWhitespace() (token byte)

	// jsonReadNum will include last read byte in first element of slice,
	// and continue numeric characters until it sees a non-numeric char
	// or EOF. If it sees a non-numeric character, it will unread that.
	jsonReadNum() []byte

	// jsonReadAsisChars will read json plain characters (anything but " or \)
	// and return a slice terminated by a non-json asis character.
	jsonReadAsisChars() []byte

	// skip will skip any byte that matches, and return the first non-matching byte
	// skip(accept *bitset256) (token byte)

	// readTo will read any byte that matches, stopping once no-longer matching.
	// readTo(accept *bitset256) (out []byte)

	// readUntil will read, only stopping once it matches the 'stop' byte (which it excludes).
	readUntil(stop byte) (out []byte)
}

// ------------------------------------------------

// unreadByteStatus tracks whether the last byte seen by an
// ioReaderByteScannerT can be unread, or is pending to be read again.
type unreadByteStatus uint8

// unreadByteStatus goes from
// undefined (when initialized) -- (read) --> canUnread -- (unread) --> canRead ...
const (
	unreadByteUndefined unreadByteStatus = iota
	unreadByteCanRead
	unreadByteCanUnread
)

// const defBufReaderSize = 4096

// --------------------

// ioReaderByteScanner contains the io.Reader and io.ByteScanner interfaces
type ioReaderByteScanner interface {
	io.Reader
	io.ByteScanner
	// ReadByte() (byte, error)
	// UnreadByte() error
	// Read(p []byte) (n int, err error)
}

// ioReaderByteScannerT does a simple wrapper of a io.ByteScanner
// over a io.Reader
//
// It remembers only the single most recent byte, so at most one byte
// can be unread between reads.
type ioReaderByteScannerT struct {
	r io.Reader

	l  byte             // last byte
	ls unreadByteStatus // last byte status

	_ [2]byte // padding
	b [4]byte // tiny buffer for reading single bytes
}

// ReadByte returns the next byte.
//
// If a byte is pending after an UnreadByte, that byte (z.l) is returned again
// without touching the wrapped reader. Otherwise one byte is read through
// z.Read into the tiny scratch buffer z.b.
//
// NOTE(review): the count returned by z.Read is discarded, so if the wrapped
// reader returns (0, nil) this returns whatever z.b[0] held previously along
// with a nil error - confirm that such readers are never passed in.
func (z *ioReaderByteScannerT) ReadByte() (c byte, err error) {
	if z.ls == unreadByteCanRead {
		z.ls = unreadByteCanUnread
		c = z.l
	} else {
		_, err = z.Read(z.b[:1])
		c = z.b[0]
	}
	return
}

// UnreadByte marks the last byte read as pending, so that the next
// ReadByte/Read returns it again.
//
// It returns errDecUnreadByteLastByteNotRead if a byte is already pending,
// errDecUnreadByteNothingToRead if nothing has been read since reset, and
// errDecUnreadByteUnknown for any other status value.
func (z *ioReaderByteScannerT) UnreadByte() (err error) {
	switch z.ls {
	case unreadByteCanUnread:
		z.ls = unreadByteCanRead
	case unreadByteCanRead:
		err = errDecUnreadByteLastByteNotRead
	case unreadByteUndefined:
		err = errDecUnreadByteNothingToRead
	default:
		err = errDecUnreadByteUnknown
	}
	return
}

// Read reads into p, first delivering any pending (unread) byte into p[0]
// and then reading the remainder from the wrapped reader.
//
// The last byte delivered from the wrapped reader is remembered in z.l so
// that it can be unread. The returned count includes the pending byte, if any.
func (z *ioReaderByteScannerT) Read(p []byte) (n int, err error) {
	if len(p) == 0 {
		return
	}
	var firstByte bool
	if z.ls == unreadByteCanRead {
		// hand back the pending byte first
		z.ls = unreadByteCanUnread
		p[0] = z.l
		if len(p) == 1 {
			// the pending byte satisfies the whole request; skip the wrapped reader
			n = 1
			return
		}
		firstByte = true
		p = p[1:]
	}
	n, err = z.r.Read(p)
	if n > 0 {
		if err == io.EOF && n == len(p) {
			err = nil // read was successful, so postpone EOF (till next time)
		}
		z.l = p[n-1]
		z.ls = unreadByteCanUnread
	}
	if firstByte {
		// account for the pending byte placed ahead of the freshly read data
		n++
	}
	return
}

// reset points the wrapper at r and forgets any remembered byte.
func (z *ioReaderByteScannerT) reset(r io.Reader) {
	z.r = r
	z.ls = unreadByteUndefined
	z.l = 0
}

// ioDecReader is a decReader that reads off an io.Reader.
type ioDecReader struct {
	rr ioReaderByteScannerT // the reader passed in, wrapped into a reader+bytescanner

	n uint // num read

	blist *bytesFreelist

	bufr []byte              // buffer for readTo/readUntil
	br   ioReaderByteScanner // main reader used for Read|ReadByte|UnreadByte
	bb   *bufio.Reader       // created internally, and reused on reset if needed

	x [64 + 40]byte // for: get struct field name, swallow valueTypeBytes, etc
}

// reset prepares z to read from r, zeroing the read count and selecting
// the reader (z.br) that all subsequent reads go through.
//
// With bufsize <= 0 no buffering is added: r is used directly if it already
// implements ioReaderByteScanner, else it is wrapped in z.rr.
// With bufsize > 0, r is used directly if it is a *strings.Reader,
// *bytes.Buffer, *bytes.Reader, or a *bufio.Reader whose Size() equals bufsize;
// otherwise it is wrapped in a bufio.Reader of that size (reusing z.bb when
// its size already matches).
func (z *ioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) {
	z.blist = blist
	z.n = 0
	// NOTE(review): presumably ensures z.bufr has capacity of at least 256,
	// drawing from the freelist - confirm against bytesFreelist.check.
	z.bufr = z.blist.check(z.bufr, 256)
	z.br = nil

	var ok bool

	if bufsize <= 0 {
		z.br, ok = r.(ioReaderByteScanner)
		if !ok {
			z.rr.reset(r)
			z.br = &z.rr
		}
		return
	}

	// bufsize > 0 ...

	// if bytes.[Buffer|Reader], no value in adding extra buffer
	// if bufio.Reader, no value in extra buffer unless size changes
	switch bb := r.(type) {
	case *strings.Reader:
		z.br = bb
	case *bytes.Buffer:
		z.br = bb
	case *bytes.Reader:
		z.br = bb
	case *bufio.Reader:
		if bb.Size() == bufsize {
			z.br = bb
		}
	}

	if z.br == nil {
		// no direct reader was chosen above: buffer r ourselves
		if z.bb != nil && z.bb.Size() == bufsize {
			z.bb.Reset(r)
		} else {
			z.bb = bufio.NewReaderSize(r, bufsize)
		}
		z.br = z.bb
	}
}

// numread returns the number of bytes consumed so far (net of unreads).
func (z *ioDecReader) numread() uint {
	return z.n
}

// readn1 reads a single byte, passing any read error to halt.onerror.
func (z *ioDecReader) readn1() (b uint8) {
	b, err := z.br.ReadByte()
	halt.onerror(err)
	z.n++
	return
}

// readn2 reads exactly 2 bytes via readb.
func (z *ioDecReader) readn2() (bs [2]byte) {
	z.readb(bs[:])
	return
}

// readn3 reads exactly 3 bytes via readb.
func (z *ioDecReader) readn3() (bs [3]byte) {
	z.readb(bs[:])
	return
}

// readn4 reads exactly 4 bytes via readb.
func (z *ioDecReader) readn4() (bs [4]byte) {
	z.readb(bs[:])
	return
}

// readn8 reads exactly 8 bytes via readb.
func (z *ioDecReader) readn8() (bs [8]byte) {
	z.readb(bs[:])
	return
}

// readx reads n bytes and returns them.
//
// For n smaller than len(z.x), the result is a slice of the scratch array z.x,
// so it is overwritten by the next such call. Larger requests get a freshly
// allocated slice. For n == 0, zeroByteSlice is returned.
func (z *ioDecReader) readx(n uint) (bs []byte) {
	if n == 0 {
		return zeroByteSlice
	}
	if n < uint(len(z.x)) {
		bs = z.x[:n]
	} else {
		bs = make([]byte, n)
	}
	nn, err := readFull(z.br, bs)
	// count what was actually read before reporting any error
	z.n += nn
	halt.onerror(err)
	return
}

// readb fills bs completely from the reader, passing any error to halt.onerror.
// It is a no-op for an empty slice.
func (z *ioDecReader) readb(bs []byte) {
	if len(bs) == 0 {
		return
	}
	nn, err := readFull(z.br, bs)
	// count what was actually read before reporting any error
	z.n += nn
	halt.onerror(err)
}

// func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
// 	b, err := z.br.ReadByte()
// 	if err == nil {
// 		z.n++
// 	} else if err == io.EOF {
// 		eof = true
// 	} else {
// 		halt.onerror(err)
// 	}
// 	return
// }

// jsonReadNum unreads the last byte, then collects bytes into z.bufr for as
// long as isNumberChar accepts them.
//
// It stops at io.EOF (returning what was collected so far) or at the first
// byte that is not accepted, which is unread so that it can be read again.
// The returned slice is z.bufr, which is reused by the next call that
// resets z.bufr.
func (z *ioDecReader) jsonReadNum() (bs []byte) {
	z.unreadn1()
	z.bufr = z.bufr[:0]
LOOP:
	// i, eof := z.readn1eof()
	i, err := z.br.ReadByte()
	if err == io.EOF {
		// end of stream is a valid terminator for a number
		return z.bufr
	}
	if err != nil {
		halt.onerror(err)
	}
	z.n++
	if isNumberChar(i) {
		z.bufr = append(z.bufr, i)
		goto LOOP
	}
	z.unreadn1()
	return z.bufr
}

// jsonReadAsisChars collects bytes into z.bufr up to and including the first
// '"' or '\\' byte, and returns z.bufr (which is reused by the next call that
// resets z.bufr). Read errors, including EOF, go through readn1 to halt.onerror.
func (z *ioDecReader) jsonReadAsisChars() (bs []byte) {
	z.bufr = z.bufr[:0]
LOOP:
	i := z.readn1()
	z.bufr = append(z.bufr, i)
	if i == '"' || i == '\\' {
		return z.bufr
	}
	goto LOOP
}

// skipWhitespace consumes bytes for as long as isWhitespaceChar accepts them,
// and returns the first byte that is not accepted (which is also consumed).
func (z *ioDecReader) skipWhitespace() (token byte) {
LOOP:
	token = z.readn1()
	if isWhitespaceChar(token) {
		goto LOOP
	}
	return
}

// func (z *ioDecReader) readUntil(stop byte) []byte {
// 	z.bufr = z.bufr[:0]
// LOOP:
// 	token := z.readn1()
// 	z.bufr = append(z.bufr, token)
// 	if token == stop {
// 		return z.bufr[:len(z.bufr)-1]
// 	}
// 	goto LOOP
// }

// readUntil collects bytes into z.bufr until it reads the stop byte.
// The stop byte is consumed but is not included in the returned slice.
// The returned slice is z.bufr, which is reused by the next call that
// resets z.bufr.
func (z *ioDecReader) readUntil(stop byte) []byte {
	z.bufr = z.bufr[:0]
LOOP:
	token := z.readn1()
	if token == stop {
		return z.bufr
	}
	z.bufr = append(z.bufr, token)
	goto LOOP
}

// unreadn1 unreads the last byte read, passing any error to halt.onerror,
// and decrements the read count.
func (z *ioDecReader) unreadn1() {
	err := z.br.UnreadByte()
	halt.onerror(err)
	z.n--
}

// ------------------------------------

// bytesDecReader is a decReader that reads off a byte slice with zero copying
//
// Note: we do not try to convert index'ing out of bounds to an io.EOF.
// instead, we let it bubble up to the exported Encode/Decode method
// and recover it as an io.EOF.
//
// Every function here MUST defensively check bounds either explicitly
// or via a bounds check.
//
// see panicValToErr(...) function in helper.go.
type bytesDecReader struct {
	b []byte // input being decoded; reset clips its capacity to its length
	c uint   // offset into b of the next unread byte
}

// reset points the reader at in and rewinds the cursor to the start.
func (z *bytesDecReader) reset(in []byte) {
	size := len(in)
	z.c = 0
	// capacity == length, so no later reslice can reach past the input
	z.b = in[:size:size]
}

// numread reports how many bytes have been consumed so far.
func (z *bytesDecReader) numread() uint {
	consumed := z.c
	return consumed
}

// Note: slicing from a non-constant start position is more expensive,
// as more computation is required to decipher the pointer start position.
// However, we do it only once, and it's better than reslicing both z.b and return value.

// readx returns a zero-copy view of the next n bytes and moves the cursor
// past them. Asking for more than remains triggers the runtime's slice
// bounds panic; the cursor is only advanced once the slice has succeeded.
func (z *bytesDecReader) readx(n uint) []byte {
	start, end := z.c, z.c+n
	view := z.b[start:end]
	z.c = end
	return view
}

// readb copies the next len(bs) bytes into bs, consuming them.
func (z *bytesDecReader) readb(bs []byte) {
	want := uint(len(bs))
	src := z.readx(want)
	copy(bs, src)
}

// MARKER: do not use this - as it calls into memmove (as the size of data to move is unknown)
// func (z *bytesDecReader) readnn(bs []byte, n uint) {
// 	x := z.c
// 	copy(bs, z.b[x:x+n])
// 	z.c += n
// }

// func (z *bytesDecReader) readn(num uint8) (bs [8]byte) {
// 	x := z.c + uint(num)
// 	copy(bs[:], z.b[z.c:x]) // slice z.b completely, so we get bounds error if past
// 	z.c = x
// 	return
// }

// func (z *bytesDecReader) readn1() uint8 {
// 	z.c++
// 	return z.b[z.c-1]
// }

// MARKER: readn{1,2,3,4,8} should throw an out of bounds error if past length.
// MARKER: readn1: explicitly ensure bounds check is done
// MARKER: readn{2,3,4,8}: ensure you slice z.b completely so we get bounds error if past end.

// readn1 returns the next byte and advances the cursor.
// Indexing z.b[z.c] is what provides the bounds check when past the end.
func (z *bytesDecReader) readn1() (v uint8) {
	v = z.b[z.c]
	z.c++
	return
}

// readn2 returns the next 2 bytes and advances the cursor.
// The full slice expression provides the bounds check when past the end.
// NOTE(review): okBytes2 presumably converts the 2-byte slice into a [2]byte - confirm.
func (z *bytesDecReader) readn2() (bs [2]byte) {
	// copy(bs[:], z.b[z.c:z.c+2])
	// bs[1] = z.b[z.c+1]
	// bs[0] = z.b[z.c]
	bs = okBytes2(z.b[z.c : z.c+2])
	z.c += 2
	return
}

// readn3 returns the next 3 bytes and advances the cursor.
// The full slice expression provides the bounds check when past the end.
func (z *bytesDecReader) readn3() (bs [3]byte) {
	// copy(bs[1:], z.b[z.c:z.c+3])
	bs = okBytes3(z.b[z.c : z.c+3])
	z.c += 3
	return
}

// readn4 returns the next 4 bytes and advances the cursor.
// The full slice expression provides the bounds check when past the end.
func (z *bytesDecReader) readn4() (bs [4]byte) {
	// copy(bs[:], z.b[z.c:z.c+4])
	bs = okBytes4(z.b[z.c : z.c+4])
	z.c += 4
	return
}

// readn8 returns the next 8 bytes and advances the cursor.
// The full slice expression provides the bounds check when past the end.
func (z *bytesDecReader) readn8() (bs [8]byte) {
	// copy(bs[:], z.b[z.c:z.c+8])
	bs = okBytes8(z.b[z.c : z.c+8])
	z.c += 8
	return
}

// jsonReadNum steps the cursor back by one (un-reading the last byte), then
// returns a view of the run of bytes accepted by isNumberChar starting there.
// The cursor is left just past that run. Reaching the end of the slice ends
// the run without error.
func (z *bytesDecReader) jsonReadNum() []byte {
	z.c-- // unread
	i := z.c
LOOP:
	// gracefully handle end of slice, as end of stream is meaningful here
	if i < uint(len(z.b)) && isNumberChar(z.b[i]) {
		i++
		goto LOOP
	}
	// swap: z.c becomes the end of the run, i becomes its start
	z.c, i = i, z.c
	// MARKER: 20230103: byteSliceOf here prevents inlining of jsonReadNum
	// return byteSliceOf(z.b, i, z.c)
	return z.b[i:z.c]
}

// jsonReadAsisChars scans forward to the first '"' or '\\' byte and returns
// the bytes from the cursor up to and including it, leaving the cursor just
// past it. Running off the end of z.b hits the index bounds check.
// NOTE(review): byteSliceOf(z.b, i, z.c) is presumably equivalent to
// z.b[i:z.c], per the commented-out alternative - confirm.
func (z *bytesDecReader) jsonReadAsisChars() []byte {
	i := z.c
LOOP:
	token := z.b[i]
	i++
	if token == '"' || token == '\\' {
		// swap: z.c becomes the end of the run, i becomes its start
		z.c, i = i, z.c
		return byteSliceOf(z.b, i, z.c)
		// return z.b[i:z.c]
	}
	goto LOOP
}

// skipWhitespace skips bytes accepted by isWhitespaceChar and returns the
// first byte that is not, leaving the cursor just past that byte.
// Running off the end of z.b hits the index bounds check.
func (z *bytesDecReader) skipWhitespace() (token byte) {
	i := z.c
LOOP:
	token = z.b[i]
	if isWhitespaceChar(token) {
		i++
		goto LOOP
	}
	z.c = i + 1
	return
}

// readUntil returns the bytes from the cursor up to (but excluding) the first
// occurrence of stop, and leaves the cursor just past the stop byte.
// Running off the end of z.b hits the index bounds check.
func (z *bytesDecReader) readUntil(stop byte) (out []byte) {
	i := z.c
LOOP:
	if z.b[i] == stop {
		out = byteSliceOf(z.b, z.c, i)
		// out = z.b[z.c:i]
		z.c = i + 1
		return
	}
	i++
	goto LOOP
}

// --------------

// decRd bundles both reader implementations with an embedded decReader,
// plus a set of flags. The bytes flag says which implementation is in use.
type decRd struct {
	rb bytesDecReader
	ri *ioDecReader

	decReader

	bytes bool // is bytes reader

	// MARKER: these fields below should belong directly in Encoder.
	// we pack them here for space efficiency and cache-line optimization.

	mtr bool // is maptype a known type?
	str bool // is slicetype a known type?

	be   bool // is binary encoding
	js   bool // is json handle
	jsms bool // is json handle, and MapKeyAsString
	cbor bool // is cbor handle

	cbreak bool // is a check breaker

}

// From our benchmarking, we see the following impact performance:
//
//   - functions that are too big to inline
//   - interface calls (as no inlining can occur)
//
// decRd is designed to embed a decReader, and then re-implement some of the decReader
// methods using a conditional branch.
//
// We only override the ones where the bytes version is inlined AND the wrapper method
// (containing the bytes version alongside a conditional branch) is also inlined.
//
// We use ./run.sh -z to check.
//
// Right now, only numread and "carefully crafted" readn1 can be inlined.

// numread returns the number of bytes read, from whichever reader is in use.
func (z *decRd) numread() uint {
	if z.bytes {
		return z.rb.numread()
	}
	return z.ri.numread()
}

// readn1 reads one byte from whichever reader is in use.
// The bytes path is a hand-inlined copy of bytesDecReader.readn1.
func (z *decRd) readn1() (v uint8) {
	if z.bytes {
		// return z.rb.readn1()
		// MARKER: calling z.rb.readn1() prevents decRd.readn1 from being inlined.
		// copy code, to manually inline and explicitly return here.
		// Keep in sync with bytesDecReader.readn1
		v = z.rb.b[z.rb.c]
		z.rb.c++
		return
	}
	return z.ri.readn1()
}

// func (z *decRd) readn4() [4]byte {
// 	if z.bytes {
// 		return z.rb.readn4()
// 	}
// 	return z.ri.readn4()
// }

// func (z *decRd) readn3() [3]byte {
// 	if z.bytes {
// 		return z.rb.readn3()
// 	}
// 	return z.ri.readn3()
// }

// func (z *decRd) skipWhitespace() byte {
// 	if z.bytes {
// 		return z.rb.skipWhitespace()
// 	}
// 	return z.ri.skipWhitespace()
// }

// devNullReader is an always-empty reader: Read reports 0 bytes and io.EOF,
// and Close does nothing.
type devNullReader struct{}

func (devNullReader) Read(p []byte) (int, error) { return 0, io.EOF }
func (devNullReader) Close() error               { return nil }

// readFull reads from r until bs is full or a read returns an error,
// and returns the number of bytes read along with that error.
//
// An io.EOF that accompanies a read which delivered data is dropped, so it
// only surfaces if a later read is needed and delivers nothing. A short read
// is reported with the reader's own error (e.g. io.EOF), not
// io.ErrUnexpectedEOF.
func readFull(r io.Reader, bs []byte) (n uint, err error) {
	var nn int
	for n < uint(len(bs)) && err == nil {
		nn, err = r.Read(bs[n:])
		if nn > 0 {
			if err == io.EOF {
				// leave EOF for next time
				err = nil
			}
			n += uint(nn)
		}
	}
	// do not do this below - it serves no purpose
	// if n != len(bs) && err == io.EOF { err = io.ErrUnexpectedEOF }
	return
}

// compile-time check that *decRd implements decReader
var _ decReader = (*decRd)(nil)