gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

reader.go (13198B)


      1 // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
      2 // Use of this source code is governed by a MIT license found in the LICENSE file.
      3 
      4 package codec
      5 
      6 import (
      7 	"bufio"
      8 	"bytes"
      9 	"io"
     10 	"strings"
     11 )
     12 
     13 // decReader abstracts the reading source, allowing implementations that can
     14 // read from an io.Reader or directly off a byte slice with zero-copying.
     15 type decReader interface {
     16 	// readx will return a view of the []byte if decoding from a []byte, OR
     17 	// read into the implementation scratch buffer if possible i.e. n < len(scratchbuf), OR
     18 	// create a new []byte and read into that
     19 	readx(n uint) []byte
     20 
     21 	readb([]byte)
     22 
     23 	readn1() byte
     24 	readn2() [2]byte
     25 	readn3() [3]byte
     26 	readn4() [4]byte
     27 	readn8() [8]byte
     28 	// readn1eof() (v uint8, eof bool)
     29 
     30 	// // read up to 8 bytes at a time
     31 	// readn(num uint8) (v [8]byte)
     32 
     33 	numread() uint // number of bytes read
     34 
     35 	// skip any whitespace characters, and return the first non-matching byte
     36 	skipWhitespace() (token byte)
     37 
     38 	// jsonReadNum will include last read byte in first element of slice,
     39 	// and continue numeric characters until it sees a non-numeric char
     40 	// or EOF. If it sees a non-numeric character, it will unread that.
     41 	jsonReadNum() []byte
     42 
     43 	// jsonReadAsisChars will read json plain characters (anything but " or \)
     44 	// and return a slice terminated by a non-json asis character.
     45 	jsonReadAsisChars() []byte
     46 
     47 	// skip will skip any byte that matches, and return the first non-matching byte
     48 	// skip(accept *bitset256) (token byte)
     49 
     50 	// readTo will read any byte that matches, stopping once no-longer matching.
     51 	// readTo(accept *bitset256) (out []byte)
     52 
     53 	// readUntil will read, only stopping once it matches the 'stop' byte (which it excludes).
     54 	readUntil(stop byte) (out []byte)
     55 }
     56 
     57 // ------------------------------------------------
     58 
     59 type unreadByteStatus uint8
     60 
     61 // unreadByteStatus goes from
     62 // undefined (when initialized) -- (read) --> canUnread -- (unread) --> canRead ...
     63 const (
     64 	unreadByteUndefined unreadByteStatus = iota
     65 	unreadByteCanRead
     66 	unreadByteCanUnread
     67 )
     68 
     69 // const defBufReaderSize = 4096
     70 
     71 // --------------------
     72 
     73 // ioReaderByteScanner contains the io.Reader and io.ByteScanner interfaces
     74 type ioReaderByteScanner interface {
     75 	io.Reader
     76 	io.ByteScanner
     77 	// ReadByte() (byte, error)
     78 	// UnreadByte() error
     79 	// Read(p []byte) (n int, err error)
     80 }
     81 
// ioReaderByteScannerT does a simple wrapper of a io.ByteScanner
// over a io.Reader.
//
// It remembers the last byte handed out by a read, together with a
// status flag, so that a single byte can be unread and then served
// again by the next Read/ReadByte.
type ioReaderByteScannerT struct {
	r io.Reader // underlying source; replaced by reset

	l  byte             // last byte
	ls unreadByteStatus // last byte status

	_ [2]byte // padding
	b [4]byte // tiny buffer for reading single bytes
}
     93 
     94 func (z *ioReaderByteScannerT) ReadByte() (c byte, err error) {
     95 	if z.ls == unreadByteCanRead {
     96 		z.ls = unreadByteCanUnread
     97 		c = z.l
     98 	} else {
     99 		_, err = z.Read(z.b[:1])
    100 		c = z.b[0]
    101 	}
    102 	return
    103 }
    104 
    105 func (z *ioReaderByteScannerT) UnreadByte() (err error) {
    106 	switch z.ls {
    107 	case unreadByteCanUnread:
    108 		z.ls = unreadByteCanRead
    109 	case unreadByteCanRead:
    110 		err = errDecUnreadByteLastByteNotRead
    111 	case unreadByteUndefined:
    112 		err = errDecUnreadByteNothingToRead
    113 	default:
    114 		err = errDecUnreadByteUnknown
    115 	}
    116 	return
    117 }
    118 
    119 func (z *ioReaderByteScannerT) Read(p []byte) (n int, err error) {
    120 	if len(p) == 0 {
    121 		return
    122 	}
    123 	var firstByte bool
    124 	if z.ls == unreadByteCanRead {
    125 		z.ls = unreadByteCanUnread
    126 		p[0] = z.l
    127 		if len(p) == 1 {
    128 			n = 1
    129 			return
    130 		}
    131 		firstByte = true
    132 		p = p[1:]
    133 	}
    134 	n, err = z.r.Read(p)
    135 	if n > 0 {
    136 		if err == io.EOF && n == len(p) {
    137 			err = nil // read was successful, so postpone EOF (till next time)
    138 		}
    139 		z.l = p[n-1]
    140 		z.ls = unreadByteCanUnread
    141 	}
    142 	if firstByte {
    143 		n++
    144 	}
    145 	return
    146 }
    147 
    148 func (z *ioReaderByteScannerT) reset(r io.Reader) {
    149 	z.r = r
    150 	z.ls = unreadByteUndefined
    151 	z.l = 0
    152 }
    153 
// ioDecReader is a decReader that reads off an io.Reader.
//
// All reads go through br, which reset points at one of: the caller's reader
// (if it can already scan bytes), the internal rr wrapper, or the internal
// bufio.Reader bb.
type ioDecReader struct {
	rr ioReaderByteScannerT // the reader passed in, wrapped into a reader+bytescanner

	n uint // num read

	blist *bytesFreelist // used by reset to check/obtain bufr

	bufr []byte              // scratch buffer reused by jsonReadNum/jsonReadAsisChars/readUntil
	br   ioReaderByteScanner // main reader used for Read|ReadByte|UnreadByte
	bb   *bufio.Reader       // created internally, and reused on reset if needed

	x [64 + 40]byte // for: get struct field name, swallow valueTypeBytes, etc (readx uses it for small reads)
}
    168 
    169 func (z *ioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) {
    170 	z.blist = blist
    171 	z.n = 0
    172 	z.bufr = z.blist.check(z.bufr, 256)
    173 	z.br = nil
    174 
    175 	var ok bool
    176 
    177 	if bufsize <= 0 {
    178 		z.br, ok = r.(ioReaderByteScanner)
    179 		if !ok {
    180 			z.rr.reset(r)
    181 			z.br = &z.rr
    182 		}
    183 		return
    184 	}
    185 
    186 	// bufsize > 0 ...
    187 
    188 	// if bytes.[Buffer|Reader], no value in adding extra buffer
    189 	// if bufio.Reader, no value in extra buffer unless size changes
    190 	switch bb := r.(type) {
    191 	case *strings.Reader:
    192 		z.br = bb
    193 	case *bytes.Buffer:
    194 		z.br = bb
    195 	case *bytes.Reader:
    196 		z.br = bb
    197 	case *bufio.Reader:
    198 		if bb.Size() == bufsize {
    199 			z.br = bb
    200 		}
    201 	}
    202 
    203 	if z.br == nil {
    204 		if z.bb != nil && z.bb.Size() == bufsize {
    205 			z.bb.Reset(r)
    206 		} else {
    207 			z.bb = bufio.NewReaderSize(r, bufsize)
    208 		}
    209 		z.br = z.bb
    210 	}
    211 }
    212 
    213 func (z *ioDecReader) numread() uint {
    214 	return z.n
    215 }
    216 
    217 func (z *ioDecReader) readn1() (b uint8) {
    218 	b, err := z.br.ReadByte()
    219 	halt.onerror(err)
    220 	z.n++
    221 	return
    222 }
    223 
    224 func (z *ioDecReader) readn2() (bs [2]byte) {
    225 	z.readb(bs[:])
    226 	return
    227 }
    228 
    229 func (z *ioDecReader) readn3() (bs [3]byte) {
    230 	z.readb(bs[:])
    231 	return
    232 }
    233 
    234 func (z *ioDecReader) readn4() (bs [4]byte) {
    235 	z.readb(bs[:])
    236 	return
    237 }
    238 
    239 func (z *ioDecReader) readn8() (bs [8]byte) {
    240 	z.readb(bs[:])
    241 	return
    242 }
    243 
    244 func (z *ioDecReader) readx(n uint) (bs []byte) {
    245 	if n == 0 {
    246 		return zeroByteSlice
    247 	}
    248 	if n < uint(len(z.x)) {
    249 		bs = z.x[:n]
    250 	} else {
    251 		bs = make([]byte, n)
    252 	}
    253 	nn, err := readFull(z.br, bs)
    254 	z.n += nn
    255 	halt.onerror(err)
    256 	return
    257 }
    258 
    259 func (z *ioDecReader) readb(bs []byte) {
    260 	if len(bs) == 0 {
    261 		return
    262 	}
    263 	nn, err := readFull(z.br, bs)
    264 	z.n += nn
    265 	halt.onerror(err)
    266 }
    267 
    268 // func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
    269 // 	b, err := z.br.ReadByte()
    270 // 	if err == nil {
    271 // 		z.n++
    272 // 	} else if err == io.EOF {
    273 // 		eof = true
    274 // 	} else {
    275 // 		halt.onerror(err)
    276 // 	}
    277 // 	return
    278 // }
    279 
    280 func (z *ioDecReader) jsonReadNum() (bs []byte) {
    281 	z.unreadn1()
    282 	z.bufr = z.bufr[:0]
    283 LOOP:
    284 	// i, eof := z.readn1eof()
    285 	i, err := z.br.ReadByte()
    286 	if err == io.EOF {
    287 		return z.bufr
    288 	}
    289 	if err != nil {
    290 		halt.onerror(err)
    291 	}
    292 	z.n++
    293 	if isNumberChar(i) {
    294 		z.bufr = append(z.bufr, i)
    295 		goto LOOP
    296 	}
    297 	z.unreadn1()
    298 	return z.bufr
    299 }
    300 
    301 func (z *ioDecReader) jsonReadAsisChars() (bs []byte) {
    302 	z.bufr = z.bufr[:0]
    303 LOOP:
    304 	i := z.readn1()
    305 	z.bufr = append(z.bufr, i)
    306 	if i == '"' || i == '\\' {
    307 		return z.bufr
    308 	}
    309 	goto LOOP
    310 }
    311 
    312 func (z *ioDecReader) skipWhitespace() (token byte) {
    313 LOOP:
    314 	token = z.readn1()
    315 	if isWhitespaceChar(token) {
    316 		goto LOOP
    317 	}
    318 	return
    319 }
    320 
    321 // func (z *ioDecReader) readUntil(stop byte) []byte {
    322 // 	z.bufr = z.bufr[:0]
    323 // LOOP:
    324 // 	token := z.readn1()
    325 // 	z.bufr = append(z.bufr, token)
    326 // 	if token == stop {
    327 // 		return z.bufr[:len(z.bufr)-1]
    328 // 	}
    329 // 	goto LOOP
    330 // }
    331 
    332 func (z *ioDecReader) readUntil(stop byte) []byte {
    333 	z.bufr = z.bufr[:0]
    334 LOOP:
    335 	token := z.readn1()
    336 	if token == stop {
    337 		return z.bufr
    338 	}
    339 	z.bufr = append(z.bufr, token)
    340 	goto LOOP
    341 }
    342 
    343 func (z *ioDecReader) unreadn1() {
    344 	err := z.br.UnreadByte()
    345 	halt.onerror(err)
    346 	z.n--
    347 }
    348 
    349 // ------------------------------------
    350 
// bytesDecReader is a decReader that reads off a byte slice with zero copying.
//
// Note: we do not try to convert index'ing out of bounds to an io.EOF.
// Instead, we let the out-of-bounds panic bubble up to the exported
// Encode/Decode method, and recover it there as an io.EOF.
//
// Every function here MUST defensively check bounds either explicitly
// or via a bounds check.
//
// see panicValToErr(...) function in helper.go.
type bytesDecReader struct {
	b []byte // data (reset caps its capacity at its length)
	c uint   // cursor: index of the next byte to read; also the number of bytes read
}
    365 
    366 func (z *bytesDecReader) reset(in []byte) {
    367 	z.b = in[:len(in):len(in)] // reslicing must not go past capacity
    368 	z.c = 0
    369 }
    370 
    371 func (z *bytesDecReader) numread() uint {
    372 	return z.c
    373 }
    374 
    375 // Note: slicing from a non-constant start position is more expensive,
    376 // as more computation is required to decipher the pointer start position.
    377 // However, we do it only once, and it's better than reslicing both z.b and return value.
    378 
// readx returns the next n bytes as a view into the data (no copy),
// and advances the cursor by n.
//
// Asking for more than what remains makes the slice expression panic,
// before the cursor is moved. That panic is the bounds check.
func (z *bytesDecReader) readx(n uint) (bs []byte) {
	// x := z.c + n
	// bs = z.b[z.c:x]
	// z.c = x
	bs = z.b[z.c : z.c+n]
	z.c += n
	return
}
    387 
    388 func (z *bytesDecReader) readb(bs []byte) {
    389 	copy(bs, z.readx(uint(len(bs))))
    390 }
    391 
    392 // MARKER: do not use this - as it calls into memmove (as the size of data to move is unknown)
    393 // func (z *bytesDecReader) readnn(bs []byte, n uint) {
    394 // 	x := z.c
    395 // 	copy(bs, z.b[x:x+n])
    396 // 	z.c += n
    397 // }
    398 
    399 // func (z *bytesDecReader) readn(num uint8) (bs [8]byte) {
    400 // 	x := z.c + uint(num)
    401 // 	copy(bs[:], z.b[z.c:x]) // slice z.b completely, so we get bounds error if past
    402 // 	z.c = x
    403 // 	return
    404 // }
    405 
    406 // func (z *bytesDecReader) readn1() uint8 {
    407 // 	z.c++
    408 // 	return z.b[z.c-1]
    409 // }
    410 
    411 // MARKER: readn{1,2,3,4,8} should throw an out of bounds error if past length.
    412 // MARKER: readn1: explicitly ensure bounds check is done
    413 // MARKER: readn{2,3,4,8}: ensure you slice z.b completely so we get bounds error if past end.
    414 
// readn1 returns the byte at the cursor, and advances the cursor by one.
//
// The index expression comes first, so reading past the end panics
// (the bounds check) without moving the cursor.
//
// decRd.readn1 holds a manually inlined copy of this body: keep them in sync.
func (z *bytesDecReader) readn1() (v uint8) {
	v = z.b[z.c]
	z.c++
	return
}
    420 
    421 func (z *bytesDecReader) readn2() (bs [2]byte) {
    422 	// copy(bs[:], z.b[z.c:z.c+2])
    423 	// bs[1] = z.b[z.c+1]
    424 	// bs[0] = z.b[z.c]
    425 	bs = okBytes2(z.b[z.c : z.c+2])
    426 	z.c += 2
    427 	return
    428 }
    429 
    430 func (z *bytesDecReader) readn3() (bs [3]byte) {
    431 	// copy(bs[1:], z.b[z.c:z.c+3])
    432 	bs = okBytes3(z.b[z.c : z.c+3])
    433 	z.c += 3
    434 	return
    435 }
    436 
    437 func (z *bytesDecReader) readn4() (bs [4]byte) {
    438 	// copy(bs[:], z.b[z.c:z.c+4])
    439 	bs = okBytes4(z.b[z.c : z.c+4])
    440 	z.c += 4
    441 	return
    442 }
    443 
    444 func (z *bytesDecReader) readn8() (bs [8]byte) {
    445 	// copy(bs[:], z.b[z.c:z.c+8])
    446 	bs = okBytes8(z.b[z.c : z.c+8])
    447 	z.c += 8
    448 	return
    449 }
    450 
// jsonReadNum steps the cursor back by one (so the byte read last is included),
// then scans forward while the bytes are number characters.
//
// It returns that run as a view into the data (no copy), leaving the cursor
// at the first non-number byte, or at the end of the data.
func (z *bytesDecReader) jsonReadNum() []byte {
	z.c-- // unread
	i := z.c
LOOP:
	// gracefully handle end of slice, as end of stream is meaningful here
	if i < uint(len(z.b)) && isNumberChar(z.b[i]) {
		i++
		goto LOOP
	}
	// swap: z.c becomes the end of the run, and i its start
	z.c, i = i, z.c
	// MARKER: 20230103: byteSliceOf here prevents inlining of jsonReadNum
	// return byteSliceOf(z.b, i, z.c)
	return z.b[i:z.c]
}
    465 
    466 func (z *bytesDecReader) jsonReadAsisChars() []byte {
    467 	i := z.c
    468 LOOP:
    469 	token := z.b[i]
    470 	i++
    471 	if token == '"' || token == '\\' {
    472 		z.c, i = i, z.c
    473 		return byteSliceOf(z.b, i, z.c)
    474 		// return z.b[i:z.c]
    475 	}
    476 	goto LOOP
    477 }
    478 
    479 func (z *bytesDecReader) skipWhitespace() (token byte) {
    480 	i := z.c
    481 LOOP:
    482 	token = z.b[i]
    483 	if isWhitespaceChar(token) {
    484 		i++
    485 		goto LOOP
    486 	}
    487 	z.c = i + 1
    488 	return
    489 }
    490 
    491 func (z *bytesDecReader) readUntil(stop byte) (out []byte) {
    492 	i := z.c
    493 LOOP:
    494 	if z.b[i] == stop {
    495 		out = byteSliceOf(z.b, z.c, i)
    496 		// out = z.b[z.c:i]
    497 		z.c = i + 1
    498 		return
    499 	}
    500 	i++
    501 	goto LOOP
    502 }
    503 
    504 // --------------
    505 
// decRd bundles the bytes reader, the io reader and an embedded decReader,
// along with a set of flags.
//
// The methods that decRd defines itself (numread, readn1) branch on the
// bytes flag: rb is used when it is set, ri otherwise. Every other decReader
// method is promoted from the embedded interface value.
type decRd struct {
	rb bytesDecReader
	ri *ioDecReader

	decReader

	bytes bool // is bytes reader

	// MARKER: these fields below should belong directly in Encoder.
	// we pack them here for space efficiency and cache-line optimization.
	// NOTE(review): this is the decode-side reader, so "Decoder" is presumably meant - confirm.

	mtr bool // is maptype a known type?
	str bool // is slicetype a known type?

	be   bool // is binary encoding
	js   bool // is json handle
	jsms bool // is json handle, and MapKeyAsString
	cbor bool // is cbor handle

	cbreak bool // is a check breaker

}
    528 
    529 // From our benchmarking, we see that the following impact performance:
    530 //
    531 // - functions that are too big to inline
    532 // - interface calls (as no inlining can occur)
    533 //
    534 // decRd is designed to embed a decReader, and then re-implement some of the decReader
    535 // methods using a conditional branch.
    536 //
    537 // We only override the ones where the bytes version is inlined AND the wrapper method
    538 // (containing the bytes version alongside a conditional branch) is also inlined.
    539 //
    540 // We use ./run.sh -z to check.
    541 //
    542 // Right now, only numread and "carefully crafted" readn1 can be inlined.
    543 
// numread reports the number of bytes read, taken from the bytes reader
// if z.bytes is set, else from the io reader.
//
// It calls the concrete reader directly, instead of going through
// the embedded decReader interface.
func (z *decRd) numread() uint {
	if z.bytes {
		return z.rb.numread()
	}
	return z.ri.numread()
}
    550 
// readn1 reads a single byte, from the bytes reader if z.bytes is set,
// else from the io reader.
//
// The bytes branch is a hand-inlined copy of bytesDecReader.readn1:
// reading past the end of the data panics on the index expression.
func (z *decRd) readn1() (v uint8) {
	if z.bytes {
		// return z.rb.readn1()
		// MARKER: calling z.rb.readn1() prevents decRd.readn1 from being inlined.
		// copy code, to manually inline and explicitly return here.
		// Keep in sync with bytesDecReader.readn1
		v = z.rb.b[z.rb.c]
		z.rb.c++
		return
	}
	return z.ri.readn1()
}
    563 
    564 // func (z *decRd) readn4() [4]byte {
    565 // 	if z.bytes {
    566 // 		return z.rb.readn4()
    567 // 	}
    568 // 	return z.ri.readn4()
    569 // }
    570 
    571 // func (z *decRd) readn3() [3]byte {
    572 // 	if z.bytes {
    573 // 		return z.rb.readn3()
    574 // 	}
    575 // 	return z.ri.readn3()
    576 // }
    577 
    578 // func (z *decRd) skipWhitespace() byte {
    579 // 	if z.bytes {
    580 // 		return z.rb.skipWhitespace()
    581 // 	}
    582 // 	return z.ri.skipWhitespace()
    583 // }
    584 
    585 type devNullReader struct{}
    586 
    587 func (devNullReader) Read(p []byte) (int, error) { return 0, io.EOF }
    588 func (devNullReader) Close() error               { return nil }
    589 
    590 func readFull(r io.Reader, bs []byte) (n uint, err error) {
    591 	var nn int
    592 	for n < uint(len(bs)) && err == nil {
    593 		nn, err = r.Read(bs[n:])
    594 		if nn > 0 {
    595 			if err == io.EOF {
    596 				// leave EOF for next time
    597 				err = nil
    598 			}
    599 			n += uint(nn)
    600 		}
    601 	}
    602 	// do not do this below - it serves no purpose
    603 	// if n != len(bs) && err == io.EOF { err = io.ErrUnexpectedEOF }
    604 	return
    605 }
    606 
// compile-time check that *decRd satisfies decReader
var _ decReader = (*decRd)(nil)