binc.go (32313B)
1 // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved. 2 // Use of this source code is governed by a MIT license found in the LICENSE file. 3 4 package codec 5 6 import ( 7 "math" 8 "reflect" 9 "time" 10 "unicode/utf8" 11 ) 12 13 // Symbol management: 14 // - symbols are stored in a symbol map during encoding and decoding. 15 // - the symbols persist until the (En|De)coder ResetXXX method is called. 16 17 const bincDoPrune = true 18 19 // vd as low 4 bits (there are 16 slots) 20 const ( 21 bincVdSpecial byte = iota 22 bincVdPosInt 23 bincVdNegInt 24 bincVdFloat 25 26 bincVdString 27 bincVdByteArray 28 bincVdArray 29 bincVdMap 30 31 bincVdTimestamp 32 bincVdSmallInt 33 _ // bincVdUnicodeOther 34 bincVdSymbol 35 36 _ // bincVdDecimal 37 _ // open slot 38 _ // open slot 39 bincVdCustomExt = 0x0f 40 ) 41 42 const ( 43 bincSpNil byte = iota 44 bincSpFalse 45 bincSpTrue 46 bincSpNan 47 bincSpPosInf 48 bincSpNegInf 49 bincSpZeroFloat 50 bincSpZero 51 bincSpNegOne 52 ) 53 54 const ( 55 _ byte = iota // bincFlBin16 56 bincFlBin32 57 _ // bincFlBin32e 58 bincFlBin64 59 _ // bincFlBin64e 60 // others not currently supported 61 ) 62 63 const bincBdNil = 0 // bincVdSpecial<<4 | bincSpNil // staticcheck barfs on this (SA4016) 64 65 var ( 66 bincdescSpecialVsNames = map[byte]string{ 67 bincSpNil: "nil", 68 bincSpFalse: "false", 69 bincSpTrue: "true", 70 bincSpNan: "float", 71 bincSpPosInf: "float", 72 bincSpNegInf: "float", 73 bincSpZeroFloat: "float", 74 bincSpZero: "uint", 75 bincSpNegOne: "int", 76 } 77 bincdescVdNames = map[byte]string{ 78 bincVdSpecial: "special", 79 bincVdSmallInt: "uint", 80 bincVdPosInt: "uint", 81 bincVdFloat: "float", 82 bincVdSymbol: "string", 83 bincVdString: "string", 84 bincVdByteArray: "bytes", 85 bincVdTimestamp: "time", 86 bincVdCustomExt: "ext", 87 bincVdArray: "array", 88 bincVdMap: "map", 89 } 90 ) 91 92 func bincdescbd(bd byte) (s string) { 93 return bincdesc(bd>>4, bd&0x0f) 94 } 95 96 func bincdesc(vd, vs byte) (s string) { 97 if vd 
== bincVdSpecial { 98 s = bincdescSpecialVsNames[vs] 99 } else { 100 s = bincdescVdNames[vd] 101 } 102 if s == "" { 103 s = "unknown" 104 } 105 return 106 } 107 108 type bincEncState struct { 109 m map[string]uint16 // symbols 110 } 111 112 func (e bincEncState) captureState() interface{} { return e.m } 113 func (e *bincEncState) resetState() { e.m = nil } 114 func (e *bincEncState) reset() { e.resetState() } 115 func (e *bincEncState) restoreState(v interface{}) { e.m = v.(map[string]uint16) } 116 117 type bincEncDriver struct { 118 noBuiltInTypes 119 encDriverNoopContainerWriter 120 h *BincHandle 121 bincEncState 122 123 e Encoder 124 } 125 126 func (e *bincEncDriver) encoder() *Encoder { 127 return &e.e 128 } 129 130 func (e *bincEncDriver) EncodeNil() { 131 e.e.encWr.writen1(bincBdNil) 132 } 133 134 func (e *bincEncDriver) EncodeTime(t time.Time) { 135 if t.IsZero() { 136 e.EncodeNil() 137 } else { 138 bs := bincEncodeTime(t) 139 e.e.encWr.writen1(bincVdTimestamp<<4 | uint8(len(bs))) 140 e.e.encWr.writeb(bs) 141 } 142 } 143 144 func (e *bincEncDriver) EncodeBool(b bool) { 145 if b { 146 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpTrue) 147 } else { 148 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpFalse) 149 } 150 } 151 152 func (e *bincEncDriver) encSpFloat(f float64) (done bool) { 153 if f == 0 { 154 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpZeroFloat) 155 } else if math.IsNaN(float64(f)) { 156 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpNan) 157 } else if math.IsInf(float64(f), +1) { 158 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpPosInf) 159 } else if math.IsInf(float64(f), -1) { 160 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpNegInf) 161 } else { 162 return 163 } 164 return true 165 } 166 167 func (e *bincEncDriver) EncodeFloat32(f float32) { 168 if !e.encSpFloat(float64(f)) { 169 e.e.encWr.writen1(bincVdFloat<<4 | bincFlBin32) 170 bigen.writeUint32(e.e.w(), math.Float32bits(f)) 171 } 172 } 173 174 func (e *bincEncDriver) EncodeFloat64(f float64) { 175 if 
e.encSpFloat(f) { 176 return 177 } 178 b := bigen.PutUint64(math.Float64bits(f)) 179 if bincDoPrune { 180 i := 7 181 for ; i >= 0 && (b[i] == 0); i-- { 182 } 183 i++ 184 if i <= 6 { 185 e.e.encWr.writen1(bincVdFloat<<4 | 0x8 | bincFlBin64) 186 e.e.encWr.writen1(byte(i)) 187 e.e.encWr.writeb(b[:i]) 188 return 189 } 190 } 191 e.e.encWr.writen1(bincVdFloat<<4 | bincFlBin64) 192 e.e.encWr.writen8(b) 193 } 194 195 func (e *bincEncDriver) encIntegerPrune32(bd byte, pos bool, v uint64) { 196 b := bigen.PutUint32(uint32(v)) 197 if bincDoPrune { 198 i := byte(pruneSignExt(b[:], pos)) 199 e.e.encWr.writen1(bd | 3 - i) 200 e.e.encWr.writeb(b[i:]) 201 } else { 202 e.e.encWr.writen1(bd | 3) 203 e.e.encWr.writen4(b) 204 } 205 } 206 207 func (e *bincEncDriver) encIntegerPrune64(bd byte, pos bool, v uint64) { 208 b := bigen.PutUint64(v) 209 if bincDoPrune { 210 i := byte(pruneSignExt(b[:], pos)) 211 e.e.encWr.writen1(bd | 7 - i) 212 e.e.encWr.writeb(b[i:]) 213 } else { 214 e.e.encWr.writen1(bd | 7) 215 e.e.encWr.writen8(b) 216 } 217 } 218 219 func (e *bincEncDriver) EncodeInt(v int64) { 220 if v >= 0 { 221 e.encUint(bincVdPosInt<<4, true, uint64(v)) 222 } else if v == -1 { 223 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpNegOne) 224 } else { 225 e.encUint(bincVdNegInt<<4, false, uint64(-v)) 226 } 227 } 228 229 func (e *bincEncDriver) EncodeUint(v uint64) { 230 e.encUint(bincVdPosInt<<4, true, v) 231 } 232 233 func (e *bincEncDriver) encUint(bd byte, pos bool, v uint64) { 234 if v == 0 { 235 e.e.encWr.writen1(bincVdSpecial<<4 | bincSpZero) 236 } else if pos && v >= 1 && v <= 16 { 237 e.e.encWr.writen1(bincVdSmallInt<<4 | byte(v-1)) 238 } else if v <= math.MaxUint8 { 239 e.e.encWr.writen2(bd|0x0, byte(v)) 240 } else if v <= math.MaxUint16 { 241 e.e.encWr.writen1(bd | 0x01) 242 bigen.writeUint16(e.e.w(), uint16(v)) 243 } else if v <= math.MaxUint32 { 244 e.encIntegerPrune32(bd, pos, v) 245 } else { 246 e.encIntegerPrune64(bd, pos, v) 247 } 248 } 249 250 func (e *bincEncDriver) 
EncodeExt(v interface{}, basetype reflect.Type, xtag uint64, ext Ext) { 251 var bs0, bs []byte 252 if ext == SelfExt { 253 bs0 = e.e.blist.get(1024) 254 bs = bs0 255 e.e.sideEncode(v, basetype, &bs) 256 } else { 257 bs = ext.WriteExt(v) 258 } 259 if bs == nil { 260 e.EncodeNil() 261 goto END 262 } 263 e.encodeExtPreamble(uint8(xtag), len(bs)) 264 e.e.encWr.writeb(bs) 265 END: 266 if ext == SelfExt { 267 e.e.blist.put(bs) 268 if !byteSliceSameData(bs0, bs) { 269 e.e.blist.put(bs0) 270 } 271 } 272 } 273 274 func (e *bincEncDriver) EncodeRawExt(re *RawExt) { 275 e.encodeExtPreamble(uint8(re.Tag), len(re.Data)) 276 e.e.encWr.writeb(re.Data) 277 } 278 279 func (e *bincEncDriver) encodeExtPreamble(xtag byte, length int) { 280 e.encLen(bincVdCustomExt<<4, uint64(length)) 281 e.e.encWr.writen1(xtag) 282 } 283 284 func (e *bincEncDriver) WriteArrayStart(length int) { 285 e.encLen(bincVdArray<<4, uint64(length)) 286 } 287 288 func (e *bincEncDriver) WriteMapStart(length int) { 289 e.encLen(bincVdMap<<4, uint64(length)) 290 } 291 292 func (e *bincEncDriver) EncodeSymbol(v string) { 293 //symbols only offer benefit when string length > 1. 294 //This is because strings with length 1 take only 2 bytes to store 295 //(bd with embedded length, and single byte for string val). 
296 297 l := len(v) 298 if l == 0 { 299 e.encBytesLen(cUTF8, 0) 300 return 301 } else if l == 1 { 302 e.encBytesLen(cUTF8, 1) 303 e.e.encWr.writen1(v[0]) 304 return 305 } 306 if e.m == nil { 307 e.m = make(map[string]uint16, 16) 308 } 309 ui, ok := e.m[v] 310 if ok { 311 if ui <= math.MaxUint8 { 312 e.e.encWr.writen2(bincVdSymbol<<4, byte(ui)) 313 } else { 314 e.e.encWr.writen1(bincVdSymbol<<4 | 0x8) 315 bigen.writeUint16(e.e.w(), ui) 316 } 317 } else { 318 e.e.seq++ 319 ui = e.e.seq 320 e.m[v] = ui 321 var lenprec uint8 322 if l <= math.MaxUint8 { 323 // lenprec = 0 324 } else if l <= math.MaxUint16 { 325 lenprec = 1 326 } else if int64(l) <= math.MaxUint32 { 327 lenprec = 2 328 } else { 329 lenprec = 3 330 } 331 if ui <= math.MaxUint8 { 332 e.e.encWr.writen2(bincVdSymbol<<4|0x0|0x4|lenprec, byte(ui)) 333 } else { 334 e.e.encWr.writen1(bincVdSymbol<<4 | 0x8 | 0x4 | lenprec) 335 bigen.writeUint16(e.e.w(), ui) 336 } 337 if lenprec == 0 { 338 e.e.encWr.writen1(byte(l)) 339 } else if lenprec == 1 { 340 bigen.writeUint16(e.e.w(), uint16(l)) 341 } else if lenprec == 2 { 342 bigen.writeUint32(e.e.w(), uint32(l)) 343 } else { 344 bigen.writeUint64(e.e.w(), uint64(l)) 345 } 346 e.e.encWr.writestr(v) 347 } 348 } 349 350 func (e *bincEncDriver) EncodeString(v string) { 351 if e.h.StringToRaw { 352 e.encLen(bincVdByteArray<<4, uint64(len(v))) 353 if len(v) > 0 { 354 e.e.encWr.writestr(v) 355 } 356 return 357 } 358 e.EncodeStringEnc(cUTF8, v) 359 } 360 361 func (e *bincEncDriver) EncodeStringEnc(c charEncoding, v string) { 362 if e.e.c == containerMapKey && c == cUTF8 && (e.h.AsSymbols == 1) { 363 e.EncodeSymbol(v) 364 return 365 } 366 e.encLen(bincVdString<<4, uint64(len(v))) 367 if len(v) > 0 { 368 e.e.encWr.writestr(v) 369 } 370 } 371 372 func (e *bincEncDriver) EncodeStringBytesRaw(v []byte) { 373 if v == nil { 374 e.EncodeNil() 375 return 376 } 377 e.encLen(bincVdByteArray<<4, uint64(len(v))) 378 if len(v) > 0 { 379 e.e.encWr.writeb(v) 380 } 381 } 382 383 func (e 
*bincEncDriver) encBytesLen(c charEncoding, length uint64) { 384 // MARKER: we currently only support UTF-8 (string) and RAW (bytearray). 385 // We should consider supporting bincUnicodeOther. 386 387 if c == cRAW { 388 e.encLen(bincVdByteArray<<4, length) 389 } else { 390 e.encLen(bincVdString<<4, length) 391 } 392 } 393 394 func (e *bincEncDriver) encLen(bd byte, l uint64) { 395 if l < 12 { 396 e.e.encWr.writen1(bd | uint8(l+4)) 397 } else { 398 e.encLenNumber(bd, l) 399 } 400 } 401 402 func (e *bincEncDriver) encLenNumber(bd byte, v uint64) { 403 if v <= math.MaxUint8 { 404 e.e.encWr.writen2(bd, byte(v)) 405 } else if v <= math.MaxUint16 { 406 e.e.encWr.writen1(bd | 0x01) 407 bigen.writeUint16(e.e.w(), uint16(v)) 408 } else if v <= math.MaxUint32 { 409 e.e.encWr.writen1(bd | 0x02) 410 bigen.writeUint32(e.e.w(), uint32(v)) 411 } else { 412 e.e.encWr.writen1(bd | 0x03) 413 bigen.writeUint64(e.e.w(), uint64(v)) 414 } 415 } 416 417 //------------------------------------ 418 419 type bincDecState struct { 420 bdRead bool 421 bd byte 422 vd byte 423 vs byte 424 425 _ bool 426 // MARKER: consider using binary search here instead of a map (ie bincDecSymbol) 427 s map[uint16][]byte 428 } 429 430 func (x bincDecState) captureState() interface{} { return x } 431 func (x *bincDecState) resetState() { *x = bincDecState{} } 432 func (x *bincDecState) reset() { x.resetState() } 433 func (x *bincDecState) restoreState(v interface{}) { *x = v.(bincDecState) } 434 435 type bincDecDriver struct { 436 decDriverNoopContainerReader 437 decDriverNoopNumberHelper 438 noBuiltInTypes 439 440 h *BincHandle 441 442 bincDecState 443 d Decoder 444 } 445 446 func (d *bincDecDriver) decoder() *Decoder { 447 return &d.d 448 } 449 450 func (d *bincDecDriver) descBd() string { 451 return sprintf("%v (%s)", d.bd, bincdescbd(d.bd)) 452 } 453 454 func (d *bincDecDriver) readNextBd() { 455 d.bd = d.d.decRd.readn1() 456 d.vd = d.bd >> 4 457 d.vs = d.bd & 0x0f 458 d.bdRead = true 459 } 460 461 func (d 
*bincDecDriver) advanceNil() (null bool) { 462 if !d.bdRead { 463 d.readNextBd() 464 } 465 if d.bd == bincBdNil { 466 d.bdRead = false 467 return true // null = true 468 } 469 return 470 } 471 472 func (d *bincDecDriver) TryNil() bool { 473 return d.advanceNil() 474 } 475 476 func (d *bincDecDriver) ContainerType() (vt valueType) { 477 if !d.bdRead { 478 d.readNextBd() 479 } 480 if d.bd == bincBdNil { 481 d.bdRead = false 482 return valueTypeNil 483 } else if d.vd == bincVdByteArray { 484 return valueTypeBytes 485 } else if d.vd == bincVdString { 486 return valueTypeString 487 } else if d.vd == bincVdArray { 488 return valueTypeArray 489 } else if d.vd == bincVdMap { 490 return valueTypeMap 491 } 492 return valueTypeUnset 493 } 494 495 func (d *bincDecDriver) DecodeTime() (t time.Time) { 496 if d.advanceNil() { 497 return 498 } 499 if d.vd != bincVdTimestamp { 500 d.d.errorf("cannot decode time - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 501 } 502 t, err := bincDecodeTime(d.d.decRd.readx(uint(d.vs))) 503 halt.onerror(err) 504 d.bdRead = false 505 return 506 } 507 508 func (d *bincDecDriver) decFloatPruned(maxlen uint8) { 509 l := d.d.decRd.readn1() 510 if l > maxlen { 511 d.d.errorf("cannot read float - at most %v bytes used to represent float - received %v bytes", maxlen, l) 512 } 513 for i := l; i < maxlen; i++ { 514 d.d.b[i] = 0 515 } 516 d.d.decRd.readb(d.d.b[0:l]) 517 } 518 519 func (d *bincDecDriver) decFloatPre32() (b [4]byte) { 520 if d.vs&0x8 == 0 { 521 b = d.d.decRd.readn4() 522 } else { 523 d.decFloatPruned(4) 524 copy(b[:], d.d.b[:]) 525 } 526 return 527 } 528 529 func (d *bincDecDriver) decFloatPre64() (b [8]byte) { 530 if d.vs&0x8 == 0 { 531 b = d.d.decRd.readn8() 532 } else { 533 d.decFloatPruned(8) 534 copy(b[:], d.d.b[:]) 535 } 536 return 537 } 538 539 func (d *bincDecDriver) decFloatVal() (f float64) { 540 switch d.vs & 0x7 { 541 case bincFlBin32: 542 f = float64(math.Float32frombits(bigen.Uint32(d.decFloatPre32()))) 543 case 
bincFlBin64: 544 f = math.Float64frombits(bigen.Uint64(d.decFloatPre64())) 545 default: 546 // ok = false 547 d.d.errorf("read float supports only float32/64 - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 548 } 549 return 550 } 551 552 func (d *bincDecDriver) decUint() (v uint64) { 553 switch d.vs { 554 case 0: 555 v = uint64(d.d.decRd.readn1()) 556 case 1: 557 v = uint64(bigen.Uint16(d.d.decRd.readn2())) 558 case 2: 559 b3 := d.d.decRd.readn3() 560 var b [4]byte 561 copy(b[1:], b3[:]) 562 v = uint64(bigen.Uint32(b)) 563 case 3: 564 v = uint64(bigen.Uint32(d.d.decRd.readn4())) 565 case 4, 5, 6: 566 var b [8]byte 567 lim := 7 - d.vs 568 bs := d.d.b[lim:8] 569 d.d.decRd.readb(bs) 570 copy(b[lim:], bs) 571 v = bigen.Uint64(b) 572 case 7: 573 v = bigen.Uint64(d.d.decRd.readn8()) 574 default: 575 d.d.errorf("unsigned integers with greater than 64 bits of precision not supported: d.vs: %v %x", d.vs, d.vs) 576 } 577 return 578 } 579 580 func (d *bincDecDriver) uintBytes() (bs []byte) { 581 switch d.vs { 582 case 0: 583 bs = d.d.b[:1] 584 bs[0] = d.d.decRd.readn1() 585 case 1: 586 bs = d.d.b[:2] 587 d.d.decRd.readb(bs) 588 case 2: 589 bs = d.d.b[:3] 590 d.d.decRd.readb(bs) 591 case 3: 592 bs = d.d.b[:4] 593 d.d.decRd.readb(bs) 594 case 4, 5, 6: 595 lim := 7 - d.vs 596 bs = d.d.b[lim:8] 597 d.d.decRd.readb(bs) 598 case 7: 599 bs = d.d.b[:8] 600 d.d.decRd.readb(bs) 601 default: 602 d.d.errorf("unsigned integers with greater than 64 bits of precision not supported: d.vs: %v %x", d.vs, d.vs) 603 } 604 return 605 } 606 607 func (d *bincDecDriver) decInteger() (ui uint64, neg, ok bool) { 608 ok = true 609 vd, vs := d.vd, d.vs 610 if vd == bincVdPosInt { 611 ui = d.decUint() 612 } else if vd == bincVdNegInt { 613 ui = d.decUint() 614 neg = true 615 } else if vd == bincVdSmallInt { 616 ui = uint64(d.vs) + 1 617 } else if vd == bincVdSpecial { 618 if vs == bincSpZero { 619 // i = 0 620 } else if vs == bincSpNegOne { 621 neg = true 622 ui = 1 623 } else { 624 ok = 
false 625 // d.d.errorf("integer decode has invalid special value %x-%x/%s", d.vd, d.vs, bincdesc(d.vd, d.vs)) 626 } 627 } else { 628 ok = false 629 // d.d.errorf("integer can only be decoded from int/uint. d.bd: 0x%x, d.vd: 0x%x", d.bd, d.vd) 630 } 631 return 632 } 633 634 func (d *bincDecDriver) decFloat() (f float64, ok bool) { 635 ok = true 636 vd, vs := d.vd, d.vs 637 if vd == bincVdSpecial { 638 if vs == bincSpNan { 639 f = math.NaN() 640 } else if vs == bincSpPosInf { 641 f = math.Inf(1) 642 } else if vs == bincSpZeroFloat || vs == bincSpZero { 643 644 } else if vs == bincSpNegInf { 645 f = math.Inf(-1) 646 } else { 647 ok = false 648 // d.d.errorf("float - invalid special value %x-%x/%s", d.vd, d.vs, bincdesc(d.vd, d.vs)) 649 } 650 } else if vd == bincVdFloat { 651 f = d.decFloatVal() 652 } else { 653 ok = false 654 } 655 return 656 } 657 658 func (d *bincDecDriver) DecodeInt64() (i int64) { 659 if d.advanceNil() { 660 return 661 } 662 i = decNegintPosintFloatNumberHelper{&d.d}.int64(d.decInteger()) 663 d.bdRead = false 664 return 665 } 666 667 func (d *bincDecDriver) DecodeUint64() (ui uint64) { 668 if d.advanceNil() { 669 return 670 } 671 ui = decNegintPosintFloatNumberHelper{&d.d}.uint64(d.decInteger()) 672 d.bdRead = false 673 return 674 } 675 676 func (d *bincDecDriver) DecodeFloat64() (f float64) { 677 if d.advanceNil() { 678 return 679 } 680 f = decNegintPosintFloatNumberHelper{&d.d}.float64(d.decFloat()) 681 d.bdRead = false 682 return 683 } 684 685 func (d *bincDecDriver) DecodeBool() (b bool) { 686 if d.advanceNil() { 687 return 688 } 689 if d.bd == (bincVdSpecial | bincSpFalse) { 690 // b = false 691 } else if d.bd == (bincVdSpecial | bincSpTrue) { 692 b = true 693 } else { 694 d.d.errorf("bool - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 695 } 696 d.bdRead = false 697 return 698 } 699 700 func (d *bincDecDriver) ReadMapStart() (length int) { 701 if d.advanceNil() { 702 return containerLenNil 703 } 704 if d.vd != bincVdMap { 705 
d.d.errorf("map - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 706 } 707 length = d.decLen() 708 d.bdRead = false 709 return 710 } 711 712 func (d *bincDecDriver) ReadArrayStart() (length int) { 713 if d.advanceNil() { 714 return containerLenNil 715 } 716 if d.vd != bincVdArray { 717 d.d.errorf("array - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 718 } 719 length = d.decLen() 720 d.bdRead = false 721 return 722 } 723 724 func (d *bincDecDriver) decLen() int { 725 if d.vs > 3 { 726 return int(d.vs - 4) 727 } 728 return int(d.decLenNumber()) 729 } 730 731 func (d *bincDecDriver) decLenNumber() (v uint64) { 732 if x := d.vs; x == 0 { 733 v = uint64(d.d.decRd.readn1()) 734 } else if x == 1 { 735 v = uint64(bigen.Uint16(d.d.decRd.readn2())) 736 } else if x == 2 { 737 v = uint64(bigen.Uint32(d.d.decRd.readn4())) 738 } else { 739 v = bigen.Uint64(d.d.decRd.readn8()) 740 } 741 return 742 } 743 744 // func (d *bincDecDriver) decStringBytes(bs []byte, zerocopy bool) (bs2 []byte) { 745 func (d *bincDecDriver) DecodeStringAsBytes() (bs2 []byte) { 746 d.d.decByteState = decByteStateNone 747 if d.advanceNil() { 748 return 749 } 750 var slen = -1 751 switch d.vd { 752 case bincVdString, bincVdByteArray: 753 slen = d.decLen() 754 if d.d.bytes { 755 d.d.decByteState = decByteStateZerocopy 756 bs2 = d.d.decRd.rb.readx(uint(slen)) 757 } else { 758 d.d.decByteState = decByteStateReuseBuf 759 bs2 = decByteSlice(d.d.r(), slen, d.d.h.MaxInitLen, d.d.b[:]) 760 } 761 case bincVdSymbol: 762 // zerocopy doesn't apply for symbols, 763 // as the values must be stored in a table for later use. 
764 var symbol uint16 765 vs := d.vs 766 if vs&0x8 == 0 { 767 symbol = uint16(d.d.decRd.readn1()) 768 } else { 769 symbol = uint16(bigen.Uint16(d.d.decRd.readn2())) 770 } 771 if d.s == nil { 772 d.s = make(map[uint16][]byte, 16) 773 } 774 775 if vs&0x4 == 0 { 776 bs2 = d.s[symbol] 777 } else { 778 switch vs & 0x3 { 779 case 0: 780 slen = int(d.d.decRd.readn1()) 781 case 1: 782 slen = int(bigen.Uint16(d.d.decRd.readn2())) 783 case 2: 784 slen = int(bigen.Uint32(d.d.decRd.readn4())) 785 case 3: 786 slen = int(bigen.Uint64(d.d.decRd.readn8())) 787 } 788 // As we are using symbols, do not store any part of 789 // the parameter bs in the map, as it might be a shared buffer. 790 bs2 = decByteSlice(d.d.r(), slen, d.d.h.MaxInitLen, nil) 791 d.s[symbol] = bs2 792 } 793 default: 794 d.d.errorf("string/bytes - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 795 } 796 797 if d.h.ValidateUnicode && !utf8.Valid(bs2) { 798 d.d.errorf("DecodeStringAsBytes: invalid UTF-8: %s", bs2) 799 } 800 801 d.bdRead = false 802 return 803 } 804 805 func (d *bincDecDriver) DecodeBytes(bs []byte) (bsOut []byte) { 806 d.d.decByteState = decByteStateNone 807 if d.advanceNil() { 808 return 809 } 810 if d.vd == bincVdArray { 811 if bs == nil { 812 bs = d.d.b[:] 813 d.d.decByteState = decByteStateReuseBuf 814 } 815 slen := d.ReadArrayStart() 816 var changed bool 817 if bs, changed = usableByteSlice(bs, slen); changed { 818 d.d.decByteState = decByteStateNone 819 } 820 for i := 0; i < slen; i++ { 821 bs[i] = uint8(chkOvf.UintV(d.DecodeUint64(), 8)) 822 } 823 for i := len(bs); i < slen; i++ { 824 bs = append(bs, uint8(chkOvf.UintV(d.DecodeUint64(), 8))) 825 } 826 return bs 827 } 828 var clen int 829 if d.vd == bincVdString || d.vd == bincVdByteArray { 830 clen = d.decLen() 831 } else { 832 d.d.errorf("bytes - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 833 } 834 d.bdRead = false 835 if d.d.zerocopy() { 836 d.d.decByteState = decByteStateZerocopy 837 return 
d.d.decRd.rb.readx(uint(clen)) 838 } 839 if bs == nil { 840 bs = d.d.b[:] 841 d.d.decByteState = decByteStateReuseBuf 842 } 843 return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs) 844 } 845 846 func (d *bincDecDriver) DecodeExt(rv interface{}, basetype reflect.Type, xtag uint64, ext Ext) { 847 if xtag > 0xff { 848 d.d.errorf("ext: tag must be <= 0xff; got: %v", xtag) 849 } 850 if d.advanceNil() { 851 return 852 } 853 xbs, realxtag1, zerocopy := d.decodeExtV(ext != nil, uint8(xtag)) 854 realxtag := uint64(realxtag1) 855 if ext == nil { 856 re := rv.(*RawExt) 857 re.Tag = realxtag 858 re.setData(xbs, zerocopy) 859 } else if ext == SelfExt { 860 d.d.sideDecode(rv, basetype, xbs) 861 } else { 862 ext.ReadExt(rv, xbs) 863 } 864 } 865 866 func (d *bincDecDriver) decodeExtV(verifyTag bool, tag byte) (xbs []byte, xtag byte, zerocopy bool) { 867 if d.vd == bincVdCustomExt { 868 l := d.decLen() 869 xtag = d.d.decRd.readn1() 870 if verifyTag && xtag != tag { 871 d.d.errorf("wrong extension tag - got %b, expecting: %v", xtag, tag) 872 } 873 if d.d.bytes { 874 xbs = d.d.decRd.rb.readx(uint(l)) 875 zerocopy = true 876 } else { 877 xbs = decByteSlice(d.d.r(), l, d.d.h.MaxInitLen, d.d.b[:]) 878 } 879 } else if d.vd == bincVdByteArray { 880 xbs = d.DecodeBytes(nil) 881 } else { 882 d.d.errorf("ext expects extensions or byte array - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 883 } 884 d.bdRead = false 885 return 886 } 887 888 func (d *bincDecDriver) DecodeNaked() { 889 if !d.bdRead { 890 d.readNextBd() 891 } 892 893 n := d.d.naked() 894 var decodeFurther bool 895 896 switch d.vd { 897 case bincVdSpecial: 898 switch d.vs { 899 case bincSpNil: 900 n.v = valueTypeNil 901 case bincSpFalse: 902 n.v = valueTypeBool 903 n.b = false 904 case bincSpTrue: 905 n.v = valueTypeBool 906 n.b = true 907 case bincSpNan: 908 n.v = valueTypeFloat 909 n.f = math.NaN() 910 case bincSpPosInf: 911 n.v = valueTypeFloat 912 n.f = math.Inf(1) 913 case bincSpNegInf: 914 n.v = 
valueTypeFloat 915 n.f = math.Inf(-1) 916 case bincSpZeroFloat: 917 n.v = valueTypeFloat 918 n.f = float64(0) 919 case bincSpZero: 920 n.v = valueTypeUint 921 n.u = uint64(0) // int8(0) 922 case bincSpNegOne: 923 n.v = valueTypeInt 924 n.i = int64(-1) // int8(-1) 925 default: 926 d.d.errorf("cannot infer value - unrecognized special value %x-%x/%s", d.vd, d.vs, bincdesc(d.vd, d.vs)) 927 } 928 case bincVdSmallInt: 929 n.v = valueTypeUint 930 n.u = uint64(int8(d.vs)) + 1 // int8(d.vs) + 1 931 case bincVdPosInt: 932 n.v = valueTypeUint 933 n.u = d.decUint() 934 case bincVdNegInt: 935 n.v = valueTypeInt 936 n.i = -(int64(d.decUint())) 937 case bincVdFloat: 938 n.v = valueTypeFloat 939 n.f = d.decFloatVal() 940 case bincVdString: 941 n.v = valueTypeString 942 n.s = d.d.stringZC(d.DecodeStringAsBytes()) 943 case bincVdByteArray: 944 d.d.fauxUnionReadRawBytes(false) 945 case bincVdSymbol: 946 n.v = valueTypeSymbol 947 n.s = d.d.stringZC(d.DecodeStringAsBytes()) 948 case bincVdTimestamp: 949 n.v = valueTypeTime 950 tt, err := bincDecodeTime(d.d.decRd.readx(uint(d.vs))) 951 halt.onerror(err) 952 n.t = tt 953 case bincVdCustomExt: 954 n.v = valueTypeExt 955 l := d.decLen() 956 n.u = uint64(d.d.decRd.readn1()) 957 if d.d.bytes { 958 n.l = d.d.decRd.rb.readx(uint(l)) 959 } else { 960 n.l = decByteSlice(d.d.r(), l, d.d.h.MaxInitLen, d.d.b[:]) 961 } 962 case bincVdArray: 963 n.v = valueTypeArray 964 decodeFurther = true 965 case bincVdMap: 966 n.v = valueTypeMap 967 decodeFurther = true 968 default: 969 d.d.errorf("cannot infer value - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 970 } 971 972 if !decodeFurther { 973 d.bdRead = false 974 } 975 if n.v == valueTypeUint && d.h.SignedInteger { 976 n.v = valueTypeInt 977 n.i = int64(n.u) 978 } 979 } 980 981 func (d *bincDecDriver) nextValueBytes(v0 []byte) (v []byte) { 982 if !d.bdRead { 983 d.readNextBd() 984 } 985 v = v0 986 var h = decNextValueBytesHelper{d: &d.d} 987 var cursor = d.d.rb.c - 1 988 h.append1(&v, 
d.bd) 989 v = d.nextValueBytesBdReadR(v) 990 d.bdRead = false 991 h.bytesRdV(&v, cursor) 992 return 993 } 994 995 func (d *bincDecDriver) nextValueBytesR(v0 []byte) (v []byte) { 996 d.readNextBd() 997 v = v0 998 var h = decNextValueBytesHelper{d: &d.d} 999 h.append1(&v, d.bd) 1000 return d.nextValueBytesBdReadR(v) 1001 } 1002 1003 func (d *bincDecDriver) nextValueBytesBdReadR(v0 []byte) (v []byte) { 1004 v = v0 1005 var h = decNextValueBytesHelper{d: &d.d} 1006 1007 fnLen := func(vs byte) uint { 1008 switch vs { 1009 case 0: 1010 x := d.d.decRd.readn1() 1011 h.append1(&v, x) 1012 return uint(x) 1013 case 1: 1014 x := d.d.decRd.readn2() 1015 h.appendN(&v, x[:]...) 1016 return uint(bigen.Uint16(x)) 1017 case 2: 1018 x := d.d.decRd.readn4() 1019 h.appendN(&v, x[:]...) 1020 return uint(bigen.Uint32(x)) 1021 case 3: 1022 x := d.d.decRd.readn8() 1023 h.appendN(&v, x[:]...) 1024 return uint(bigen.Uint64(x)) 1025 default: 1026 return uint(vs - 4) 1027 } 1028 } 1029 1030 var clen uint 1031 1032 switch d.vd { 1033 case bincVdSpecial: 1034 switch d.vs { 1035 case bincSpNil, bincSpFalse, bincSpTrue, bincSpNan, bincSpPosInf: // pass 1036 case bincSpNegInf, bincSpZeroFloat, bincSpZero, bincSpNegOne: // pass 1037 default: 1038 d.d.errorf("cannot infer value - unrecognized special value %x-%x/%s", d.vd, d.vs, bincdesc(d.vd, d.vs)) 1039 } 1040 case bincVdSmallInt: // pass 1041 case bincVdPosInt, bincVdNegInt: 1042 bs := d.uintBytes() 1043 h.appendN(&v, bs...) 1044 case bincVdFloat: 1045 fn := func(xlen byte) { 1046 if d.vs&0x8 != 0 { 1047 xlen = d.d.decRd.readn1() 1048 h.append1(&v, xlen) 1049 if xlen > 8 { 1050 d.d.errorf("cannot read float - at most 8 bytes used to represent float - received %v bytes", xlen) 1051 } 1052 } 1053 d.d.decRd.readb(d.d.b[:xlen]) 1054 h.appendN(&v, d.d.b[:xlen]...) 
1055 } 1056 switch d.vs & 0x7 { 1057 case bincFlBin32: 1058 fn(4) 1059 case bincFlBin64: 1060 fn(8) 1061 default: 1062 d.d.errorf("read float supports only float32/64 - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 1063 } 1064 case bincVdString, bincVdByteArray: 1065 clen = fnLen(d.vs) 1066 h.appendN(&v, d.d.decRd.readx(clen)...) 1067 case bincVdSymbol: 1068 if d.vs&0x8 == 0 { 1069 h.append1(&v, d.d.decRd.readn1()) 1070 } else { 1071 h.appendN(&v, d.d.decRd.rb.readx(2)...) 1072 } 1073 if d.vs&0x4 != 0 { 1074 clen = fnLen(d.vs & 0x3) 1075 h.appendN(&v, d.d.decRd.readx(clen)...) 1076 } 1077 case bincVdTimestamp: 1078 h.appendN(&v, d.d.decRd.readx(uint(d.vs))...) 1079 case bincVdCustomExt: 1080 clen = fnLen(d.vs) 1081 h.append1(&v, d.d.decRd.readn1()) // tag 1082 h.appendN(&v, d.d.decRd.readx(clen)...) 1083 case bincVdArray: 1084 clen = fnLen(d.vs) 1085 for i := uint(0); i < clen; i++ { 1086 v = d.nextValueBytesR(v) 1087 } 1088 case bincVdMap: 1089 clen = fnLen(d.vs) 1090 for i := uint(0); i < clen; i++ { 1091 v = d.nextValueBytesR(v) 1092 v = d.nextValueBytesR(v) 1093 } 1094 default: 1095 d.d.errorf("cannot infer value - %s %x-%x/%s", msgBadDesc, d.vd, d.vs, bincdesc(d.vd, d.vs)) 1096 } 1097 return 1098 } 1099 1100 //------------------------------------ 1101 1102 // BincHandle is a Handle for the Binc Schema-Free Encoding Format 1103 // defined at https://github.com/ugorji/binc . 1104 // 1105 // BincHandle currently supports all Binc features with the following EXCEPTIONS: 1106 // - only integers up to 64 bits of precision are supported. 1107 // big integers are unsupported. 1108 // - Only IEEE 754 binary32 and binary64 floats are supported (ie Go float32 and float64 types). 1109 // extended precision and decimal IEEE 754 floats are unsupported. 1110 // - Only UTF-8 strings supported. 1111 // Unicode_Other Binc types (UTF16, UTF32) are currently unsupported. 
1112 // 1113 // Note that these EXCEPTIONS are temporary and full support is possible and may happen soon. 1114 type BincHandle struct { 1115 BasicHandle 1116 binaryEncodingType 1117 // noElemSeparators 1118 1119 // AsSymbols defines what should be encoded as symbols. 1120 // 1121 // Encoding as symbols can reduce the encoded size significantly. 1122 // 1123 // However, during decoding, each string to be encoded as a symbol must 1124 // be checked to see if it has been seen before. Consequently, encoding time 1125 // will increase if using symbols, because string comparisons has a clear cost. 1126 // 1127 // Values: 1128 // - 0: default: library uses best judgement 1129 // - 1: use symbols 1130 // - 2: do not use symbols 1131 AsSymbols uint8 1132 1133 // AsSymbols: may later on introduce more options ... 1134 // - m: map keys 1135 // - s: struct fields 1136 // - n: none 1137 // - a: all: same as m, s, ... 1138 1139 // _ [7]uint64 // padding (cache-aligned) 1140 } 1141 1142 // Name returns the name of the handle: binc 1143 func (h *BincHandle) Name() string { return "binc" } 1144 1145 func (h *BincHandle) desc(bd byte) string { return bincdesc(bd>>4, bd&0x0f) } 1146 1147 func (h *BincHandle) newEncDriver() encDriver { 1148 var e = &bincEncDriver{h: h} 1149 e.e.e = e 1150 e.e.init(h) 1151 e.reset() 1152 return e 1153 } 1154 1155 func (h *BincHandle) newDecDriver() decDriver { 1156 d := &bincDecDriver{h: h} 1157 d.d.d = d 1158 d.d.init(h) 1159 d.reset() 1160 return d 1161 } 1162 1163 // var timeDigits = [...]byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} 1164 1165 // EncodeTime encodes a time.Time as a []byte, including 1166 // information on the instant in time and UTC offset. 
//
// Format Description
//
// A timestamp is composed of 3 components:
//
//   - secs: signed integer representing seconds since unix epoch
//   - nsecs: unsigned integer representing fractional seconds as a
//     nanosecond offset within secs, in the range 0 <= nsecs < 1e9
//   - tz: signed integer representing timezone offset in minutes east of UTC,
//     and a dst (daylight savings time) flag
//
// When encoding a timestamp, the first byte is the descriptor, which
// defines which components are encoded and how many bytes are used to
// encode secs and nsecs components. *If secs/nsecs is 0 or tz is UTC, it
// is not encoded in the byte array explicitly*.
//
// Descriptor 8 bits are of the form `A B C DDD EE`:
//
//	A:   Is secs component encoded? 1 = true
//	B:   Is nsecs component encoded? 1 = true
//	C:   Is tz component encoded? 1 = true
//	DDD: Number of extra bytes for secs (range 0-7).
//	     If A = 1, secs encoded in DDD+1 bytes.
//	         If A = 0, secs is not encoded, and is assumed to be 0.
//	         If A = 1, then we need at least 1 byte to encode secs.
//	         DDD says the number of extra bytes beyond that 1.
//	         E.g. if DDD=0, then secs is represented in 1 byte.
//	              if DDD=2, then secs is represented in 3 bytes.
//	EE:  Number of extra bytes for nsecs (range 0-3).
//	     If B = 1, nsecs encoded in EE+1 bytes (similar to secs/DDD above)
//
// Following the descriptor bytes, subsequent bytes are:
//
//	secs component encoded in `DDD + 1` bytes (if A == 1)
//	nsecs component encoded in `EE + 1` bytes (if B == 1)
//	tz component encoded in 2 bytes (if C == 1)
//
// secs and nsecs components are integers encoded in a BigEndian
// 2-complement encoding format.
//
// tz component is encoded as 2 bytes (16 bits).
Most significant bit 15 to 1207 // Least significant bit 0 are described below: 1208 // 1209 // Timezone offset has a range of -12:00 to +14:00 (ie -720 to +840 minutes). 1210 // Bit 15 = have\_dst: set to 1 if we set the dst flag. 1211 // Bit 14 = dst\_on: set to 1 if dst is in effect at the time, or 0 if not. 1212 // Bits 13..0 = timezone offset in minutes. It is a signed integer in Big Endian format. 1213 func bincEncodeTime(t time.Time) []byte { 1214 // t := rv2i(rv).(time.Time) 1215 tsecs, tnsecs := t.Unix(), t.Nanosecond() 1216 var ( 1217 bd byte 1218 bs [16]byte 1219 i int = 1 1220 ) 1221 l := t.Location() 1222 if l == time.UTC { 1223 l = nil 1224 } 1225 if tsecs != 0 { 1226 bd = bd | 0x80 1227 btmp := bigen.PutUint64(uint64(tsecs)) 1228 f := pruneSignExt(btmp[:], tsecs >= 0) 1229 bd = bd | (byte(7-f) << 2) 1230 copy(bs[i:], btmp[f:]) 1231 i = i + (8 - f) 1232 } 1233 if tnsecs != 0 { 1234 bd = bd | 0x40 1235 btmp := bigen.PutUint32(uint32(tnsecs)) 1236 f := pruneSignExt(btmp[:4], true) 1237 bd = bd | byte(3-f) 1238 copy(bs[i:], btmp[f:4]) 1239 i = i + (4 - f) 1240 } 1241 if l != nil { 1242 bd = bd | 0x20 1243 // Note that Go Libs do not give access to dst flag. 1244 _, zoneOffset := t.Zone() 1245 // zoneName, zoneOffset := t.Zone() 1246 zoneOffset /= 60 1247 z := uint16(zoneOffset) 1248 btmp := bigen.PutUint16(z) 1249 // clear dst flags 1250 bs[i] = btmp[0] & 0x3f 1251 bs[i+1] = btmp[1] 1252 i = i + 2 1253 } 1254 bs[0] = bd 1255 return bs[0:i] 1256 } 1257 1258 // bincDecodeTime decodes a []byte into a time.Time. 
1259 func bincDecodeTime(bs []byte) (tt time.Time, err error) { 1260 bd := bs[0] 1261 var ( 1262 tsec int64 1263 tnsec uint32 1264 tz uint16 1265 i byte = 1 1266 i2 byte 1267 n byte 1268 ) 1269 if bd&(1<<7) != 0 { 1270 var btmp [8]byte 1271 n = ((bd >> 2) & 0x7) + 1 1272 i2 = i + n 1273 copy(btmp[8-n:], bs[i:i2]) 1274 // if first bit of bs[i] is set, then fill btmp[0..8-n] with 0xff (ie sign extend it) 1275 if bs[i]&(1<<7) != 0 { 1276 copy(btmp[0:8-n], bsAll0xff) 1277 } 1278 i = i2 1279 tsec = int64(bigen.Uint64(btmp)) 1280 } 1281 if bd&(1<<6) != 0 { 1282 var btmp [4]byte 1283 n = (bd & 0x3) + 1 1284 i2 = i + n 1285 copy(btmp[4-n:], bs[i:i2]) 1286 i = i2 1287 tnsec = bigen.Uint32(btmp) 1288 } 1289 if bd&(1<<5) == 0 { 1290 tt = time.Unix(tsec, int64(tnsec)).UTC() 1291 return 1292 } 1293 // In stdlib time.Parse, when a date is parsed without a zone name, it uses "" as zone name. 1294 // However, we need name here, so it can be shown when time is printf.d. 1295 // Zone name is in form: UTC-08:00. 1296 // Note that Go Libs do not give access to dst flag, so we ignore dst bits 1297 1298 tz = bigen.Uint16([2]byte{bs[i], bs[i+1]}) 1299 // sign extend sign bit into top 2 MSB (which were dst bits): 1300 if tz&(1<<13) == 0 { // positive 1301 tz = tz & 0x3fff //clear 2 MSBs: dst bits 1302 } else { // negative 1303 tz = tz | 0xc000 //set 2 MSBs: dst bits 1304 } 1305 tzint := int16(tz) 1306 if tzint == 0 { 1307 tt = time.Unix(tsec, int64(tnsec)).UTC() 1308 } else { 1309 // For Go Time, do not use a descriptive timezone. 1310 // It's unnecessary, and makes it harder to do a reflect.DeepEqual. 1311 // The Offset already tells what the offset should be, if not on UTC and unknown zone name. 1312 // var zoneName = timeLocUTCName(tzint) 1313 tt = time.Unix(tsec, int64(tnsec)).In(time.FixedZone("", int(tzint)*60)) 1314 } 1315 return 1316 } 1317 1318 var _ decDriver = (*bincDecDriver)(nil) 1319 var _ encDriver = (*bincEncDriver)(nil)