encode_all.go (26881B)
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"math/bits"
)

// load32 returns the little-endian uint32 starting at index i of b.
func load32(b []byte, i int) uint32 {
	return binary.LittleEndian.Uint32(b[i:])
}

// load64 returns the little-endian uint64 starting at index i of b.
func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash6(u uint64, h uint8) uint32 {
	const prime6bytes = 227718039650203
	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
}

// encodeGo returns the encoded form of src. dst is reused if it is at least
// MaxEncodedLen(len(src)) bytes; otherwise a new buffer is allocated.
// Inputs shorter than minNonLiteralBlockSize, and inputs that do not compress,
// are stored as a single literal.
func encodeGo(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockGo(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockGo(dst, src []byte) (d int) {
	// Initialize the hash table.
	const (
		tableBits    = 14
		maxTableSize = 1 << tableBits

		debug = false
	)

	var table [maxTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	for {
		candidate := 0
		for {
			// Next src position to check.
			// The step grows with the distance since the last emit, so
			// incompressible regions are skipped through faster.
			nextS := s + (s-nextEmit)>>6 + 4
			if nextS > sLimit {
				goto emitRemainder
			}
			hash0 := hash6(cv, tableBits)
			hash1 := hash6(cv>>8, tableBits)
			candidate = int(table[hash0])
			candidate2 := int(table[hash1])
			table[hash0] = uint32(s)
			table[hash1] = uint32(s + 1)
			hash2 := hash6(cv>>16, tableBits)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				if debug {
					// Validate match.
					if s <= candidate {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := src[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}
				if nextEmit > 0 {
					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
					d += emitRepeat(dst[d:], repeat, s-base)
				} else {
					// First match, cannot be repeat.
					d += emitCopy(dst[d:], repeat, s-base)
				}
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			if uint32(cv) == load32(src, candidate) {
				break
			}
			candidate = int(table[hash2])
			if uint32(cv>>8) == load32(src, candidate2) {
				table[hash2] = uint32(s + 2)
				candidate = candidate2
				s++
				break
			}
			table[hash2] = uint32(s + 2)
			if uint32(cv>>16) == load32(src, candidate) {
				s += 2
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards.
		// The top bytes will be rechecked to get the full match.
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopy(dst[d:], repeat, s-base)
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}

			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if debug && s == candidate {
				panic("s == candidate")
			}
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// encodeBlockSnappyGo encodes a non-empty src to a guaranteed-large-enough
// dst. It mirrors encodeBlockGo, but emits all copies via emitCopyNoRepeat, so
// the output never contains S2 repeat codes (the name indicates this produces
// Snappy-compatible output — confirm against the callers).
// It appears to assume the same preconditions as encodeBlockGo.
func encodeBlockSnappyGo(dst, src []byte) (d int) {
	// Initialize the hash table.
	const (
		tableBits    = 14
		maxTableSize = 1 << tableBits
	)

	var table [maxTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	for {
		candidate := 0
		for {
			// Next src position to check
			nextS := s + (s-nextEmit)>>6 + 4
			if nextS > sLimit {
				goto emitRemainder
			}
			hash0 := hash6(cv, tableBits)
			hash1 := hash6(cv>>8, tableBits)
			candidate = int(table[hash0])
			candidate2 := int(table[hash1])
			table[hash0] = uint32(s)
			table[hash1] = uint32(s + 1)
			hash2 := hash6(cv>>16, tableBits)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}

				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			if uint32(cv) == load32(src, candidate) {
				break
			}
			candidate = int(table[hash2])
			if uint32(cv>>8) == load32(src, candidate2) {
				table[hash2] = uint32(s + 2)
				candidate = candidate2
				s++
				break
			}
			table[hash2] = uint32(s + 2)
			if uint32(cv>>16) == load32(src, candidate) {
				s += 2
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
			if false {
				// Validate match.
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}

			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// encodeBlockDictGo encodes a non-empty src to a guaranteed-large-enough dst,
// searching dict as well as src for matches while within the first
// MaxDictSrcOffset bytes of src. It assumes that the varint-encoded length of
// the decompressed bytes has already been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
	// Initialize the hash table.
	const (
		tableBits    = 14
		maxTableSize = 1 << tableBits
		maxAhead     = 8 // maximum bytes ahead without checking sLimit

		debug = false
	)
	dict.initFast()

	var table [maxTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if sLimit > MaxDictSrcOffset-maxAhead {
		sLimit = MaxDictSrcOffset - maxAhead
	}

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form can start with a dict entry (copy or repeat).
	s := 0

	// Convert dict repeat to offset
	repeat := len(dict.dict) - dict.repeat
	cv := load64(src, 0)

	// While in dict
searchDict:
	for {
		// Next src position to check
		nextS := s + (s-nextEmit)>>6 + 4
		hash0 := hash6(cv, tableBits)
		hash1 := hash6(cv>>8, tableBits)
		if nextS > sLimit {
			if debug {
				fmt.Println("slimit reached", s, nextS)
			}
			break searchDict
		}
		candidateDict := int(dict.fastTable[hash0])
		candidateDict2 := int(dict.fastTable[hash1])
		candidate2 := int(table[hash1])
		candidate := int(table[hash0])
		table[hash0] = uint32(s)
		table[hash1] = uint32(s + 1)
		hash2 := hash6(cv>>16, tableBits)

		// Check repeat at offset checkRep.
		const checkRep = 1

		if repeat > s {
			// The repeat offset still reaches back into the dictionary.
			candidate := len(dict.dict) - repeat + s
			if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
				// Extend back
				base := s
				for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])
				if debug && nextEmit != base {
					fmt.Println("emitted ", base-nextEmit, "literals")
				}
				s += 4
				candidate += 4
				for candidate < len(dict.dict)-8 && s <= len(src)-8 {
					if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				d += emitRepeat(dst[d:], repeat, s-base)
				if debug {
					fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
				}
				nextEmit = s
				if s >= sLimit {
					break searchDict
				}
				cv = load64(src, s)
				continue
			}
		} else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
			base := s + checkRep
			// Extend back
			for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
				i--
				base--
			}
			d += emitLiteral(dst[d:], src[nextEmit:base])
			if debug && nextEmit != base {
				fmt.Println("emitted ", base-nextEmit, "literals")
			}

			// Extend forward
			candidate := s - repeat + 4 + checkRep
			s += 4 + checkRep
			for s <= sLimit {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}

			if nextEmit > 0 {
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
			} else {
				// First match, cannot be repeat.
				d += emitCopy(dst[d:], repeat, s-base)
			}

			nextEmit = s
			if s >= sLimit {
				break searchDict
			}
			if debug {
				fmt.Println("emitted reg repeat", s-base, "s:", s)
			}
			cv = load64(src, s)
			continue searchDict
		}
		if s == 0 {
			cv = load64(src, nextS)
			s = nextS
			continue searchDict
		}
		// Start with table. These matches will always be closer.
		if uint32(cv) == load32(src, candidate) {
			goto emitMatch
		}
		candidate = int(table[hash2])
		if uint32(cv>>8) == load32(src, candidate2) {
			table[hash2] = uint32(s + 2)
			candidate = candidate2
			s++
			goto emitMatch
		}

		// Check dict. Dicts have longer offsets, so we want longer matches.
		if cv == load64(dict.dict, candidateDict) {
			table[hash2] = uint32(s + 2)
			goto emitDict
		}

		candidateDict = int(dict.fastTable[hash2])
		// Check if upper 7 bytes match
		if candidateDict2 >= 1 {
			if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
				table[hash2] = uint32(s + 2)
				candidateDict = candidateDict2
				s++
				goto emitDict
			}
		}

		table[hash2] = uint32(s + 2)
		if uint32(cv>>16) == load32(src, candidate) {
			s += 2
			goto emitMatch
		}
		if candidateDict >= 2 {
			// Check if upper 6 bytes match
			if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
				s += 2
				goto emitDict
			}
		}

		cv = load64(src, nextS)
		s = nextS
		continue searchDict

	emitDict:
		{
			if debug {
				if load32(dict.dict, candidateDict) != load32(src, s) {
					panic("dict emit mismatch")
				}
			}
			// Extend backwards.
			// The top bytes will be rechecked to get the full match.
			for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
				candidateDict--
				s--
			}

			// Bail if we exceed the maximum size.
			if d+(s-nextEmit) > dstLimit {
				return 0
			}

			// A 4-byte match has been found. We'll later see if more than 4 bytes
			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
			// them as literal bytes.

			d += emitLiteral(dst[d:], src[nextEmit:s])
			if debug && nextEmit != s {
				fmt.Println("emitted ", s-nextEmit, "literals")
			}
			{
				// Invariant: we have a 4-byte match at s, and no need to emit any
				// literal bytes prior to s.
				base := s
				repeat = s + (len(dict.dict)) - candidateDict

				// Extend the 4-byte match as long as possible.
				s += 4
				candidateDict += 4
				for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
					if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidateDict += 8
				}

				// Matches longer than 64 are split.
				if s <= sLimit || s-base < 8 {
					d += emitCopy(dst[d:], repeat, s-base)
				} else {
					// Split to ensure we don't start a copy within next block
					d += emitCopy(dst[d:], repeat, 4)
					d += emitRepeat(dst[d:], repeat, s-base-4)
				}
				if false {
					// Validate match.
					if s <= candidate {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := dict.dict[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}
				if debug {
					fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
				}
				nextEmit = s
				if s >= sLimit {
					break searchDict
				}

				if d > dstLimit {
					// Do we have space for more, if not bail.
					return 0
				}

				// Index and continue loop to try new candidate.
				x := load64(src, s-2)
				m2Hash := hash6(x, tableBits)
				currHash := hash6(x>>8, tableBits)
				candidate = int(table[currHash])
				table[m2Hash] = uint32(s - 2)
				table[currHash] = uint32(s - 1)
				cv = load64(src, s)
			}
			continue
		}
	emitMatch:

		// Extend backwards.
		// The top bytes will be rechecked to get the full match.
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])
		if debug && nextEmit != s {
			fmt.Println("emitted ", s-nextEmit, "literals")
		}
		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopy(dst[d:], repeat, s-base)
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}
			if debug {
				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
			}
			nextEmit = s
			if s >= sLimit {
				break searchDict
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if debug && s == candidate {
				panic("s == candidate")
			}
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

	// Search without dict:
	if repeat > s {
		repeat = 0
	}

	// No more dict
	sLimit = len(src) - inputMargin
	if s >= sLimit {
		goto emitRemainder
	}
	if debug {
		fmt.Println("non-dict matching at", s, "repeat:", repeat)
	}
	cv = load64(src, s)
	if debug {
		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
	}
	for {
		candidate := 0
		for {
			// Next src position to check
			nextS := s + (s-nextEmit)>>6 + 4
			if nextS > sLimit {
				goto emitRemainder
			}
			hash0 := hash6(cv, tableBits)
			hash1 := hash6(cv>>8, tableBits)
			candidate = int(table[hash0])
			candidate2 := int(table[hash1])
			table[hash0] = uint32(s)
			table[hash1] = uint32(s + 1)
			hash2 := hash6(cv>>16, tableBits)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])
				if debug && nextEmit != base {
					fmt.Println("emitted ", base-nextEmit, "literals")
				}
				// Extend forward
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				if debug {
					// Validate match.
					if s <= candidate {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := src[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}
				if nextEmit > 0 {
					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
					d += emitRepeat(dst[d:], repeat, s-base)
				} else {
					// First match, cannot be repeat.
					d += emitCopy(dst[d:], repeat, s-base)
				}
				if debug {
					fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
				}
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			if uint32(cv) == load32(src, candidate) {
				break
			}
			candidate = int(table[hash2])
			if uint32(cv>>8) == load32(src, candidate2) {
				table[hash2] = uint32(s + 2)
				candidate = candidate2
				s++
				break
			}
			table[hash2] = uint32(s + 2)
			if uint32(cv>>16) == load32(src, candidate) {
				s += 2
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards.
		// The top bytes will be rechecked to get the full match.
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])
		if debug && nextEmit != s {
			fmt.Println("emitted ", s-nextEmit, "literals")
		}
		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopy(dst[d:], repeat, s-base)
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}
			if debug {
				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
			}
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if debug && s == candidate {
				panic("s == candidate")
			}
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
		if debug && nextEmit != s {
			fmt.Println("emitted ", len(src)-nextEmit, "literals")
		}
	}
	return d
}