encode_best.go
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"fmt"
	"math"
	"math/bits"
)

// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2

		debug = false
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}
	sLimitDict := len(src) - inputMargin
	if sLimitDict > MaxDictSrcOffset-inputMargin {
		sLimitDict = MaxDictSrcOffset - inputMargin
	}

	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	repeat := 1
	if dict != nil {
		dict.initBest()
		s = 0
		repeat = len(dict.dict) - dict.repeat
	}
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	const lowbitMask = 0xffffffff
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset    int
			s         int
			length    int
			score     int
			rep, dict bool
		}
		var best match
		for {
			// Next src position to check
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			if dict != nil && s >= MaxDictSrcOffset {
				dict = nil
				if repeat > s {
					repeat = math.MinInt32
				}
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			score := func(m match) int {
				// Matches that are longer forward are penalized, since the
				// extra bytes must be emitted as literals.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset
				if m.rep {
					return score - emitRepeatSize(offset, m.length)
				}
				return score - emitCopySize(offset, m.length)
			}

			matchAt := func(offset, s int, first uint32, rep bool) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
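					// (m.length stays zero here, so bestOf treats the result as "no match".)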
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
				s += 4
				for s < len(src) {
					if len(src)-s < 8 {
						if src[s] == src[m.length] {
							m.length++
							s++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}
			matchDict := func(candidate, s int, first uint32, rep bool) match {
				// Calculate the offset as if dict and src formed one continuous array.
				offset := -len(dict.dict) + candidate
				if best.length != 0 && best.s-best.offset == s-offset && !rep {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}

				if load32(dict.dict, candidate) != first {
					return match{offset: offset, s: s}
				}
				m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
				s += 4
				if !rep {
					for s < sLimitDict && m.length < len(dict.dict) {
						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
							if src[s] == dict.dict[m.length] {
								m.length++
								s++
								continue
							}
							break
						}
						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
							m.length += bits.TrailingZeros64(diff) >> 3
							break
						}
						s += 8
						m.length += 8
					}
				} else {
					for s < len(src) && m.length < len(dict.dict) {
						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
							if src[s] == dict.dict[m.length] {
								m.length++
								s++
								continue
							}
							break
						}
						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
							m.length += bits.TrailingZeros64(diff) >> 3
							break
						}
						s += 8
						m.length += 8
					}
				}
				m.length -= candidate
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
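					// (score <= -m.s means the encoded copy would be at least
					// as large as the m.length bytes it replaces.)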
					m.length = 0
				}
				return m
			}

			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}

			if s > 0 {
				best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
				best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
				best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
			}
			if dict != nil {
				candidateL := dict.bestTableLong[hashL]
				candidateS := dict.bestTableShort[hashS]
				best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
				best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
				best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
				best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
			}
			{
				if (dict == nil || repeat <= s) && repeat > 0 {
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
				} else if s-repeat < -4 && dict != nil {
					candidate := len(dict.dict) - (repeat - s)
					best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
					candidate++
					best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
				}

				if best.length > 0 {
					hashS := hash4(cv>>8, sTableBits)
					// s+1
					nextShort := sTable[hashS]
					s := s + 1
					cv := load64(src, s)
					hashL := hash8(cv, lTableBits)
					nextLong := lTable[hashL]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

					// Dict at + 1
					if dict != nil {
						candidateL := dict.bestTableLong[hashL]
						candidateS := dict.bestTableShort[hashS]

						best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
						best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
					}

					// s+2
					if true {
						hashS := hash4(cv>>8, sTableBits)

						nextShort = sTable[hashS]
						s++
						cv = load64(src, s)
						hashL := hash8(cv, lTableBits)
						nextLong = lTable[hashL]

						if (dict == nil || repeat <= s) && repeat > 0 {
							// Repeat at + 2
							best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
						} else if repeat-s > 4 && dict != nil {
							candidate := len(dict.dict) - (repeat - s)
							best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
						}
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))

						// Dict at +2
						// Very small gain
						if dict != nil {
							candidateL := dict.bestTableLong[hashL]
							candidateS := dict.bestTableShort[hashS]

							best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
							best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
						}
					}
					// Search for a match at best match end, see if that is better.
					// Allow some bytes at the beginning to mismatch.
					// Sweet spot is around 1-2 bytes, but depends on input.
					// The skipped bytes are tested in Extend backwards,
					// and still picked up as part of the match if they do.
					const skipBeginning = 2
					const skipEnd = 1
					if sAt := best.s + best.length - skipEnd; sAt < sLimit {

						sBack := best.s + skipBeginning - skipEnd
						backL := best.length - skipBeginning
						// Load initial values
						cv = load64(src, sBack)

						// Grab candidates...
						next := lTable[hash8(load64(src, sAt), lTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
						// Disabled: Extremely small gain
						if false {
							next = sTable[hash4(load64(src, sAt), sTableBits)]
							if checkAt := getCur(next) - backL; checkAt > 0 {
								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
							}
							if checkAt := getPrev(next) - backL; checkAt > 0 {
								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
							}
						}
					}
				}
			}

			// Update table
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if !best.rep && !best.dict {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset
		s += best.length

		if offset > 65535 && s-base <= 5 && !best.rep {
			// Bail if the match is equal to or worse than its encoding.
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}
		if debug && nextEmit != base {
			fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
		}
		d += emitLiteral(dst[d:], src[nextEmit:base])
		if best.rep {
			if nextEmit > 0 || best.dict {
				if debug {
					fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], offset, best.length)
			} else {
				// First match without dict cannot be a repeat.
				if debug {
					fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
				}
				d += emitCopy(dst[d:], offset, best.length)
			}
		} else {
			if debug {
				fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
			}
			d += emitCopy(dst[d:], offset, best.length)
		}
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Bail if we do not have space for more.
			return 0
		}
		// Fill tables...
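		// Each table slot packs two candidate positions: the newest position
		// in the low 32 bits, with the slot's previous occupant shifted into
		// the high 32 bits (read back via getCur/getPrev above).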
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		if debug && nextEmit != s {
			fmt.Println("emitted ", len(src)-nextEmit, "literals")
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBestSnappy(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1
	const lowbitMask = 0xffffffff
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset int
			s      int
			length int
			score  int
		}
		var best match
		for {
			// Next src position to check
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			score := func(m match) int {
				// Matches that are longer forward are penalized, since the
				// extra bytes must be emitted as literals.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset

				return score - emitCopyNoRepeatSize(offset, m.length)
			}

			matchAt := func(offset, s int, first uint32) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				m := match{offset: offset, s: s, length: 4 + offset}
				s += 4
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}

			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}

			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))

			{
				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
				if best.length > 0 {
					// s+1
					nextShort := sTable[hash4(cv>>8, sTableBits)]
					s := s + 1
					cv := load64(src, s)
					nextLong := lTable[hash8(cv, lTableBits)]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					// Repeat at + 2
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))

					// s+2
					if true {
						nextShort = sTable[hash4(cv>>8, sTableBits)]
						s++
						cv = load64(src, s)
						nextLong = lTable[hash8(cv, lTableBits)]
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					}
					// Search for a match at best match end, see if that is better.
					if sAt := best.s + best.length; sAt < sLimit {
						sBack := best.s
						backL := best.length
						// Load initial values
						cv = load64(src, sBack)
						// Search for mismatch
						next := lTable[hash8(load64(src, sAt), lTableBits)]
						//next := sTable[hash4(load64(src, sAt), sTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
					}
				}
			}

			// Update table
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if true {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset

		s += best.length

		if offset > 65535 && s-base <= 5 {
			// Bail if the match is equal to or worse than its encoding.
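			// (A copy with a 4-byte offset costs 5 bytes to encode, so a
			// match covering at most 5 bytes saves nothing over literals.)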
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}
		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, best.length)
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Bail if we do not have space for more.
			return 0
		}
		// Fill tables...
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// emitCopySize returns the size to encode the offset+length
//
// It assumes that:
//
//	1 <= offset && offset <= math.MaxUint32
//	4 <= length && length <= 1 << 24
func emitCopySize(offset, length int) int {
	if offset >= 65536 {
		i := 0
		if length > 64 {
			length -= 64
			if length >= 4 {
				// Emit remaining as repeats
				return 5 + emitRepeatSize(offset, length)
			}
			i = 5
		}
		if length == 0 {
			return i
		}
		return i + 5
	}

	// Offset no more than 2 bytes.
	if length > 64 {
		if offset < 2048 {
			// Emit 8 bytes, then rest as repeats...
			return 2 + emitRepeatSize(offset, length-8)
		}
		// Emit remaining as repeats, at least 4 bytes remain.
		return 3 + emitRepeatSize(offset, length-60)
	}
	if length >= 12 || offset >= 2048 {
		return 3
	}
	// Emit the remaining copy, encoded as 2 bytes.
	return 2
}

// emitCopyNoRepeatSize returns the size to encode the offset+length
//
// It assumes that:
//
//	1 <= offset && offset <= math.MaxUint32
//	4 <= length && length <= 1 << 24
func emitCopyNoRepeatSize(offset, length int) int {
	if offset >= 65536 {
		return 5 + 5*(length/64)
	}

	// Offset no more than 2 bytes.
	if length > 64 {
		// Emit remaining as repeats, at least 4 bytes remain.
		return 3 + 3*(length/60)
	}
	if length >= 12 || offset >= 2048 {
		return 3
	}
	// Emit the remaining copy, encoded as 2 bytes.
	return 2
}

// emitRepeatSize returns the number of bytes required to encode a repeat.
// Length must be at least 4 and < 1<<24
func emitRepeatSize(offset, length int) int {
	// Repeat offset, make length cheaper
	if length <= 4+4 || (length < 8+4 && offset < 2048) {
		return 2
	}
	if length < (1<<8)+4+4 {
		return 3
	}
	if length < (1<<16)+(1<<8)+4 {
		return 4
	}
	const maxRepeat = (1 << 24) - 1
	length -= (1 << 16) - 4
	left := 0
	if length > maxRepeat {
		left = length - maxRepeat + 4
	}
	if left > 0 {
		return 5 + emitRepeatSize(offset, left)
	}
	return 5
}
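
// Usage sketch (illustrative; not part of the upstream file). encodeBlockBest
// is not called directly: the exported EncodeBest selects it for block
// compression once src is large enough, and Decode reverses the transform:
//
//	dst := EncodeBest(make([]byte, MaxEncodedLen(len(src))), src)
//	out, err := Decode(nil, dst)
//	// expect err == nil and bytes.Equal(out, src)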