gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

encode_all.go (26881B)


      1 // Copyright 2016 The Snappy-Go Authors. All rights reserved.
      2 // Copyright (c) 2019 Klaus Post. All rights reserved.
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 package s2
      7 
      8 import (
      9 	"bytes"
     10 	"encoding/binary"
     11 	"fmt"
     12 	"math/bits"
     13 )
     14 
     15 func load32(b []byte, i int) uint32 {
     16 	return binary.LittleEndian.Uint32(b[i:])
     17 }
     18 
     19 func load64(b []byte, i int) uint64 {
     20 	return binary.LittleEndian.Uint64(b[i:])
     21 }
     22 
     23 // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
     24 // Preferably h should be a constant and should always be <64.
     25 func hash6(u uint64, h uint8) uint32 {
     26 	const prime6bytes = 227718039650203
     27 	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
     28 }
     29 
     30 func encodeGo(dst, src []byte) []byte {
     31 	if n := MaxEncodedLen(len(src)); n < 0 {
     32 		panic(ErrTooLarge)
     33 	} else if len(dst) < n {
     34 		dst = make([]byte, n)
     35 	}
     36 
     37 	// The block starts with the varint-encoded length of the decompressed bytes.
     38 	d := binary.PutUvarint(dst, uint64(len(src)))
     39 
     40 	if len(src) == 0 {
     41 		return dst[:d]
     42 	}
     43 	if len(src) < minNonLiteralBlockSize {
     44 		d += emitLiteral(dst[d:], src)
     45 		return dst[:d]
     46 	}
     47 	n := encodeBlockGo(dst[d:], src)
     48 	if n > 0 {
     49 		d += n
     50 		return dst[:d]
     51 	}
     52 	// Not compressible
     53 	d += emitLiteral(dst[d:], src)
     54 	return dst[:d]
     55 }
     56 
     57 // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
     58 // assumes that the varint-encoded length of the decompressed bytes has already
     59 // been written.
     60 //
     61 // It also assumes that:
     62 //
     63 //	len(dst) >= MaxEncodedLen(len(src)) &&
     64 //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
     65 func encodeBlockGo(dst, src []byte) (d int) {
     66 	// Initialize the hash table.
     67 	const (
     68 		tableBits    = 14
     69 		maxTableSize = 1 << tableBits
     70 
     71 		debug = false
     72 	)
     73 
     74 	var table [maxTableSize]uint32
     75 
     76 	// sLimit is when to stop looking for offset/length copies. The inputMargin
     77 	// lets us use a fast path for emitLiteral in the main loop, while we are
     78 	// looking for copies.
     79 	sLimit := len(src) - inputMargin
     80 
     81 	// Bail if we can't compress to at least this.
     82 	dstLimit := len(src) - len(src)>>5 - 5
     83 
     84 	// nextEmit is where in src the next emitLiteral should start from.
     85 	nextEmit := 0
     86 
     87 	// The encoded form must start with a literal, as there are no previous
     88 	// bytes to copy, so we start looking for hash matches at s == 1.
     89 	s := 1
     90 	cv := load64(src, s)
     91 
     92 	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
     93 	repeat := 1
     94 
     95 	for {
     96 		candidate := 0
     97 		for {
     98 			// Next src position to check
     99 			nextS := s + (s-nextEmit)>>6 + 4
    100 			if nextS > sLimit {
    101 				goto emitRemainder
    102 			}
    103 			hash0 := hash6(cv, tableBits)
    104 			hash1 := hash6(cv>>8, tableBits)
    105 			candidate = int(table[hash0])
    106 			candidate2 := int(table[hash1])
    107 			table[hash0] = uint32(s)
    108 			table[hash1] = uint32(s + 1)
    109 			hash2 := hash6(cv>>16, tableBits)
    110 
    111 			// Check repeat at offset checkRep.
    112 			const checkRep = 1
    113 			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
    114 				base := s + checkRep
    115 				// Extend back
    116 				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
    117 					i--
    118 					base--
    119 				}
    120 				d += emitLiteral(dst[d:], src[nextEmit:base])
    121 
    122 				// Extend forward
    123 				candidate := s - repeat + 4 + checkRep
    124 				s += 4 + checkRep
    125 				for s <= sLimit {
    126 					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    127 						s += bits.TrailingZeros64(diff) >> 3
    128 						break
    129 					}
    130 					s += 8
    131 					candidate += 8
    132 				}
    133 				if debug {
    134 					// Validate match.
    135 					if s <= candidate {
    136 						panic("s <= candidate")
    137 					}
    138 					a := src[base:s]
    139 					b := src[base-repeat : base-repeat+(s-base)]
    140 					if !bytes.Equal(a, b) {
    141 						panic("mismatch")
    142 					}
    143 				}
    144 				if nextEmit > 0 {
    145 					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
    146 					d += emitRepeat(dst[d:], repeat, s-base)
    147 				} else {
    148 					// First match, cannot be repeat.
    149 					d += emitCopy(dst[d:], repeat, s-base)
    150 				}
    151 				nextEmit = s
    152 				if s >= sLimit {
    153 					goto emitRemainder
    154 				}
    155 
    156 				cv = load64(src, s)
    157 				continue
    158 			}
    159 
    160 			if uint32(cv) == load32(src, candidate) {
    161 				break
    162 			}
    163 			candidate = int(table[hash2])
    164 			if uint32(cv>>8) == load32(src, candidate2) {
    165 				table[hash2] = uint32(s + 2)
    166 				candidate = candidate2
    167 				s++
    168 				break
    169 			}
    170 			table[hash2] = uint32(s + 2)
    171 			if uint32(cv>>16) == load32(src, candidate) {
    172 				s += 2
    173 				break
    174 			}
    175 
    176 			cv = load64(src, nextS)
    177 			s = nextS
    178 		}
    179 
    180 		// Extend backwards.
    181 		// The top bytes will be rechecked to get the full match.
    182 		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
    183 			candidate--
    184 			s--
    185 		}
    186 
    187 		// Bail if we exceed the maximum size.
    188 		if d+(s-nextEmit) > dstLimit {
    189 			return 0
    190 		}
    191 
    192 		// A 4-byte match has been found. We'll later see if more than 4 bytes
    193 		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
    194 		// them as literal bytes.
    195 
    196 		d += emitLiteral(dst[d:], src[nextEmit:s])
    197 
    198 		// Call emitCopy, and then see if another emitCopy could be our next
    199 		// move. Repeat until we find no match for the input immediately after
    200 		// what was consumed by the last emitCopy call.
    201 		//
    202 		// If we exit this loop normally then we need to call emitLiteral next,
    203 		// though we don't yet know how big the literal will be. We handle that
    204 		// by proceeding to the next iteration of the main loop. We also can
    205 		// exit this loop via goto if we get close to exhausting the input.
    206 		for {
    207 			// Invariant: we have a 4-byte match at s, and no need to emit any
    208 			// literal bytes prior to s.
    209 			base := s
    210 			repeat = base - candidate
    211 
    212 			// Extend the 4-byte match as long as possible.
    213 			s += 4
    214 			candidate += 4
    215 			for s <= len(src)-8 {
    216 				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    217 					s += bits.TrailingZeros64(diff) >> 3
    218 					break
    219 				}
    220 				s += 8
    221 				candidate += 8
    222 			}
    223 
    224 			d += emitCopy(dst[d:], repeat, s-base)
    225 			if debug {
    226 				// Validate match.
    227 				if s <= candidate {
    228 					panic("s <= candidate")
    229 				}
    230 				a := src[base:s]
    231 				b := src[base-repeat : base-repeat+(s-base)]
    232 				if !bytes.Equal(a, b) {
    233 					panic("mismatch")
    234 				}
    235 			}
    236 
    237 			nextEmit = s
    238 			if s >= sLimit {
    239 				goto emitRemainder
    240 			}
    241 
    242 			if d > dstLimit {
    243 				// Do we have space for more, if not bail.
    244 				return 0
    245 			}
    246 			// Check for an immediate match, otherwise start search at s+1
    247 			x := load64(src, s-2)
    248 			m2Hash := hash6(x, tableBits)
    249 			currHash := hash6(x>>16, tableBits)
    250 			candidate = int(table[currHash])
    251 			table[m2Hash] = uint32(s - 2)
    252 			table[currHash] = uint32(s)
    253 			if debug && s == candidate {
    254 				panic("s == candidate")
    255 			}
    256 			if uint32(x>>16) != load32(src, candidate) {
    257 				cv = load64(src, s+1)
    258 				s++
    259 				break
    260 			}
    261 		}
    262 	}
    263 
    264 emitRemainder:
    265 	if nextEmit < len(src) {
    266 		// Bail if we exceed the maximum size.
    267 		if d+len(src)-nextEmit > dstLimit {
    268 			return 0
    269 		}
    270 		d += emitLiteral(dst[d:], src[nextEmit:])
    271 	}
    272 	return d
    273 }
    274 
    275 func encodeBlockSnappyGo(dst, src []byte) (d int) {
    276 	// Initialize the hash table.
    277 	const (
    278 		tableBits    = 14
    279 		maxTableSize = 1 << tableBits
    280 	)
    281 
    282 	var table [maxTableSize]uint32
    283 
    284 	// sLimit is when to stop looking for offset/length copies. The inputMargin
    285 	// lets us use a fast path for emitLiteral in the main loop, while we are
    286 	// looking for copies.
    287 	sLimit := len(src) - inputMargin
    288 
    289 	// Bail if we can't compress to at least this.
    290 	dstLimit := len(src) - len(src)>>5 - 5
    291 
    292 	// nextEmit is where in src the next emitLiteral should start from.
    293 	nextEmit := 0
    294 
    295 	// The encoded form must start with a literal, as there are no previous
    296 	// bytes to copy, so we start looking for hash matches at s == 1.
    297 	s := 1
    298 	cv := load64(src, s)
    299 
    300 	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
    301 	repeat := 1
    302 
    303 	for {
    304 		candidate := 0
    305 		for {
    306 			// Next src position to check
    307 			nextS := s + (s-nextEmit)>>6 + 4
    308 			if nextS > sLimit {
    309 				goto emitRemainder
    310 			}
    311 			hash0 := hash6(cv, tableBits)
    312 			hash1 := hash6(cv>>8, tableBits)
    313 			candidate = int(table[hash0])
    314 			candidate2 := int(table[hash1])
    315 			table[hash0] = uint32(s)
    316 			table[hash1] = uint32(s + 1)
    317 			hash2 := hash6(cv>>16, tableBits)
    318 
    319 			// Check repeat at offset checkRep.
    320 			const checkRep = 1
    321 			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
    322 				base := s + checkRep
    323 				// Extend back
    324 				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
    325 					i--
    326 					base--
    327 				}
    328 				d += emitLiteral(dst[d:], src[nextEmit:base])
    329 
    330 				// Extend forward
    331 				candidate := s - repeat + 4 + checkRep
    332 				s += 4 + checkRep
    333 				for s <= sLimit {
    334 					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    335 						s += bits.TrailingZeros64(diff) >> 3
    336 						break
    337 					}
    338 					s += 8
    339 					candidate += 8
    340 				}
    341 
    342 				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
    343 				nextEmit = s
    344 				if s >= sLimit {
    345 					goto emitRemainder
    346 				}
    347 
    348 				cv = load64(src, s)
    349 				continue
    350 			}
    351 
    352 			if uint32(cv) == load32(src, candidate) {
    353 				break
    354 			}
    355 			candidate = int(table[hash2])
    356 			if uint32(cv>>8) == load32(src, candidate2) {
    357 				table[hash2] = uint32(s + 2)
    358 				candidate = candidate2
    359 				s++
    360 				break
    361 			}
    362 			table[hash2] = uint32(s + 2)
    363 			if uint32(cv>>16) == load32(src, candidate) {
    364 				s += 2
    365 				break
    366 			}
    367 
    368 			cv = load64(src, nextS)
    369 			s = nextS
    370 		}
    371 
    372 		// Extend backwards
    373 		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
    374 			candidate--
    375 			s--
    376 		}
    377 
    378 		// Bail if we exceed the maximum size.
    379 		if d+(s-nextEmit) > dstLimit {
    380 			return 0
    381 		}
    382 
    383 		// A 4-byte match has been found. We'll later see if more than 4 bytes
    384 		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
    385 		// them as literal bytes.
    386 
    387 		d += emitLiteral(dst[d:], src[nextEmit:s])
    388 
    389 		// Call emitCopy, and then see if another emitCopy could be our next
    390 		// move. Repeat until we find no match for the input immediately after
    391 		// what was consumed by the last emitCopy call.
    392 		//
    393 		// If we exit this loop normally then we need to call emitLiteral next,
    394 		// though we don't yet know how big the literal will be. We handle that
    395 		// by proceeding to the next iteration of the main loop. We also can
    396 		// exit this loop via goto if we get close to exhausting the input.
    397 		for {
    398 			// Invariant: we have a 4-byte match at s, and no need to emit any
    399 			// literal bytes prior to s.
    400 			base := s
    401 			repeat = base - candidate
    402 
    403 			// Extend the 4-byte match as long as possible.
    404 			s += 4
    405 			candidate += 4
    406 			for s <= len(src)-8 {
    407 				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    408 					s += bits.TrailingZeros64(diff) >> 3
    409 					break
    410 				}
    411 				s += 8
    412 				candidate += 8
    413 			}
    414 
    415 			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
    416 			if false {
    417 				// Validate match.
    418 				a := src[base:s]
    419 				b := src[base-repeat : base-repeat+(s-base)]
    420 				if !bytes.Equal(a, b) {
    421 					panic("mismatch")
    422 				}
    423 			}
    424 
    425 			nextEmit = s
    426 			if s >= sLimit {
    427 				goto emitRemainder
    428 			}
    429 
    430 			if d > dstLimit {
    431 				// Do we have space for more, if not bail.
    432 				return 0
    433 			}
    434 			// Check for an immediate match, otherwise start search at s+1
    435 			x := load64(src, s-2)
    436 			m2Hash := hash6(x, tableBits)
    437 			currHash := hash6(x>>16, tableBits)
    438 			candidate = int(table[currHash])
    439 			table[m2Hash] = uint32(s - 2)
    440 			table[currHash] = uint32(s)
    441 			if uint32(x>>16) != load32(src, candidate) {
    442 				cv = load64(src, s+1)
    443 				s++
    444 				break
    445 			}
    446 		}
    447 	}
    448 
    449 emitRemainder:
    450 	if nextEmit < len(src) {
    451 		// Bail if we exceed the maximum size.
    452 		if d+len(src)-nextEmit > dstLimit {
    453 			return 0
    454 		}
    455 		d += emitLiteral(dst[d:], src[nextEmit:])
    456 	}
    457 	return d
    458 }
    459 
    460 // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
    461 // assumes that the varint-encoded length of the decompressed bytes has already
    462 // been written.
    463 //
    464 // It also assumes that:
    465 //
    466 //	len(dst) >= MaxEncodedLen(len(src)) &&
    467 //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
    468 func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
    469 	// Initialize the hash table.
    470 	const (
    471 		tableBits    = 14
    472 		maxTableSize = 1 << tableBits
    473 		maxAhead     = 8 // maximum bytes ahead without checking sLimit
    474 
    475 		debug = false
    476 	)
    477 	dict.initFast()
    478 
    479 	var table [maxTableSize]uint32
    480 
    481 	// sLimit is when to stop looking for offset/length copies. The inputMargin
    482 	// lets us use a fast path for emitLiteral in the main loop, while we are
    483 	// looking for copies.
    484 	sLimit := len(src) - inputMargin
    485 	if sLimit > MaxDictSrcOffset-maxAhead {
    486 		sLimit = MaxDictSrcOffset - maxAhead
    487 	}
    488 
    489 	// Bail if we can't compress to at least this.
    490 	dstLimit := len(src) - len(src)>>5 - 5
    491 
    492 	// nextEmit is where in src the next emitLiteral should start from.
    493 	nextEmit := 0
    494 
    495 	// The encoded form can start with a dict entry (copy or repeat).
    496 	s := 0
    497 
    498 	// Convert dict repeat to offset
    499 	repeat := len(dict.dict) - dict.repeat
    500 	cv := load64(src, 0)
    501 
    502 	// While in dict
    503 searchDict:
    504 	for {
    505 		// Next src position to check
    506 		nextS := s + (s-nextEmit)>>6 + 4
    507 		hash0 := hash6(cv, tableBits)
    508 		hash1 := hash6(cv>>8, tableBits)
    509 		if nextS > sLimit {
    510 			if debug {
    511 				fmt.Println("slimit reached", s, nextS)
    512 			}
    513 			break searchDict
    514 		}
    515 		candidateDict := int(dict.fastTable[hash0])
    516 		candidateDict2 := int(dict.fastTable[hash1])
    517 		candidate2 := int(table[hash1])
    518 		candidate := int(table[hash0])
    519 		table[hash0] = uint32(s)
    520 		table[hash1] = uint32(s + 1)
    521 		hash2 := hash6(cv>>16, tableBits)
    522 
    523 		// Check repeat at offset checkRep.
    524 		const checkRep = 1
    525 
    526 		if repeat > s {
    527 			candidate := len(dict.dict) - repeat + s
    528 			if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
    529 				// Extend back
    530 				base := s
    531 				for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
    532 					i--
    533 					base--
    534 				}
    535 				d += emitLiteral(dst[d:], src[nextEmit:base])
    536 				if debug && nextEmit != base {
    537 					fmt.Println("emitted ", base-nextEmit, "literals")
    538 				}
    539 				s += 4
    540 				candidate += 4
    541 				for candidate < len(dict.dict)-8 && s <= len(src)-8 {
    542 					if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
    543 						s += bits.TrailingZeros64(diff) >> 3
    544 						break
    545 					}
    546 					s += 8
    547 					candidate += 8
    548 				}
    549 				d += emitRepeat(dst[d:], repeat, s-base)
    550 				if debug {
    551 					fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
    552 				}
    553 				nextEmit = s
    554 				if s >= sLimit {
    555 					break searchDict
    556 				}
    557 				cv = load64(src, s)
    558 				continue
    559 			}
    560 		} else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
    561 			base := s + checkRep
    562 			// Extend back
    563 			for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
    564 				i--
    565 				base--
    566 			}
    567 			d += emitLiteral(dst[d:], src[nextEmit:base])
    568 			if debug && nextEmit != base {
    569 				fmt.Println("emitted ", base-nextEmit, "literals")
    570 			}
    571 
    572 			// Extend forward
    573 			candidate := s - repeat + 4 + checkRep
    574 			s += 4 + checkRep
    575 			for s <= sLimit {
    576 				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    577 					s += bits.TrailingZeros64(diff) >> 3
    578 					break
    579 				}
    580 				s += 8
    581 				candidate += 8
    582 			}
    583 			if debug {
    584 				// Validate match.
    585 				if s <= candidate {
    586 					panic("s <= candidate")
    587 				}
    588 				a := src[base:s]
    589 				b := src[base-repeat : base-repeat+(s-base)]
    590 				if !bytes.Equal(a, b) {
    591 					panic("mismatch")
    592 				}
    593 			}
    594 
    595 			if nextEmit > 0 {
    596 				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
    597 				d += emitRepeat(dst[d:], repeat, s-base)
    598 			} else {
    599 				// First match, cannot be repeat.
    600 				d += emitCopy(dst[d:], repeat, s-base)
    601 			}
    602 
    603 			nextEmit = s
    604 			if s >= sLimit {
    605 				break searchDict
    606 			}
    607 			if debug {
    608 				fmt.Println("emitted reg repeat", s-base, "s:", s)
    609 			}
    610 			cv = load64(src, s)
    611 			continue searchDict
    612 		}
    613 		if s == 0 {
    614 			cv = load64(src, nextS)
    615 			s = nextS
    616 			continue searchDict
    617 		}
    618 		// Start with table. These matches will always be closer.
    619 		if uint32(cv) == load32(src, candidate) {
    620 			goto emitMatch
    621 		}
    622 		candidate = int(table[hash2])
    623 		if uint32(cv>>8) == load32(src, candidate2) {
    624 			table[hash2] = uint32(s + 2)
    625 			candidate = candidate2
    626 			s++
    627 			goto emitMatch
    628 		}
    629 
    630 		// Check dict. Dicts have longer offsets, so we want longer matches.
    631 		if cv == load64(dict.dict, candidateDict) {
    632 			table[hash2] = uint32(s + 2)
    633 			goto emitDict
    634 		}
    635 
    636 		candidateDict = int(dict.fastTable[hash2])
    637 		// Check if upper 7 bytes match
    638 		if candidateDict2 >= 1 {
    639 			if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
    640 				table[hash2] = uint32(s + 2)
    641 				candidateDict = candidateDict2
    642 				s++
    643 				goto emitDict
    644 			}
    645 		}
    646 
    647 		table[hash2] = uint32(s + 2)
    648 		if uint32(cv>>16) == load32(src, candidate) {
    649 			s += 2
    650 			goto emitMatch
    651 		}
    652 		if candidateDict >= 2 {
    653 			// Check if upper 6 bytes match
    654 			if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
    655 				s += 2
    656 				goto emitDict
    657 			}
    658 		}
    659 
    660 		cv = load64(src, nextS)
    661 		s = nextS
    662 		continue searchDict
    663 
    664 	emitDict:
    665 		{
    666 			if debug {
    667 				if load32(dict.dict, candidateDict) != load32(src, s) {
    668 					panic("dict emit mismatch")
    669 				}
    670 			}
    671 			// Extend backwards.
    672 			// The top bytes will be rechecked to get the full match.
    673 			for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
    674 				candidateDict--
    675 				s--
    676 			}
    677 
    678 			// Bail if we exceed the maximum size.
    679 			if d+(s-nextEmit) > dstLimit {
    680 				return 0
    681 			}
    682 
    683 			// A 4-byte match has been found. We'll later see if more than 4 bytes
    684 			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
    685 			// them as literal bytes.
    686 
    687 			d += emitLiteral(dst[d:], src[nextEmit:s])
    688 			if debug && nextEmit != s {
    689 				fmt.Println("emitted ", s-nextEmit, "literals")
    690 			}
    691 			{
    692 				// Invariant: we have a 4-byte match at s, and no need to emit any
    693 				// literal bytes prior to s.
    694 				base := s
    695 				repeat = s + (len(dict.dict)) - candidateDict
    696 
    697 				// Extend the 4-byte match as long as possible.
    698 				s += 4
    699 				candidateDict += 4
    700 				for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
    701 					if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
    702 						s += bits.TrailingZeros64(diff) >> 3
    703 						break
    704 					}
    705 					s += 8
    706 					candidateDict += 8
    707 				}
    708 
    709 				// Matches longer than 64 are split.
    710 				if s <= sLimit || s-base < 8 {
    711 					d += emitCopy(dst[d:], repeat, s-base)
    712 				} else {
    713 					// Split to ensure we don't start a copy within next block
    714 					d += emitCopy(dst[d:], repeat, 4)
    715 					d += emitRepeat(dst[d:], repeat, s-base-4)
    716 				}
    717 				if false {
    718 					// Validate match.
    719 					if s <= candidate {
    720 						panic("s <= candidate")
    721 					}
    722 					a := src[base:s]
    723 					b := dict.dict[base-repeat : base-repeat+(s-base)]
    724 					if !bytes.Equal(a, b) {
    725 						panic("mismatch")
    726 					}
    727 				}
    728 				if debug {
    729 					fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
    730 				}
    731 				nextEmit = s
    732 				if s >= sLimit {
    733 					break searchDict
    734 				}
    735 
    736 				if d > dstLimit {
    737 					// Do we have space for more, if not bail.
    738 					return 0
    739 				}
    740 
    741 				// Index and continue loop to try new candidate.
    742 				x := load64(src, s-2)
    743 				m2Hash := hash6(x, tableBits)
    744 				currHash := hash6(x>>8, tableBits)
    745 				candidate = int(table[currHash])
    746 				table[m2Hash] = uint32(s - 2)
    747 				table[currHash] = uint32(s - 1)
    748 				cv = load64(src, s)
    749 			}
    750 			continue
    751 		}
    752 	emitMatch:
    753 
    754 		// Extend backwards.
    755 		// The top bytes will be rechecked to get the full match.
    756 		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
    757 			candidate--
    758 			s--
    759 		}
    760 
    761 		// Bail if we exceed the maximum size.
    762 		if d+(s-nextEmit) > dstLimit {
    763 			return 0
    764 		}
    765 
    766 		// A 4-byte match has been found. We'll later see if more than 4 bytes
    767 		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
    768 		// them as literal bytes.
    769 
    770 		d += emitLiteral(dst[d:], src[nextEmit:s])
    771 		if debug && nextEmit != s {
    772 			fmt.Println("emitted ", s-nextEmit, "literals")
    773 		}
    774 		// Call emitCopy, and then see if another emitCopy could be our next
    775 		// move. Repeat until we find no match for the input immediately after
    776 		// what was consumed by the last emitCopy call.
    777 		//
    778 		// If we exit this loop normally then we need to call emitLiteral next,
    779 		// though we don't yet know how big the literal will be. We handle that
    780 		// by proceeding to the next iteration of the main loop. We also can
    781 		// exit this loop via goto if we get close to exhausting the input.
    782 		for {
    783 			// Invariant: we have a 4-byte match at s, and no need to emit any
    784 			// literal bytes prior to s.
    785 			base := s
    786 			repeat = base - candidate
    787 
    788 			// Extend the 4-byte match as long as possible.
    789 			s += 4
    790 			candidate += 4
    791 			for s <= len(src)-8 {
    792 				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    793 					s += bits.TrailingZeros64(diff) >> 3
    794 					break
    795 				}
    796 				s += 8
    797 				candidate += 8
    798 			}
    799 
    800 			d += emitCopy(dst[d:], repeat, s-base)
    801 			if debug {
    802 				// Validate match.
    803 				if s <= candidate {
    804 					panic("s <= candidate")
    805 				}
    806 				a := src[base:s]
    807 				b := src[base-repeat : base-repeat+(s-base)]
    808 				if !bytes.Equal(a, b) {
    809 					panic("mismatch")
    810 				}
    811 			}
    812 			if debug {
    813 				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
    814 			}
    815 			nextEmit = s
    816 			if s >= sLimit {
    817 				break searchDict
    818 			}
    819 
    820 			if d > dstLimit {
    821 				// Do we have space for more, if not bail.
    822 				return 0
    823 			}
    824 			// Check for an immediate match, otherwise start search at s+1
    825 			x := load64(src, s-2)
    826 			m2Hash := hash6(x, tableBits)
    827 			currHash := hash6(x>>16, tableBits)
    828 			candidate = int(table[currHash])
    829 			table[m2Hash] = uint32(s - 2)
    830 			table[currHash] = uint32(s)
    831 			if debug && s == candidate {
    832 				panic("s == candidate")
    833 			}
    834 			if uint32(x>>16) != load32(src, candidate) {
    835 				cv = load64(src, s+1)
    836 				s++
    837 				break
    838 			}
    839 		}
    840 	}
    841 
    842 	// Search without dict:
    843 	if repeat > s {
    844 		repeat = 0
    845 	}
    846 
    847 	// No more dict
    848 	sLimit = len(src) - inputMargin
    849 	if s >= sLimit {
    850 		goto emitRemainder
    851 	}
    852 	if debug {
    853 		fmt.Println("non-dict matching at", s, "repeat:", repeat)
    854 	}
    855 	cv = load64(src, s)
    856 	if debug {
    857 		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
    858 	}
    859 	for {
    860 		candidate := 0
    861 		for {
    862 			// Next src position to check
    863 			nextS := s + (s-nextEmit)>>6 + 4
    864 			if nextS > sLimit {
    865 				goto emitRemainder
    866 			}
    867 			hash0 := hash6(cv, tableBits)
    868 			hash1 := hash6(cv>>8, tableBits)
    869 			candidate = int(table[hash0])
    870 			candidate2 := int(table[hash1])
    871 			table[hash0] = uint32(s)
    872 			table[hash1] = uint32(s + 1)
    873 			hash2 := hash6(cv>>16, tableBits)
    874 
    875 			// Check repeat at offset checkRep.
    876 			const checkRep = 1
    877 			if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
    878 				base := s + checkRep
    879 				// Extend back
    880 				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
    881 					i--
    882 					base--
    883 				}
    884 				d += emitLiteral(dst[d:], src[nextEmit:base])
    885 				if debug && nextEmit != base {
    886 					fmt.Println("emitted ", base-nextEmit, "literals")
    887 				}
    888 				// Extend forward
    889 				candidate := s - repeat + 4 + checkRep
    890 				s += 4 + checkRep
    891 				for s <= sLimit {
    892 					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    893 						s += bits.TrailingZeros64(diff) >> 3
    894 						break
    895 					}
    896 					s += 8
    897 					candidate += 8
    898 				}
    899 				if debug {
    900 					// Validate match.
    901 					if s <= candidate {
    902 						panic("s <= candidate")
    903 					}
    904 					a := src[base:s]
    905 					b := src[base-repeat : base-repeat+(s-base)]
    906 					if !bytes.Equal(a, b) {
    907 						panic("mismatch")
    908 					}
    909 				}
    910 				if nextEmit > 0 {
    911 					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
    912 					d += emitRepeat(dst[d:], repeat, s-base)
    913 				} else {
    914 					// First match, cannot be repeat.
    915 					d += emitCopy(dst[d:], repeat, s-base)
    916 				}
    917 				if debug {
    918 					fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
    919 				}
    920 				nextEmit = s
    921 				if s >= sLimit {
    922 					goto emitRemainder
    923 				}
    924 
    925 				cv = load64(src, s)
    926 				continue
    927 			}
    928 
    929 			if uint32(cv) == load32(src, candidate) {
    930 				break
    931 			}
    932 			candidate = int(table[hash2])
    933 			if uint32(cv>>8) == load32(src, candidate2) {
    934 				table[hash2] = uint32(s + 2)
    935 				candidate = candidate2
    936 				s++
    937 				break
    938 			}
    939 			table[hash2] = uint32(s + 2)
    940 			if uint32(cv>>16) == load32(src, candidate) {
    941 				s += 2
    942 				break
    943 			}
    944 
    945 			cv = load64(src, nextS)
    946 			s = nextS
    947 		}
    948 
    949 		// Extend backwards.
    950 		// The top bytes will be rechecked to get the full match.
    951 		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
    952 			candidate--
    953 			s--
    954 		}
    955 
    956 		// Bail if we exceed the maximum size.
    957 		if d+(s-nextEmit) > dstLimit {
    958 			return 0
    959 		}
    960 
    961 		// A 4-byte match has been found. We'll later see if more than 4 bytes
    962 		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
    963 		// them as literal bytes.
    964 
    965 		d += emitLiteral(dst[d:], src[nextEmit:s])
    966 		if debug && nextEmit != s {
    967 			fmt.Println("emitted ", s-nextEmit, "literals")
    968 		}
    969 		// Call emitCopy, and then see if another emitCopy could be our next
    970 		// move. Repeat until we find no match for the input immediately after
    971 		// what was consumed by the last emitCopy call.
    972 		//
    973 		// If we exit this loop normally then we need to call emitLiteral next,
    974 		// though we don't yet know how big the literal will be. We handle that
    975 		// by proceeding to the next iteration of the main loop. We also can
    976 		// exit this loop via goto if we get close to exhausting the input.
    977 		for {
    978 			// Invariant: we have a 4-byte match at s, and no need to emit any
    979 			// literal bytes prior to s.
    980 			base := s
    981 			repeat = base - candidate
    982 
    983 			// Extend the 4-byte match as long as possible.
    984 			s += 4
    985 			candidate += 4
    986 			for s <= len(src)-8 {
    987 				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
    988 					s += bits.TrailingZeros64(diff) >> 3
    989 					break
    990 				}
    991 				s += 8
    992 				candidate += 8
    993 			}
    994 
    995 			d += emitCopy(dst[d:], repeat, s-base)
    996 			if debug {
    997 				// Validate match.
    998 				if s <= candidate {
    999 					panic("s <= candidate")
   1000 				}
   1001 				a := src[base:s]
   1002 				b := src[base-repeat : base-repeat+(s-base)]
   1003 				if !bytes.Equal(a, b) {
   1004 					panic("mismatch")
   1005 				}
   1006 			}
   1007 			if debug {
   1008 				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
   1009 			}
   1010 			nextEmit = s
   1011 			if s >= sLimit {
   1012 				goto emitRemainder
   1013 			}
   1014 
   1015 			if d > dstLimit {
   1016 				// Do we have space for more, if not bail.
   1017 				return 0
   1018 			}
   1019 			// Check for an immediate match, otherwise start search at s+1
   1020 			x := load64(src, s-2)
   1021 			m2Hash := hash6(x, tableBits)
   1022 			currHash := hash6(x>>16, tableBits)
   1023 			candidate = int(table[currHash])
   1024 			table[m2Hash] = uint32(s - 2)
   1025 			table[currHash] = uint32(s)
   1026 			if debug && s == candidate {
   1027 				panic("s == candidate")
   1028 			}
   1029 			if uint32(x>>16) != load32(src, candidate) {
   1030 				cv = load64(src, s+1)
   1031 				s++
   1032 				break
   1033 			}
   1034 		}
   1035 	}
   1036 
   1037 emitRemainder:
   1038 	if nextEmit < len(src) {
   1039 		// Bail if we exceed the maximum size.
   1040 		if d+len(src)-nextEmit > dstLimit {
   1041 			return 0
   1042 		}
   1043 		d += emitLiteral(dst[d:], src[nextEmit:])
   1044 		if debug && nextEmit != s {
   1045 			fmt.Println("emitted ", len(src)-nextEmit, "literals")
   1046 		}
   1047 	}
   1048 	return d
   1049 }