gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 752c38b0d5c01dbcc8f846518a61cbde4b7537cf
parent e1b704e06e01eb2d5996c2ebb2e3353e574e8ce7
Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Date:   Mon, 27 Feb 2023 10:21:58 +0100

[chore]: Bump github.com/minio/minio-go/v7 from 7.0.48 to 7.0.49 (#1567)

Bumps [github.com/minio/minio-go/v7](https://github.com/minio/minio-go) from 7.0.48 to 7.0.49.
- [Release notes](https://github.com/minio/minio-go/releases)
- [Commits](https://github.com/minio/minio-go/compare/v7.0.48...v7.0.49)

---
updated-dependencies:
- dependency-name: github.com/minio/minio-go/v7
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Diffstat:
Mgo.mod | 8++++----
Mgo.sum | 15++++++++-------
Mvendor/github.com/dustin/go-humanize/.travis.yml | 16++++++++--------
Mvendor/github.com/dustin/go-humanize/README.markdown | 2+-
Mvendor/github.com/dustin/go-humanize/bigbytes.go | 20++++++++++++++++++--
Mvendor/github.com/dustin/go-humanize/commaf.go | 1+
Mvendor/github.com/dustin/go-humanize/ftoa.go | 3+++
Mvendor/github.com/dustin/go-humanize/number.go | 2+-
Mvendor/github.com/dustin/go-humanize/si.go | 4++++
Mvendor/github.com/klauspost/compress/flate/deflate.go | 132+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mvendor/github.com/klauspost/compress/flate/dict_decoder.go | 24++++++++++++------------
Mvendor/github.com/klauspost/compress/flate/fast_encoder.go | 59+++++++++++++++++++++--------------------------------------
Mvendor/github.com/klauspost/compress/flate/huffman_bit_writer.go | 20+++++++++++---------
Mvendor/github.com/klauspost/compress/flate/huffman_code.go | 15++++++++++-----
Mvendor/github.com/klauspost/compress/flate/level1.go | 27++++++++++++++-------------
Mvendor/github.com/klauspost/compress/flate/level2.go | 35++++++++++++++++++-----------------
Mvendor/github.com/klauspost/compress/flate/level3.go | 41+++++++++++++++++++++--------------------
Mvendor/github.com/klauspost/compress/flate/level4.go | 11++++++-----
Mvendor/github.com/klauspost/compress/flate/level5.go | 28++++++++++++++++++----------
Mvendor/github.com/klauspost/compress/flate/level6.go | 30++++++++++++++++++++----------
Mvendor/github.com/klauspost/compress/flate/stateless.go | 19++++++++++++++++---
Mvendor/github.com/klauspost/compress/s2/README.md | 283++++++++++++++++++++++++++++++++++++-------------------------------------------
Mvendor/github.com/klauspost/compress/s2/decode.go | 6+++++-
Mvendor/github.com/klauspost/compress/s2/decode_other.go | 34+++++++++++++++++++++++-----------
Mvendor/github.com/klauspost/compress/s2/encode_all.go | 3++-
Mvendor/github.com/klauspost/compress/s2/encode_amd64.go | 12++++++++----
Mvendor/github.com/klauspost/compress/s2/encode_best.go | 35++++++++++++++++++++++++++++-------
Mvendor/github.com/klauspost/compress/s2/encode_better.go | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Mvendor/github.com/klauspost/compress/s2/encode_go.go | 9++++++++-
Mvendor/github.com/klauspost/compress/s2/encodeblock_amd64.go | 23+++++++++++++----------
Mvendor/github.com/klauspost/compress/s2/encodeblock_amd64.s | 919++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mvendor/github.com/klauspost/cpuid/v2/README.md | 229+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mvendor/github.com/klauspost/cpuid/v2/cpuid.go | 146++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Mvendor/github.com/klauspost/cpuid/v2/featureid_string.go | 363+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Mvendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go | 2+-
Mvendor/github.com/minio/minio-go/v7/api-put-object.go | 28++++++++++++++++++++++++++++
Mvendor/github.com/minio/minio-go/v7/api.go | 2+-
Mvendor/modules.txt | 12++++++------
38 files changed, 1698 insertions(+), 1025 deletions(-)

diff --git a/go.mod b/go.mod @@ -35,7 +35,7 @@ require ( github.com/jackc/pgx/v4 v4.17.2 github.com/microcosm-cc/bluemonday v1.0.22 github.com/miekg/dns v1.1.50 - github.com/minio/minio-go/v7 v7.0.48 + github.com/minio/minio-go/v7 v7.0.49 github.com/mitchellh/mapstructure v1.5.0 github.com/oklog/ulid v1.3.1 github.com/spf13/cobra v1.6.1 @@ -89,7 +89,7 @@ require ( github.com/dsoprea/go-photoshop-info-format v0.0.0-20200610045659-121dd752914d // indirect github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d // indirect github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect - github.com/dustin/go-humanize v1.0.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-errors/errors v1.4.1 // indirect @@ -118,8 +118,8 @@ require ( github.com/jinzhu/inflection v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect - github.com/klauspost/compress v1.15.9 // indirect - github.com/klauspost/cpuid/v2 v2.1.1 // indirect + github.com/klauspost/compress v1.15.15 // indirect + github.com/klauspost/cpuid/v2 v2.2.3 // indirect github.com/leodido/go-urn v1.2.1 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mattn/go-isatty v0.0.17 // indirect diff --git a/go.sum b/go.sum @@ -161,8 +161,9 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/ github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU= -github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -398,13 +399,13 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= -github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= +github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.1.1 h1:t0wUqjowdm8ezddV5k0tLWVklVuvLJpoHeb4WBdydm0= -github.com/klauspost/cpuid/v2 v2.1.1/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= +github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= @@ -443,8 +444,8 @@ github.com/miekg/dns v1.1.50 h1:DQUfb9uc6smULcREF09Uc+/Gd46YWqJd5DbpPE9xkcA= github.com/miekg/dns v1.1.50/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= -github.com/minio/minio-go/v7 v7.0.48 h1:VQtYB/2xHW2SlxqhjRlDpvSiSOfGlyFlXZF1EHARPHM= -github.com/minio/minio-go/v7 v7.0.48/go.mod h1:nCrRzjoSUQh8hgKKtu3Y708OLvRLtuASMg2/nvmbarw= +github.com/minio/minio-go/v7 v7.0.49 h1:dE5DfOtnXMXCjr/HWI6zN9vCrY6Sv666qhhiwUMvGV4= +github.com/minio/minio-go/v7 v7.0.49/go.mod h1:UI34MvQEiob3Cf/gGExGMmzugkM/tNgbFypNDy5LMVc= github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= diff --git a/vendor/github.com/dustin/go-humanize/.travis.yml b/vendor/github.com/dustin/go-humanize/.travis.yml @@ -1,12 +1,12 @@ sudo: false language: go +go_import_path: github.com/dustin/go-humanize go: - - 1.3.x - - 1.5.x - - 1.6.x - - 1.7.x - - 1.8.x - - 1.9.x + - 1.13.x + - 1.14.x + - 1.15.x + - 1.16.x + - stable - master matrix: allow_failures: @@ -15,7 +15,7 @@ matrix: install: - # Do nothing. This is needed to prevent default install action "go get -t -v ./..." from happening here (we want it to happen inside script step). script: - - go get -t -v ./... - diff -u <(echo -n) <(gofmt -d -s .) - - go tool vet . + - go vet . + - go install -v -race ./... - go test -v -race ./... diff --git a/vendor/github.com/dustin/go-humanize/README.markdown b/vendor/github.com/dustin/go-humanize/README.markdown @@ -5,7 +5,7 @@ Just a few functions for helping humanize times and sizes. `go get` it as `github.com/dustin/go-humanize`, import it as `"github.com/dustin/go-humanize"`, use it as `humanize`. -See [godoc](https://godoc.org/github.com/dustin/go-humanize) for +See [godoc](https://pkg.go.dev/github.com/dustin/go-humanize) for complete documentation. ## Sizes diff --git a/vendor/github.com/dustin/go-humanize/bigbytes.go b/vendor/github.com/dustin/go-humanize/bigbytes.go @@ -28,6 +28,10 @@ var ( BigZiByte = (&big.Int{}).Mul(BigEiByte, bigIECExp) // BigYiByte is 1,024 z bytes in bit.Ints BigYiByte = (&big.Int{}).Mul(BigZiByte, bigIECExp) + // BigRiByte is 1,024 y bytes in bit.Ints + BigRiByte = (&big.Int{}).Mul(BigYiByte, bigIECExp) + // BigQiByte is 1,024 r bytes in bit.Ints + BigQiByte = (&big.Int{}).Mul(BigRiByte, bigIECExp) ) var ( @@ -51,6 +55,10 @@ var ( BigZByte = (&big.Int{}).Mul(BigEByte, bigSIExp) // BigYByte is 1,000 SI z bytes in big.Ints BigYByte = (&big.Int{}).Mul(BigZByte, bigSIExp) + // BigRByte is 1,000 SI y bytes in big.Ints + BigRByte = (&big.Int{}).Mul(BigYByte, bigSIExp) + // BigQByte is 1,000 SI r bytes in big.Ints + BigQByte = (&big.Int{}).Mul(BigRByte, bigSIExp) ) var bigBytesSizeTable = map[string]*big.Int{ @@ -71,6 +79,10 @@ var bigBytesSizeTable = map[string]*big.Int{ "zb": BigZByte, "yib": BigYiByte, "yb": BigYByte, + "rib": BigRiByte, + "rb": BigRByte, + "qib": BigQiByte, + "qb": BigQByte, // Without suffix "": BigByte, "ki": BigKiByte, @@ -89,6 +101,10 @@ var bigBytesSizeTable = map[string]*big.Int{ "zi": BigZiByte, "y": BigYByte, "yi": BigYiByte, + "r": BigRByte, + "ri": BigRiByte, + "q": BigQByte, + "qi": BigQiByte, } var ten = big.NewInt(10) @@ -115,7 +131,7 @@ func humanateBigBytes(s, base *big.Int, sizes []string) string { // // BigBytes(82854982) -> 83 MB func BigBytes(s *big.Int) string { - sizes := []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} + sizes := []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "RB", "QB"} return humanateBigBytes(s, bigSIExp, sizes) } @@ -125,7 +141,7 @@ func BigBytes(s *big.Int) string { // // BigIBytes(82854982) -> 79 MiB func BigIBytes(s *big.Int) string { - sizes := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} + sizes := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", "RiB", "QiB"} return humanateBigBytes(s, bigIECExp, sizes) } diff --git a/vendor/github.com/dustin/go-humanize/commaf.go b/vendor/github.com/dustin/go-humanize/commaf.go @@ -1,3 +1,4 @@ +//go:build go1.6 // +build go1.6 package humanize diff --git a/vendor/github.com/dustin/go-humanize/ftoa.go b/vendor/github.com/dustin/go-humanize/ftoa.go @@ -6,6 +6,9 @@ import ( ) func stripTrailingZeros(s string) string { + if !strings.ContainsRune(s, '.') { + return s + } offset := len(s) - 1 for offset > 0 { if s[offset] == '.' { diff --git a/vendor/github.com/dustin/go-humanize/number.go b/vendor/github.com/dustin/go-humanize/number.go @@ -73,7 +73,7 @@ func FormatFloat(format string, n float64) string { if n > math.MaxFloat64 { return "Infinity" } - if n < -math.MaxFloat64 { + if n < (0.0 - math.MaxFloat64) { return "-Infinity" } diff --git a/vendor/github.com/dustin/go-humanize/si.go b/vendor/github.com/dustin/go-humanize/si.go @@ -8,6 +8,8 @@ import ( ) var siPrefixTable = map[float64]string{ + -30: "q", // quecto + -27: "r", // ronto -24: "y", // yocto -21: "z", // zepto -18: "a", // atto @@ -25,6 +27,8 @@ var siPrefixTable = map[float64]string{ 18: "E", // exa 21: "Z", // zetta 24: "Y", // yotta + 27: "R", // ronna + 30: "Q", // quetta } var revSIPrefixTable = revfmap(siPrefixTable) diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -131,7 +131,8 @@ func (d *compressor) fillDeflate(b []byte) int { s := d.state if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { // shift the window by windowSize - copy(d.window[:], d.window[windowSize:2*windowSize]) + //copy(d.window[:], d.window[windowSize:2*windowSize]) + *(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:]) s.index -= windowSize d.windowEnd -= windowSize if d.blockStart >= windowSize { @@ -293,7 +294,6 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of } offset = 0 - cGain := 0 if d.chain < 100 { for i := prevHead; tries > 0; tries-- { if wEnd == win[i+length] { @@ -321,10 +321,14 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of return } + // Minimum gain to accept a match. + cGain := 4 + // Some like it higher (CSV), some like it lower (JSON) - const baseCost = 6 + const baseCost = 3 // Base is 4 bytes at with an additional cost. // Matches must be better than this. + for i := prevHead; tries > 0; tries-- { if wEnd == win[i+length] { n := matchLen(win[i:i+minMatchLook], wPos) @@ -332,7 +336,7 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of // Calculate gain. Estimate newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) - //fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n])) + //fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length) if newGain > cGain { length = n offset = pos - i @@ -373,6 +377,12 @@ func hash4(b []byte) uint32 { return hash4u(binary.LittleEndian.Uint32(b), hashBits) } +// hash4 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4u(u uint32, h uint8) uint32 { + return (u * prime4bytes) >> (32 - h) +} + // bulkHash4 will compute hashes using the same // algorithm as hash4 func bulkHash4(b []byte, dst []uint32) { @@ -483,27 +493,103 @@ func (d *compressor) deflateLazy() { } if prevLength >= minMatchLength && s.length <= prevLength { - // Check for better match at end... + // No better match, but check for better match at end... // - // checkOff must be >=2 since we otherwise risk checking s.index - // Offset of 2 seems to yield best results. + // Skip forward a number of bytes. + // Offset of 2 seems to yield best results. 3 is sometimes better. const checkOff = 2 - prevIndex := s.index - 1 - if prevIndex+prevLength+checkOff < s.maxInsertIndex { - end := lookahead - if lookahead > maxMatchLength { - end = maxMatchLength - } - end += prevIndex - idx := prevIndex + prevLength - (4 - checkOff) - h := hash4(d.window[idx:]) - ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff) - if ch2 > minIndex { - length := matchLen(d.window[prevIndex:end], d.window[ch2:]) - // It seems like a pure length metric is best. - if length > prevLength { - prevLength = length - prevOffset = prevIndex - ch2 + + // Check all, except full length + if prevLength < maxMatchLength-checkOff { + prevIndex := s.index - 1 + if prevIndex+prevLength < s.maxInsertIndex { + end := lookahead + if lookahead > maxMatchLength+checkOff { + end = maxMatchLength + checkOff + } + end += prevIndex + + // Hash at match end. + h := hash4(d.window[prevIndex+prevLength:]) + ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { + length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) + // It seems like a pure length metric is best. + if length > prevLength { + prevLength = length + prevOffset = prevIndex - ch2 + + // Extend back... + for i := checkOff - 1; i >= 0; i-- { + if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] { + // Emit tokens we "owe" + for j := 0; j <= i; j++ { + d.tokens.AddLiteral(d.window[prevIndex+j]) + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } + } + break + } else { + prevLength++ + } + } + } else if false { + // Check one further ahead. + // Only rarely better, disabled for now. + prevIndex++ + h := hash4(d.window[prevIndex+prevLength:]) + ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { + length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) + // It seems like a pure length metric is best. + if length > prevLength+checkOff { + prevLength = length + prevOffset = prevIndex - ch2 + prevIndex-- + + // Extend back... + for i := checkOff; i >= 0; i-- { + if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] { + // Emit tokens we "owe" + for j := 0; j <= i; j++ { + d.tokens.AddLiteral(d.window[prevIndex+j]) + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } + } + break + } else { + prevLength++ + } + } + } + } + } } } } diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go @@ -7,19 +7,19 @@ package flate // dictDecoder implements the LZ77 sliding dictionary as used in decompression. // LZ77 decompresses data through sequences of two forms of commands: // -// * Literal insertions: Runs of one or more symbols are inserted into the data -// stream as is. This is accomplished through the writeByte method for a -// single symbol, or combinations of writeSlice/writeMark for multiple symbols. -// Any valid stream must start with a literal insertion if no preset dictionary -// is used. +// - Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. // -// * Backward copies: Runs of one or more symbols are copied from previously -// emitted data. Backward copies come as the tuple (dist, length) where dist -// determines how far back in the stream to copy from and length determines how -// many bytes to copy. Note that it is valid for the length to be greater than -// the distance. Since LZ77 uses forward copies, that situation is used to -// perform a form of run-length encoding on repeated runs of symbols. -// The writeCopy and tryWriteCopy are used to implement this command. +// - Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. // // For performance reasons, this implementation performs little to no sanity // checks about the arguments. As such, the invariants documented for each diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -58,17 +58,6 @@ const ( prime8bytes = 0xcf1bbcdcb7a56463 ) -func load32(b []byte, i int) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 -} - -func load64(b []byte, i int) uint64 { - return binary.LittleEndian.Uint64(b[i:]) -} - func load3232(b []byte, i int32) uint32 { return binary.LittleEndian.Uint32(b[i:]) } @@ -77,10 +66,6 @@ func load6432(b []byte, i int32) uint64 { return binary.LittleEndian.Uint64(b[i:]) } -func hash(u uint32) uint32 { - return (u * 0x1e35a7bd) >> tableShift -} - type tableEntry struct { offset int32 } @@ -104,7 +89,8 @@ func (e *fastGen) addBlock(src []byte) int32 { } // Move down offset := int32(len(e.hist)) - maxMatchOffset - copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + // copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + *(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:]) e.cur += offset e.hist = e.hist[:maxMatchOffset] } @@ -114,39 +100,36 @@ func (e *fastGen) addBlock(src []byte) int32 { return s } -// hash4 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4u(u uint32, h uint8) uint32 { - return (u * prime4bytes) >> (32 - h) -} - type tableEntryPrev struct { Cur tableEntry Prev tableEntry } -// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4x64(u uint64, h uint8) uint32 { - return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32) -} - // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. // Preferably h should be a constant and should always be <64. func hash7(u uint64, h uint8) uint32 { return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) } -// hash8 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash8(u uint64, h uint8) uint32 { - return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64)) -} - -// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash6(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64)) +// hashLen returns a hash of the lowest mls bytes of with length output bits. +// mls must be >=3 and <=8. Any other value will return hash for 4 bytes. +// length should always be < 32. +// Preferably length and mls should be a constant for inlining. +func hashLen(u uint64, length, mls uint8) uint32 { + switch mls { + case 3: + return (uint32(u<<8) * prime3bytes) >> (32 - length) + case 5: + return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length)) + case 6: + return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length)) + case 7: + return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length)) + case 8: + return uint32((u * prime8bytes) >> (64 - length)) + default: + return (uint32(u) * prime4bytes) >> (32 - length) + } } // matchlen will return the match length between offsets and t in src. diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -265,9 +265,9 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) { // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional // information. Code badCode is an end marker // -// numLiterals The number of literals in literalEncoding -// numOffsets The number of offsets in offsetEncoding -// litenc, offenc The literal and offset encoder to use +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +// litenc, offenc The literal and offset encoder to use func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { for i := range w.codegenFreq { w.codegenFreq[i] = 0 @@ -460,9 +460,9 @@ func (w *huffmanBitWriter) writeOutBits() { // Write the header of a dynamic Huffman block to the output stream. // -// numLiterals The number of literals specified in codegen -// numOffsets The number of offsets specified in codegen -// numCodegens The number of codegens used in codegen +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { if w.err != nil { return @@ -790,9 +790,11 @@ func (w *huffmanBitWriter) fillTokens() { // and offsetEncoding. // The number of literal and offset tokens is returned. func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { - copy(w.literalFreq[:], t.litHist[:]) - copy(w.literalFreq[256:], t.extraHist[:]) - copy(w.offsetFreq[:], t.offHist[:offsetCodeCount]) + //copy(w.literalFreq[:], t.litHist[:]) + *(*[256]uint16)(w.literalFreq[:]) = t.litHist + //copy(w.literalFreq[256:], t.extraHist[:]) + *(*[32]uint16)(w.literalFreq[256:]) = t.extraHist + w.offsetFreq = t.offHist if t.n == 0 { return diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -168,13 +168,18 @@ func (h *huffmanEncoder) canReuseBits(freq []uint16) int { // The cases of 0, 1, and 2 literals are handled by special case code. // // list An array of the literals with non-zero frequencies -// and their associated frequencies. The array is in order of increasing -// frequency, and has as its last element a special element with frequency -// MaxInt32 +// +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// // maxBits The maximum number of bits that should be used to encode any literal. -// Must be less than 16. +// +// Must be less than 16. +// // return An integer array in which array[i] indicates the number of literals -// that should be encoded in i bits. +// +// that should be encoded in i bits. func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { if maxBits >= maxBitsLimit { panic("flate: maxBits too large") diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go @@ -19,6 +19,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin + hashBytes = 5 ) if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) @@ -68,7 +69,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { sLimit := int32(len(src) - inputMargin) // nextEmit is where in src the next emitLiteral should start from. - cv := load3232(src, s) + cv := load6432(src, s) for { const skipLog = 5 @@ -77,7 +78,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { nextS := s var candidate tableEntry for { - nextHash := hash(cv) + nextHash := hashLen(cv, tableBits, hashBytes) candidate = e.table[nextHash] nextS = s + doEvery + (s-nextEmit)>>skipLog if nextS > sLimit { @@ -86,16 +87,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { now := load6432(src, nextS) e.table[nextHash] = tableEntry{offset: s + e.cur} - nextHash = hash(uint32(now)) + nextHash = hashLen(now, tableBits, hashBytes) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } // Do one right away... - cv = uint32(now) + cv = now s = nextS nextS++ candidate = e.table[nextHash] @@ -103,11 +104,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } - cv = uint32(now) + cv = now s = nextS } @@ -198,9 +199,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { } if s >= sLimit { // Index first pair after match end. - if int(s+l+4) < len(src) { - cv := load3232(src, s) - e.table[hash(cv)] = tableEntry{offset: s + e.cur} + if int(s+l+8) < len(src) { + cv := load6432(src, s) + e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -213,16 +214,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { // three load32 calls. x := load6432(src, s-2) o := e.cur + s - 2 - prevHash := hash(uint32(x)) + prevHash := hashLen(x, tableBits, hashBytes) e.table[prevHash] = tableEntry{offset: o} x >>= 16 - currHash := hash(uint32(x)) + currHash := hashLen(x, tableBits, hashBytes) candidate = e.table[currHash] e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { - cv = uint32(x >> 8) + cv = x >> 8 s++ break } diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go @@ -16,6 +16,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin + hashBytes = 5 ) if debugDeflate && e.cur < 0 { @@ -66,7 +67,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { sLimit := int32(len(src) - inputMargin) // nextEmit is where in src the next emitLiteral should start from. - cv := load3232(src, s) + cv := load6432(src, s) for { // When should we start skipping if we haven't found matches in a long while. const skipLog = 5 @@ -75,7 +76,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { nextS := s var candidate tableEntry for { - nextHash := hash4u(cv, bTableBits) + nextHash := hashLen(cv, bTableBits, hashBytes) s = nextS nextS = s + doEvery + (s-nextEmit)>>skipLog if nextS > sLimit { @@ -84,16 +85,16 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { candidate = e.table[nextHash] now := load6432(src, nextS) e.table[nextHash] = tableEntry{offset: s + e.cur} - nextHash = hash4u(uint32(now), bTableBits) + nextHash = hashLen(now, bTableBits, hashBytes) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } // Do one right away... - cv = uint32(now) + cv = now s = nextS nextS++ candidate = e.table[nextHash] @@ -101,10 +102,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { break } - cv = uint32(now) + cv = now } // A 4-byte match has been found. We'll later see if more than 4 bytes @@ -154,9 +155,9 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { if s >= sLimit { // Index first pair after match end. - if int(s+l+4) < len(src) { - cv := load3232(src, s) - e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} + if int(s+l+8) < len(src) { + cv := load6432(src, s) + e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -164,15 +165,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { // Store every second hash in-between, but offset by 1. for i := s - l + 2; i < s-5; i += 7 { x := load6432(src, i) - nextHash := hash4u(uint32(x), bTableBits) + nextHash := hashLen(x, bTableBits, hashBytes) e.table[nextHash] = tableEntry{offset: e.cur + i} // Skip one x >>= 16 - nextHash = hash4u(uint32(x), bTableBits) + nextHash = hashLen(x, bTableBits, hashBytes) e.table[nextHash] = tableEntry{offset: e.cur + i + 2} // Skip one x >>= 16 - nextHash = hash4u(uint32(x), bTableBits) + nextHash = hashLen(x, bTableBits, hashBytes) e.table[nextHash] = tableEntry{offset: e.cur + i + 4} } @@ -184,17 +185,17 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { // three load32 calls. x := load6432(src, s-2) o := e.cur + s - 2 - prevHash := hash4u(uint32(x), bTableBits) - prevHash2 := hash4u(uint32(x>>8), bTableBits) + prevHash := hashLen(x, bTableBits, hashBytes) + prevHash2 := hashLen(x>>8, bTableBits, hashBytes) e.table[prevHash] = tableEntry{offset: o} e.table[prevHash2] = tableEntry{offset: o + 1} - currHash := hash4u(uint32(x>>16), bTableBits) + currHash := hashLen(x>>16, bTableBits, hashBytes) candidate = e.table[currHash] e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { - cv = uint32(x >> 24) + cv = x >> 24 s++ break } diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go @@ -11,10 +11,11 @@ type fastEncL3 struct { // Encode uses a similar algorithm to level 2, will check up to two candidates. func (e *fastEncL3) Encode(dst *tokens, src []byte) { const ( - inputMargin = 8 - 1 + inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin tableBits = 16 tableSize = 1 << tableBits + hashBytes = 5 ) if debugDeflate && e.cur < 0 { @@ -69,20 +70,20 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { sLimit := int32(len(src) - inputMargin) // nextEmit is where in src the next emitLiteral should start from. - cv := load3232(src, s) + cv := load6432(src, s) for { - const skipLog = 6 + const skipLog = 7 nextS := s var candidate tableEntry for { - nextHash := hash4u(cv, tableBits) + nextHash := hashLen(cv, tableBits, hashBytes) s = nextS nextS = s + 1 + (s-nextEmit)>>skipLog if nextS > sLimit { goto emitRemainder } candidates := e.table[nextHash] - now := load3232(src, nextS) + now := load6432(src, nextS) // Safe offset distance until s + 4... minOffset := e.cur + s - (maxMatchOffset - 4) @@ -96,8 +97,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { continue } - if cv == load3232(src, candidate.offset-e.cur) { - if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { + if uint32(cv) == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) { break } // Both match and are valid, pick longest. @@ -112,7 +113,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { // We only check if value mismatches. // Offset will always be invalid in other cases. candidate = candidates.Prev - if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { break } } @@ -164,9 +165,9 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { if s >= sLimit { t += l // Index first pair after match end. - if int(t+4) < len(src) && t > 0 { - cv := load3232(src, t) - nextHash := hash4u(cv, tableBits) + if int(t+8) < len(src) && t > 0 { + cv = load6432(src, t) + nextHash := hashLen(cv, tableBits, hashBytes) e.table[nextHash] = tableEntryPrev{ Prev: e.table[nextHash].Cur, Cur: tableEntry{offset: e.cur + t}, @@ -176,8 +177,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { } // Store every 5th hash in-between. - for i := s - l + 2; i < s-5; i += 5 { - nextHash := hash4u(load3232(src, i), tableBits) + for i := s - l + 2; i < s-5; i += 6 { + nextHash := hashLen(load6432(src, i), tableBits, hashBytes) e.table[nextHash] = tableEntryPrev{ Prev: e.table[nextHash].Cur, Cur: tableEntry{offset: e.cur + i}} @@ -185,23 +186,23 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { // We could immediately start working at s now, but to improve // compression we first update the hash table at s-2 to s. x := load6432(src, s-2) - prevHash := hash4u(uint32(x), tableBits) + prevHash := hashLen(x, tableBits, hashBytes) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, Cur: tableEntry{offset: e.cur + s - 2}, } x >>= 8 - prevHash = hash4u(uint32(x), tableBits) + prevHash = hashLen(x, tableBits, hashBytes) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, Cur: tableEntry{offset: e.cur + s - 1}, } x >>= 8 - currHash := hash4u(uint32(x), tableBits) + currHash := hashLen(x, tableBits, hashBytes) candidates := e.table[currHash] - cv = uint32(x) + cv = x e.table[currHash] = tableEntryPrev{ Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}, @@ -212,17 +213,17 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { minOffset := e.cur + s - (maxMatchOffset - 4) if candidate.offset > minOffset { - if cv == load3232(src, candidate.offset-e.cur) { + if uint32(cv) == load3232(src, candidate.offset-e.cur) { // Found a match... continue } candidate = candidates.Prev - if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { // Match at prev... continue } } - cv = uint32(x >> 8) + cv = x >> 8 s++ break } diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go @@ -12,6 +12,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 ) if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) @@ -80,7 +81,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { nextS := s var t int32 for { - nextHashS := hash4x64(cv, tableBits) + nextHashS := hashLen(cv, tableBits, hashShortBytes) nextHashL := hash7(cv, tableBits) s = nextS @@ -168,7 +169,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+8) < len(src) { cv := load6432(src, s) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur} e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} } goto emitRemainder @@ -183,7 +184,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 i += 3 for ; i < s-1; i += 3 { @@ -192,7 +193,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 } } } @@ -201,7 +202,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { // compression we first update the hash table at s-1 and at s. x := load6432(src, s-1) o := e.cur + s - 1 - prevHashS := hash4x64(x, tableBits) + prevHashS := hashLen(x, tableBits, hashShortBytes) prevHashL := hash7(x, tableBits) e.table[prevHashS] = tableEntry{offset: o} e.bTable[prevHashL] = tableEntry{offset: o} diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go @@ -12,6 +12,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 ) if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) @@ -88,7 +89,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { var l int32 var t int32 for { - nextHashS := hash4x64(cv, tableBits) + nextHashS := hashLen(cv, tableBits, hashShortBytes) nextHashL := hash7(cv, tableBits) s = nextS @@ -105,7 +106,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { eLong := &e.bTable[nextHashL] eLong.Cur, eLong.Prev = entry, eLong.Cur - nextHashS = hash4x64(next, tableBits) + nextHashS = hashLen(next, tableBits, hashShortBytes) nextHashL = hash7(next, tableBits) t = lCandidate.Cur.offset - e.cur @@ -191,14 +192,21 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { // Try to locate a better match by checking the end of best match... if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset - // Test current - t2 := eLong - e.cur - l - off := s - t2 + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 if t2 >= 0 && off < maxMatchOffset && off > 0 { - if l2 := e.matchlenLong(s, t2, src); l2 > l { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { t = t2 l = l2 + s = s2 } } } @@ -250,7 +258,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { if i < s-1 { cv := load6432(src, i) t := tableEntry{offset: i + e.cur} - e.table[hash4x64(cv, tableBits)] = t + e.table[hashLen(cv, tableBits, hashShortBytes)] = t eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur @@ -263,7 +271,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { // We only have enough bits for a short entry at i+2 cv >>= 8 t = tableEntry{offset: t.offset + 1} - e.table[hash4x64(cv, tableBits)] = t + e.table[hashLen(cv, tableBits, hashShortBytes)] = t // Skip one - otherwise we risk hitting 's' i += 4 @@ -273,7 +281,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur - e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 } } } @@ -282,7 +290,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { // compression we first update the hash table at s-1 and at s. x := load6432(src, s-1) o := e.cur + s - 1 - prevHashS := hash4x64(x, tableBits) + prevHashS := hashLen(x, tableBits, hashShortBytes) prevHashL := hash7(x, tableBits) e.table[prevHashS] = tableEntry{offset: o} eLong := &e.bTable[prevHashL] diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go @@ -12,6 +12,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 ) if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) @@ -90,7 +91,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { var l int32 var t int32 for { - nextHashS := hash4x64(cv, tableBits) + nextHashS := hashLen(cv, tableBits, hashShortBytes) nextHashL := hash7(cv, tableBits) s = nextS nextS = s + doEvery + (s-nextEmit)>>skipLog @@ -107,7 +108,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { eLong.Cur, eLong.Prev = entry, eLong.Cur // Calculate hashes of 'next' - nextHashS = hash4x64(next, tableBits) + nextHashS = hashLen(next, tableBits, hashShortBytes) nextHashL = hash7(next, tableBits) t = lCandidate.Cur.offset - e.cur @@ -213,24 +214,33 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // Try to locate a better match by checking the end-of-match... if sAt := s + l; sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] // Test current - t2 := eLong.Cur.offset - e.cur - l - off := s - t2 + t2 := eLong.Cur.offset - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 if off < maxMatchOffset { if off > 0 && t2 >= 0 { - if l2 := e.matchlenLong(s, t2, src); l2 > l { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { t = t2 l = l2 + s = s2 } } // Test next: - t2 = eLong.Prev.offset - e.cur - l - off := s - t2 + t2 = eLong.Prev.offset - e.cur - l + skipBeginning + off := s2 - t2 if off > 0 && off < maxMatchOffset && t2 >= 0 { - if l2 := e.matchlenLong(s, t2, src); l2 > l { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { t = t2 l = l2 + s = s2 } } } @@ -277,7 +287,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // Index after match end. for i := nextS + 1; i < int32(len(src))-8; i += 2 { cv := load6432(src, i) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur} eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur } @@ -292,7 +302,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong2 := &e.bTable[hash7(cv>>8, tableBits)] - e.table[hash4x64(cv, tableBits)] = t + e.table[hashLen(cv, tableBits, hashShortBytes)] = t eLong.Cur, eLong.Prev = t, eLong.Cur eLong2.Cur, eLong2.Prev = t2, eLong2.Cur } diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -86,11 +86,19 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { dict = dict[len(dict)-maxStatelessDict:] } + // For subsequent loops, keep shallow dict reference to avoid alloc+copy. + var inDict []byte + for len(in) > 0 { todo := in - if len(todo) > maxStatelessBlock-len(dict) { + if len(inDict) > 0 { + if len(todo) > maxStatelessBlock-maxStatelessDict { + todo = todo[:maxStatelessBlock-maxStatelessDict] + } + } else if len(todo) > maxStatelessBlock-len(dict) { todo = todo[:maxStatelessBlock-len(dict)] } + inOrg := in in = in[len(todo):] uncompressed := todo if len(dict) > 0 { @@ -102,7 +110,11 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { todo = combined } // Compress - statelessEnc(&dst, todo, int16(len(dict))) + if len(inDict) == 0 { + statelessEnc(&dst, todo, int16(len(dict))) + } else { + statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict) + } isEof := eof && len(in) == 0 if dst.n == 0 { @@ -119,7 +131,8 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { } if len(in) > 0 { // Retain a dict if we have more - dict = todo[len(todo)-maxStatelessDict:] + inDict = inOrg[len(uncompressed)-maxStatelessDict:] + dict = nil dst.Reset() } if bw.err != nil { diff --git a/vendor/github.com/klauspost/compress/s2/README.md b/vendor/github.com/klauspost/compress/s2/README.md @@ -325,35 +325,35 @@ The content compressed in this mode is fully compatible with the standard decode Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all threads and a single thread (1 CPU): -| File | S2 speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller | -|-----------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------| -| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 12.70x | 10556 MB/s | 7.35% | 4.15x | 3455 MB/s | 12.79% | -| (1 CPU) | 1.14x | 948 MB/s | - | 0.42x | 349 MB/s | - | -| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 17.13x | 14484 MB/s | 31.60% | 10.09x | 8533 MB/s | 37.71% | -| (1 CPU) | 1.33x | 1127 MB/s | - | 0.70x | 589 MB/s | - | -| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12000 MB/s | -5.79% | 6.59x | 5223 MB/s | 5.80% | -| (1 CPU) | 1.11x | 877 MB/s | - | 0.47x | 370 MB/s | - | -| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 14.62x | 12116 MB/s | 15.90% | 5.35x | 4430 MB/s | 16.08% | -| (1 CPU) | 1.38x | 1146 MB/s | - | 0.38x | 312 MB/s | - | -| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 8.83x | 17579 MB/s | 43.86% | 6.54x | 13011 MB/s | 47.23% | -| (1 CPU) | 1.14x | 2259 MB/s | - | 0.74x | 1475 MB/s | - | -| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 16.72x | 14019 MB/s | 24.02% | 10.11x | 8477 MB/s | 30.48% | -| (1 CPU) | 1.24x | 1043 MB/s | - | 0.70x | 586 MB/s | - | -| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9254 MB/s | 1.84% | 6.75x | 4686 MB/s | 6.72% | -| (1 CPU) | 0.97x | 672 MB/s | - | 0.53x | 366 MB/s | - | -| sharnd.out.2gb | 2.11x | 12639 MB/s | 0.01% | 1.98x | 11833 MB/s | 0.01% | -| (1 CPU) | 0.93x | 5594 MB/s | - | 1.34x | 8030 MB/s | - | -| [enwik9](http://mattmahoney.net/dc/textdata.html) | 19.34x | 8220 MB/s | 3.98% | 7.87x | 3345 MB/s | 15.82% | -| (1 CPU) | 1.06x | 452 MB/s | - | 0.50x | 213 MB/s | - | -| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 10.48x | 6124 MB/s | 5.67% | 3.76x | 2197 MB/s | 12.60% | -| (1 CPU) | 0.97x | 568 MB/s | - | 0.46x | 271 MB/s | - | -| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 21.07x | 9020 MB/s | 6.36% | 6.91x | 2959 MB/s | 16.95% | -| (1 CPU) | 1.07x | 460 MB/s | - | 0.51x | 220 MB/s | - | +| File | S2 Speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller | +|---------------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------| +| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 16.33x | 10556 MB/s | 8.0% | 6.04x | 5252 MB/s | 14.7% | +| (1 CPU) | 1.08x | 940 MB/s | - | 0.46x | 400 MB/s | - | +| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 16.51x | 15224 MB/s | 31.70% | 9.47x | 8734 MB/s | 37.71% | +| (1 CPU) | 1.26x | 1157 MB/s | - | 0.60x | 556 MB/s | - | +| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12598 MB/s | -5.76% | 6.23x | 5675 MB/s | 3.62% | +| (1 CPU) | 1.02x | 932 MB/s | - | 0.47x | 432 MB/s | - | +| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 11.21x | 12116 MB/s | 15.95% | 3.24x | 3500 MB/s | 18.00% | +| (1 CPU) | 1.05x | 1135 MB/s | - | 0.27x | 292 MB/s | - | +| [apache.log](https://files.klauspost.com/compress/apache.log.zst) | 8.55x | 16673 MB/s | 20.54% | 5.85x | 11420 MB/s | 24.97% | +| (1 CPU) | 1.91x | 1771 MB/s | - | 0.53x | 1041 MB/s | - | +| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 15.76x | 14357 MB/s | 24.01% | 8.67x | 7891 MB/s | 33.68% | +| (1 CPU) | 1.17x | 1064 MB/s | - | 0.65x | 595 MB/s | - | +| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9835 MB/s | 2.34% | 6.85x | 4863 MB/s | 9.96% | +| (1 CPU) | 0.97x | 689 MB/s | - | 0.55x | 387 MB/s | - | +| sharnd.out.2gb | 9.11x | 13213 MB/s | 0.01% | 1.49x | 9184 MB/s | 0.01% | +| (1 CPU) | 0.88x | 5418 MB/s | - | 0.77x | 5417 MB/s | - | +| [sofia-air-quality-dataset csv](https://files.klauspost.com/compress/sofia-air-quality-dataset.tar.zst) | 22.00x | 11477 MB/s | 18.73% | 11.15x | 5817 MB/s | 27.88% | +| (1 CPU) | 1.23x | 642 MB/s | - | 0.71x | 642 MB/s | - | +| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 11.23x | 6520 MB/s | 5.9% | 5.35x | 3109 MB/s | 15.88% | +| (1 CPU) | 1.05x | 607 MB/s | - | 0.52x | 304 MB/s | - | +| [enwik9](https://files.klauspost.com/compress/enwik9.zst) | 19.28x | 8440 MB/s | 4.04% | 9.31x | 4076 MB/s | 18.04% | +| (1 CPU) | 1.12x | 488 MB/s | - | 0.57x | 250 MB/s | - | ### Legend -* `S2 speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core. -* `S2 throughput`: Throughput of S2 in MB/s. +* `S2 Speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core. +* `S2 Throughput`: Throughput of S2 in MB/s. * `S2 % smaller`: How many percent of the Snappy output size is S2 better. * `S2 "better"`: Speed when enabling "better" compression mode in S2 compared to Snappy. * `"better" throughput`: Speed when enabling "better" compression mode in S2 compared to Snappy. @@ -361,7 +361,7 @@ Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all th There is a good speedup across the board when using a single thread and a significant speedup when using multiple threads. -Machine generated data gets by far the biggest compression boost, with size being being reduced by up to 45% of Snappy size. +Machine generated data gets by far the biggest compression boost, with size being reduced by up to 35% of Snappy size. The "better" compression mode sees a good improvement in all cases, but usually at a performance cost. @@ -404,15 +404,15 @@ The "better" compression mode will actively look for shorter matches, which is w Without assembly decompression is also very fast; single goroutine decompression speed. No assembly: | File | S2 Throughput | S2 throughput | -|--------------------------------|--------------|---------------| -| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s | -| 10gb.tar.s2 | 1.30x | 867.07 MB/s | -| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s | -| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s | -| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s | -| enwik9.s2 | 1.67x | 681.53 MB/s | -| adresser.json.s2 | 3.41x | 4230.53 MB/s | -| silesia.tar.s2 | 1.52x | 811.58 | +|--------------------------------|---------------|---------------| +| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s | +| 10gb.tar.s2 | 1.30x | 867.07 MB/s | +| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s | +| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s | +| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s | +| enwik9.s2 | 1.67x | 681.53 MB/s | +| adresser.json.s2 | 3.41x | 4230.53 MB/s | +| silesia.tar.s2 | 1.52x | 811.58 | Even though S2 typically compresses better than Snappy, decompression speed is always better. @@ -450,14 +450,14 @@ The most reliable is a wide dataset. For this we use [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z), 53927 files, total input size: 4,014,735,833 bytes. Single goroutine used. -| * | Input | Output | Reduction | MB/s | -|-------------------|------------|------------|-----------|--------| -| S2 | 4014735833 | 1059723369 | 73.60% | **934.34** | -| S2 Better | 4014735833 | 969670507 | 75.85% | 532.70 | -| S2 Best | 4014735833 | 906625668 | **77.85%** | 46.84 | -| Snappy | 4014735833 | 1128706759 | 71.89% | 762.59 | -| S2, Snappy Output | 4014735833 | 1093821420 | 72.75% | 908.60 | -| LZ4 | 4014735833 | 1079259294 | 73.12% | 526.94 | +| * | Input | Output | Reduction | MB/s | +|-------------------|------------|------------|------------|------------| +| S2 | 4014735833 | 1059723369 | 73.60% | **936.73** | +| S2 Better | 4014735833 | 961580539 | 76.05% | 451.10 | +| S2 Best | 4014735833 | 899182886 | **77.60%** | 46.84 | +| Snappy | 4014735833 | 1128706759 | 71.89% | 790.15 | +| S2, Snappy Output | 4014735833 | 1093823291 | 72.75% | 936.60 | +| LZ4 | 4014735833 | 1063768713 | 73.50% | 452.02 | S2 delivers both the best single threaded throughput with regular mode and the best compression rate with "best". "Better" mode provides the same compression speed as LZ4 with better compression ratio. @@ -489,42 +489,23 @@ AMD64 assembly is use for both S2 and Snappy. | Absolute Perf | Snappy size | S2 Size | Snappy Speed | S2 Speed | Snappy dec | S2 dec | |-----------------------|-------------|---------|--------------|-------------|-------------|-------------| -| html | 22843 | 21111 | 16246 MB/s | 17438 MB/s | 40972 MB/s | 49263 MB/s | -| urls.10K | 335492 | 287326 | 7943 MB/s | 9693 MB/s | 22523 MB/s | 26484 MB/s | -| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 273889 MB/s | 718321 MB/s | 827552 MB/s | -| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 17773 MB/s | 33691 MB/s | 52421 MB/s | -| paper-100k.pdf | 85304 | 84459 | 167546 MB/s | 101263 MB/s | 326905 MB/s | 291944 MB/s | -| html_x_4 | 92234 | 21113 | 15194 MB/s | 50670 MB/s | 30843 MB/s | 32217 MB/s | -| alice29.txt | 88034 | 85975 | 5936 MB/s | 6139 MB/s | 12882 MB/s | 20044 MB/s | -| asyoulik.txt | 77503 | 79650 | 5517 MB/s | 6366 MB/s | 12735 MB/s | 22806 MB/s | -| lcet10.txt | 234661 | 220670 | 6235 MB/s | 6067 MB/s | 14519 MB/s | 18697 MB/s | -| plrabn12.txt | 319267 | 317985 | 5159 MB/s | 5726 MB/s | 11923 MB/s | 19901 MB/s | -| geo.protodata | 23335 | 18690 | 21220 MB/s | 26529 MB/s | 56271 MB/s | 62540 MB/s | -| kppkn.gtb | 69526 | 65312 | 9732 MB/s | 8559 MB/s | 18491 MB/s | 18969 MB/s | -| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 15489 MB/s | 31883 MB/s | 38874 MB/s | -| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13000 MB/s | 48056 MB/s | 52341 MB/s | -| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12806 MB/s | 32378 MB/s | 46322 MB/s | -| alice29.txt (20000B) | 12686 | 13574 | 7733 MB/s | 11210 MB/s | 30566 MB/s | 58969 MB/s | - - -| Relative Perf | Snappy size | S2 size improved | S2 Speed | S2 Dec Speed | -|-----------------------|-------------|------------------|----------|--------------| -| html | 22.31% | 7.58% | 1.07x | 1.20x | -| urls.10K | 47.78% | 14.36% | 1.22x | 1.18x | -| fireworks.jpeg | 99.95% | -0.05% | 0.78x | 1.15x | -| fireworks.jpeg (200B) | 73.00% | -6.16% | 2.00x | 1.56x | -| paper-100k.pdf | 83.30% | 0.99% | 0.60x | 0.89x | -| html_x_4 | 22.52% | 77.11% | 3.33x | 1.04x | -| alice29.txt | 57.88% | 2.34% | 1.03x | 1.56x | -| asyoulik.txt | 61.91% | -2.77% | 1.15x | 1.79x | -| lcet10.txt | 54.99% | 5.96% | 0.97x | 1.29x | -| plrabn12.txt | 66.26% | 0.40% | 1.11x | 1.67x | -| geo.protodata | 19.68% | 19.91% | 1.25x | 1.11x | -| kppkn.gtb | 37.72% | 6.06% | 0.88x | 1.03x | -| alice29.txt (128B) | 62.50% | -2.50% | 2.31x | 1.22x | -| alice29.txt (1000B) | 77.40% | 0.00% | 1.07x | 1.09x | -| alice29.txt (10000B) | 66.48% | -4.29% | 1.27x | 1.43x | -| alice29.txt (20000B) | 63.43% | -7.00% | 1.45x | 1.93x | +| html | 22843 | 20868 | 16246 MB/s | 18617 MB/s | 40972 MB/s | 49263 MB/s | +| urls.10K | 335492 | 286541 | 7943 MB/s | 10201 MB/s | 22523 MB/s | 26484 MB/s | +| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 303228 MB/s | 718321 MB/s | 827552 MB/s | +| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 20180 MB/s | 33691 MB/s | 52421 MB/s | +| paper-100k.pdf | 85304 | 84202 | 167546 MB/s | 112988 MB/s | 326905 MB/s | 291944 MB/s | +| html_x_4 | 92234 | 20870 | 15194 MB/s | 54457 MB/s | 30843 MB/s | 32217 MB/s | +| alice29.txt | 88034 | 85934 | 5936 MB/s | 6540 MB/s | 12882 MB/s | 20044 MB/s | +| asyoulik.txt | 77503 | 79575 | 5517 MB/s | 6657 MB/s | 12735 MB/s | 22806 MB/s | +| lcet10.txt | 234661 | 220383 | 6235 MB/s | 6303 MB/s | 14519 MB/s | 18697 MB/s | +| plrabn12.txt | 319267 | 318196 | 5159 MB/s | 6074 MB/s | 11923 MB/s | 19901 MB/s | +| geo.protodata | 23335 | 18606 | 21220 MB/s | 25432 MB/s | 56271 MB/s | 62540 MB/s | +| kppkn.gtb | 69526 | 65019 | 9732 MB/s | 8905 MB/s | 18491 MB/s | 18969 MB/s | +| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 17179 MB/s | 31883 MB/s | 38874 MB/s | +| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13273 MB/s | 48056 MB/s | 52341 MB/s | +| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12824 MB/s | 32378 MB/s | 46322 MB/s | +| alice29.txt (20000B) | 12686 | 13516 | 7733 MB/s | 12160 MB/s | 30566 MB/s | 58969 MB/s | + Speed is generally at or above Snappy. Small blocks gets a significant speedup, although at the expense of size. @@ -543,42 +524,23 @@ So individual benchmarks should only be seen as a guideline and the overall pict | Absolute Perf | Snappy size | Better Size | Snappy Speed | Better Speed | Snappy dec | Better dec | |-----------------------|-------------|-------------|--------------|--------------|-------------|-------------| -| html | 22843 | 19833 | 16246 MB/s | 7731 MB/s | 40972 MB/s | 40292 MB/s | -| urls.10K | 335492 | 253529 | 7943 MB/s | 3980 MB/s | 22523 MB/s | 20981 MB/s | -| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 9760 MB/s | 718321 MB/s | 823698 MB/s | -| fireworks.jpeg (200B) | 146 | 142 | 8869 MB/s | 594 MB/s | 33691 MB/s | 30101 MB/s | -| paper-100k.pdf | 85304 | 82915 | 167546 MB/s | 7470 MB/s | 326905 MB/s | 198869 MB/s | -| html_x_4 | 92234 | 19841 | 15194 MB/s | 23403 MB/s | 30843 MB/s | 30937 MB/s | -| alice29.txt | 88034 | 73218 | 5936 MB/s | 2945 MB/s | 12882 MB/s | 16611 MB/s | -| asyoulik.txt | 77503 | 66844 | 5517 MB/s | 2739 MB/s | 12735 MB/s | 14975 MB/s | -| lcet10.txt | 234661 | 190589 | 6235 MB/s | 3099 MB/s | 14519 MB/s | 16634 MB/s | -| plrabn12.txt | 319267 | 270828 | 5159 MB/s | 2600 MB/s | 11923 MB/s | 13382 MB/s | -| geo.protodata | 23335 | 18278 | 21220 MB/s | 11208 MB/s | 56271 MB/s | 57961 MB/s | -| kppkn.gtb | 69526 | 61851 | 9732 MB/s | 4556 MB/s | 18491 MB/s | 16524 MB/s | -| alice29.txt (128B) | 80 | 81 | 6691 MB/s | 529 MB/s | 31883 MB/s | 34225 MB/s | -| alice29.txt (1000B) | 774 | 748 | 12204 MB/s | 1943 MB/s | 48056 MB/s | 42068 MB/s | -| alice29.txt (10000B) | 6648 | 6234 | 10044 MB/s | 2949 MB/s | 32378 MB/s | 28813 MB/s | -| alice29.txt (20000B) | 12686 | 11584 | 7733 MB/s | 2822 MB/s | 30566 MB/s | 27315 MB/s | - - -| Relative Perf | Snappy size | Better size | Better Speed | Better dec | -|-----------------------|-------------|-------------|--------------|------------| -| html | 22.31% | 13.18% | 0.48x | 0.98x | -| urls.10K | 47.78% | 24.43% | 0.50x | 0.93x | -| fireworks.jpeg | 99.95% | -0.05% | 0.03x | 1.15x | -| fireworks.jpeg (200B) | 73.00% | 2.74% | 0.07x | 0.89x | -| paper-100k.pdf | 83.30% | 2.80% | 0.07x | 0.61x | -| html_x_4 | 22.52% | 78.49% | 0.04x | 1.00x | -| alice29.txt | 57.88% | 16.83% | 1.54x | 1.29x | -| asyoulik.txt | 61.91% | 13.75% | 0.50x | 1.18x | -| lcet10.txt | 54.99% | 18.78% | 0.50x | 1.15x | -| plrabn12.txt | 66.26% | 15.17% | 0.50x | 1.12x | -| geo.protodata | 19.68% | 21.67% | 0.50x | 1.03x | -| kppkn.gtb | 37.72% | 11.04% | 0.53x | 0.89x | -| alice29.txt (128B) | 62.50% | -1.25% | 0.47x | 1.07x | -| alice29.txt (1000B) | 77.40% | 3.36% | 0.08x | 0.88x | -| alice29.txt (10000B) | 66.48% | 6.23% | 0.16x | 0.89x | -| alice29.txt (20000B) | 63.43% | 8.69% | 0.29x | 0.89x | +| html | 22843 | 18972 | 16246 MB/s | 8621 MB/s | 40972 MB/s | 40292 MB/s | +| urls.10K | 335492 | 248079 | 7943 MB/s | 5104 MB/s | 22523 MB/s | 20981 MB/s | +| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 84429 MB/s | 718321 MB/s | 823698 MB/s | +| fireworks.jpeg (200B) | 146 | 149 | 8869 MB/s | 7125 MB/s | 33691 MB/s | 30101 MB/s | +| paper-100k.pdf | 85304 | 82887 | 167546 MB/s | 11087 MB/s | 326905 MB/s | 198869 MB/s | +| html_x_4 | 92234 | 18982 | 15194 MB/s | 29316 MB/s | 30843 MB/s | 30937 MB/s | +| alice29.txt | 88034 | 71611 | 5936 MB/s | 3709 MB/s | 12882 MB/s | 16611 MB/s | +| asyoulik.txt | 77503 | 65941 | 5517 MB/s | 3380 MB/s | 12735 MB/s | 14975 MB/s | +| lcet10.txt | 234661 | 184939 | 6235 MB/s | 3537 MB/s | 14519 MB/s | 16634 MB/s | +| plrabn12.txt | 319267 | 264990 | 5159 MB/s | 2960 MB/s | 11923 MB/s | 13382 MB/s | +| geo.protodata | 23335 | 17689 | 21220 MB/s | 10859 MB/s | 56271 MB/s | 57961 MB/s | +| kppkn.gtb | 69526 | 55398 | 9732 MB/s | 5206 MB/s | 18491 MB/s | 16524 MB/s | +| alice29.txt (128B) | 80 | 78 | 6691 MB/s | 7422 MB/s | 31883 MB/s | 34225 MB/s | +| alice29.txt (1000B) | 774 | 746 | 12204 MB/s | 5734 MB/s | 48056 MB/s | 42068 MB/s | +| alice29.txt (10000B) | 6648 | 6218 | 10044 MB/s | 6055 MB/s | 32378 MB/s | 28813 MB/s | +| alice29.txt (20000B) | 12686 | 11492 | 7733 MB/s | 3143 MB/s | 30566 MB/s | 27315 MB/s | + Except for the mostly incompressible JPEG image compression is better and usually in the double digits in terms of percentage reduction over Snappy. @@ -605,29 +567,29 @@ Some examples compared on 16 core CPU, amd64 assembly used: ``` * enwik10 -Default... 10000000000 -> 4761467548 [47.61%]; 1.098s, 8685.6MB/s -Better... 10000000000 -> 4219438251 [42.19%]; 1.925s, 4954.2MB/s -Best... 10000000000 -> 3627364337 [36.27%]; 43.051s, 221.5MB/s +Default... 10000000000 -> 4759950115 [47.60%]; 1.03s, 9263.0MB/s +Better... 10000000000 -> 4084706676 [40.85%]; 2.16s, 4415.4MB/s +Best... 10000000000 -> 3615520079 [36.16%]; 42.259s, 225.7MB/s * github-june-2days-2019.json -Default... 6273951764 -> 1043196283 [16.63%]; 431ms, 13882.3MB/s -Better... 6273951764 -> 949146808 [15.13%]; 547ms, 10938.4MB/s -Best... 6273951764 -> 832855506 [13.27%]; 9.455s, 632.8MB/s +Default... 6273951764 -> 1041700255 [16.60%]; 431ms, 13882.3MB/s +Better... 6273951764 -> 945841238 [15.08%]; 547ms, 10938.4MB/s +Best... 6273951764 -> 826392576 [13.17%]; 9.455s, 632.8MB/s * nyc-taxi-data-10M.csv -Default... 3325605752 -> 1095998837 [32.96%]; 324ms, 9788.7MB/s -Better... 3325605752 -> 954776589 [28.71%]; 491ms, 6459.4MB/s -Best... 3325605752 -> 779098746 [23.43%]; 8.29s, 382.6MB/s +Default... 3325605752 -> 1093516949 [32.88%]; 324ms, 9788.7MB/s +Better... 3325605752 -> 885394158 [26.62%]; 491ms, 6459.4MB/s +Best... 3325605752 -> 773681257 [23.26%]; 8.29s, 412.0MB/s * 10gb.tar -Default... 10065157632 -> 5916578242 [58.78%]; 1.028s, 9337.4MB/s -Better... 10065157632 -> 5649207485 [56.13%]; 1.597s, 6010.6MB/s -Best... 10065157632 -> 5208719802 [51.75%]; 32.78s, 292.8MB/ +Default... 10065157632 -> 5915541066 [58.77%]; 1.028s, 9337.4MB/s +Better... 10065157632 -> 5453844650 [54.19%]; 1.597s, 4862.7MB/s +Best... 10065157632 -> 5192495021 [51.59%]; 32.78s, 308.2MB/ * consensus.db.10gb -Default... 10737418240 -> 4562648848 [42.49%]; 882ms, 11610.0MB/s -Better... 10737418240 -> 4542428129 [42.30%]; 1.533s, 6679.7MB/s -Best... 10737418240 -> 4244773384 [39.53%]; 42.96s, 238.4MB/s +Default... 10737418240 -> 4549762344 [42.37%]; 882ms, 12118.4MB/s +Better... 10737418240 -> 4438535064 [41.34%]; 1.533s, 3500.9MB/s +Best... 10737418240 -> 4210602774 [39.21%]; 42.96s, 254.4MB/s ``` Decompression speed should be around the same as using the 'better' compression mode. @@ -648,10 +610,10 @@ If you would like more control, you can use the s2 package as described below: Snappy compatible blocks can be generated with the S2 encoder. Compression and speed is typically a bit better `MaxEncodedLen` is also smaller for smaller memory usage. Replace -| Snappy | S2 replacement | -|----------------------------|-------------------------| -| snappy.Encode(...) | s2.EncodeSnappy(...) | -| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) | +| Snappy | S2 replacement | +|---------------------------|-----------------------| +| snappy.Encode(...) | s2.EncodeSnappy(...) | +| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) | `s2.EncodeSnappy` can be replaced with `s2.EncodeSnappyBetter` or `s2.EncodeSnappyBest` to get more efficiently compressed snappy compatible output. @@ -660,12 +622,12 @@ Compression and speed is typically a bit better `MaxEncodedLen` is also smaller Comparison of [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z), 53927 files, total input size: 4,014,735,833 bytes. amd64, single goroutine used: -| Encoder | Size | MB/s | Reduction | -|-----------------------|------------|------------|------------ -| snappy.Encode | 1128706759 | 725.59 | 71.89% | -| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% | -| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% | -| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%**| +| Encoder | Size | MB/s | Reduction | +|-----------------------|------------|------------|------------| +| snappy.Encode | 1128706759 | 725.59 | 71.89% | +| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% | +| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% | +| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%** | ## Streams @@ -835,6 +797,13 @@ This is done using the regular "Skip" function: This will ensure that we are at exactly the offset we want, and reading from `dec` will start at the requested offset. +# Compact storage + +For compact storage [RemoveIndexHeaders](https://pkg.go.dev/github.com/klauspost/compress/s2#RemoveIndexHeaders) can be used to remove any redundant info from +a serialized index. If you remove the header it must be restored before [Loading](https://pkg.go.dev/github.com/klauspost/compress/s2#Index.Load). + +This is expected to save 20 bytes. These can be restored using [RestoreIndexHeaders](https://pkg.go.dev/github.com/klauspost/compress/s2#RestoreIndexHeaders). This removes a layer of security, but is the most compact representation. Returns nil if headers contains errors. + ## Index Format: Each block is structured as a snappy skippable block, with the chunk ID 0x99. @@ -844,20 +813,20 @@ The block can be read from the front, but contains information so it can be read Numbers are stored as fixed size little endian values or [zigzag encoded](https://developers.google.com/protocol-buffers/docs/encoding#signed_integers) [base 128 varints](https://developers.google.com/protocol-buffers/docs/encoding), with un-encoded value length of 64 bits, unless other limits are specified. -| Content | Format | -|---------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| -| ID, `[1]byte` | Always 0x99. | -| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. | -| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". | -| UncompressedSize, Varint | Total Uncompressed size. | -| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. | -| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. | -| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. | -| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. | -| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. | -| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. | -| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. | -| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. | +| Content | Format | +|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------| +| ID, `[1]byte` | Always 0x99. | +| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. | +| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". | +| UncompressedSize, Varint | Total Uncompressed size. | +| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. | +| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. | +| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. | +| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. | +| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. | +| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. | +| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. | +| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. | For regular streams the uncompressed offsets are fully predictable, so `HasUncompressedOffsets` allows to specify that compressed blocks all have @@ -929,6 +898,7 @@ To decode from any given uncompressed offset `(wantOffset)`: See [using indexes](https://github.com/klauspost/compress/tree/master/s2#using-indexes) for functions that perform the operations with a simpler interface. + # Format Extensions * Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`. @@ -951,10 +921,11 @@ The length is specified by reading the 3-bit length specified in the tag and dec | 7 | 65540 + read 3 bytes | This allows any repeat offset + length to be represented by 2 to 5 bytes. +It also allows to emit matches longer than 64 bytes with one copy + one repeat instead of several 64 byte copies. Lengths are stored as little endian values. -The first copy of a block cannot be a repeat offset and the offset is not carried across blocks in streams. +The first copy of a block cannot be a repeat offset and the offset is reset on every block in streams. Default streaming block size is 1MB. diff --git a/vendor/github.com/klauspost/compress/s2/decode.go b/vendor/github.com/klauspost/compress/s2/decode.go @@ -952,7 +952,11 @@ func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) { // Seek allows seeking in compressed data. func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) { if r.err != nil { - return 0, r.err + if !errors.Is(r.err, io.EOF) { + return 0, r.err + } + // Reset on EOF + r.err = nil } if offset == 0 && whence == io.SeekCurrent { return r.blockStart + int64(r.i), nil diff --git a/vendor/github.com/klauspost/compress/s2/decode_other.go b/vendor/github.com/klauspost/compress/s2/decode_other.go @@ -28,6 +28,9 @@ func s2Decode(dst, src []byte) int { // As long as we can read at least 5 bytes... for s < len(src)-5 { + // Removing bounds checks is SLOWER, when if doing + // in := src[s:s+5] + // Checked on Go 1.18 switch src[s] & 0x03 { case tagLiteral: x := uint32(src[s] >> 2) @@ -38,14 +41,19 @@ func s2Decode(dst, src []byte) int { s += 2 x = uint32(src[s-1]) case x == 61: + in := src[s : s+3] + x = uint32(in[1]) | uint32(in[2])<<8 s += 3 - x = uint32(src[s-2]) | uint32(src[s-1])<<8 case x == 62: + in := src[s : s+4] + // Load as 32 bit and shift down. + x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + x >>= 8 s += 4 - x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 case x == 63: + in := src[s : s+5] + x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24 s += 5 - x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 } length = int(x) + 1 if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) { @@ -62,8 +70,8 @@ func s2Decode(dst, src []byte) int { case tagCopy1: s += 2 - length = int(src[s-2]) >> 2 & 0x7 toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + length = int(src[s-2]) >> 2 & 0x7 if toffset == 0 { if debug { fmt.Print("(repeat) ") @@ -71,14 +79,16 @@ func s2Decode(dst, src []byte) int { // keep last offset switch length { case 5: + length = int(src[s]) + 4 s += 1 - length = int(uint32(src[s-1])) + 4 case 6: + in := src[s : s+2] + length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8) s += 2 - length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8) case 7: + in := src[s : s+3] + length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16) s += 3 - length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16) default: // 0-> 4 } } else { @@ -86,14 +96,16 @@ func s2Decode(dst, src []byte) int { } length += 4 case tagCopy2: + in := src[s : s+3] + offset = int(uint32(in[1]) | uint32(in[2])<<8) + length = 1 + int(in[0])>>2 s += 3 - length = 1 + int(src[s-3])>>2 - offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) case tagCopy4: + in := src[s : s+5] + offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24) + length = 1 + int(in[0])>>2 s += 5 - length = 1 + int(src[s-5])>>2 - offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) } if offset <= 0 || d < offset || length > len(dst)-d { diff --git a/vendor/github.com/klauspost/compress/s2/encode_all.go b/vendor/github.com/klauspost/compress/s2/encode_all.go @@ -58,8 +58,9 @@ func encodeGo(dst, src []byte) []byte { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockGo(dst, src []byte) (d int) { // Initialize the hash table. const ( diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -8,8 +8,9 @@ package s2 // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlock(dst, src []byte) (d int) { const ( // Use 12 bit table when less than... @@ -43,8 +44,9 @@ func encodeBlock(dst, src []byte) (d int) { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockBetter(dst, src []byte) (d int) { const ( // Use 12 bit table when less than... @@ -78,8 +80,9 @@ func encodeBlockBetter(dst, src []byte) (d int) { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockSnappy(dst, src []byte) (d int) { const ( // Use 12 bit table when less than... @@ -112,8 +115,9 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockBetterSnappy(dst, src []byte) (d int) { const ( // Use 12 bit table when less than... diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go @@ -15,8 +15,9 @@ import ( // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockBest(dst, src []byte) (d int) { // Initialize the hash tables. const ( @@ -176,14 +177,21 @@ func encodeBlockBest(dst, src []byte) (d int) { best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) } // Search for a match at best match end, see if that is better. - if sAt := best.s + best.length; sAt < sLimit { - sBack := best.s - backL := best.length + // Allow some bytes at the beginning to mismatch. + // Sweet spot is around 1-2 bytes, but depends on input. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + const skipEnd = 1 + if sAt := best.s + best.length - skipEnd; sAt < sLimit { + + sBack := best.s + skipBeginning - skipEnd + backL := best.length - skipBeginning // Load initial values cv = load64(src, sBack) - // Search for mismatch + + // Grab candidates... next := lTable[hash8(load64(src, sAt), lTableBits)] - //next := sTable[hash4(load64(src, sAt), sTableBits)] if checkAt := getCur(next) - backL; checkAt > 0 { best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) @@ -191,6 +199,16 @@ func encodeBlockBest(dst, src []byte) (d int) { if checkAt := getPrev(next) - backL; checkAt > 0 { best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) } + // Disabled: Extremely small gain + if false { + next = sTable[hash4(load64(src, sAt), sTableBits)] + if checkAt := getCur(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + if checkAt := getPrev(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + } } } } @@ -288,8 +306,9 @@ emitRemainder: // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockBestSnappy(dst, src []byte) (d int) { // Initialize the hash tables. const ( @@ -546,6 +565,7 @@ emitRemainder: // emitCopySize returns the size to encode the offset+length // // It assumes that: +// // 1 <= offset && offset <= math.MaxUint32 // 4 <= length && length <= 1 << 24 func emitCopySize(offset, length int) int { @@ -584,6 +604,7 @@ func emitCopySize(offset, length int) int { // emitCopyNoRepeatSize returns the size to encode the offset+length // // It assumes that: +// // 1 <= offset && offset <= math.MaxUint32 // 4 <= length && length <= 1 << 24 func emitCopyNoRepeatSize(offset, length int) int { diff --git a/vendor/github.com/klauspost/compress/s2/encode_better.go b/vendor/github.com/klauspost/compress/s2/encode_better.go @@ -42,8 +42,9 @@ func hash8(u uint64, h uint8) uint32 { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockBetterGo(dst, src []byte) (d int) { // sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are @@ -56,7 +57,7 @@ func encodeBlockBetterGo(dst, src []byte) (d int) { // Initialize the hash tables. const ( // Long hash matches. - lTableBits = 16 + lTableBits = 17 maxLTableSize = 1 << lTableBits // Short hash matches. @@ -97,9 +98,26 @@ func encodeBlockBetterGo(dst, src []byte) (d int) { lTable[hashL] = uint32(s) sTable[hashS] = uint32(s) + valLong := load64(src, candidateL) + valShort := load64(src, candidateS) + + // If long matches at least 8 bytes, use that. + if cv == valLong { + break + } + if cv == valShort { + candidateL = candidateS + break + } + // Check repeat at offset checkRep. const checkRep = 1 - if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { + // Minimum length of a repeat. Tested with various values. + // While 4-5 offers improvements in some, 6 reduces + // regressions significantly. + const wantRepeatBytes = 6 + const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep) + if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask { base := s + checkRep // Extend back for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { @@ -109,8 +127,8 @@ func encodeBlockBetterGo(dst, src []byte) (d int) { d += emitLiteral(dst[d:], src[nextEmit:base]) // Extend forward - candidate := s - repeat + 4 + checkRep - s += 4 + checkRep + candidate := s - repeat + wantRepeatBytes + checkRep + s += wantRepeatBytes + checkRep for s < len(src) { if len(src)-s < 8 { if src[s] == src[candidate] { @@ -127,28 +145,40 @@ func encodeBlockBetterGo(dst, src []byte) (d int) { s += 8 candidate += 8 } - if nextEmit > 0 { - // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. - d += emitRepeat(dst[d:], repeat, s-base) - } else { - // First match, cannot be repeat. - d += emitCopy(dst[d:], repeat, s-base) - } + // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. + d += emitRepeat(dst[d:], repeat, s-base) nextEmit = s if s >= sLimit { goto emitRemainder } + // Index in-between + index0 := base + 1 + index1 := s - 2 + + cv = load64(src, s) + for index0 < index1 { + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint32(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 2 + index1 -= 2 + } cv = load64(src, s) continue } - if uint32(cv) == load32(src, candidateL) { + // Long likely matches 7, so take that. + if uint32(cv) == uint32(valLong) { break } // Check our short candidate - if uint32(cv) == load32(src, candidateS) { + if uint32(cv) == uint32(valShort) { // Try a long candidate at s+1 hashL = hash7(cv>>8, lTableBits) candidateL = int(lTable[hashL]) @@ -227,21 +257,29 @@ func encodeBlockBetterGo(dst, src []byte) (d int) { // Do we have space for more, if not bail. return 0 } - // Index match start+1 (long) and start+2 (short) + + // Index short & long index0 := base + 1 - // Index match end-2 (long) and end-1 (short) index1 := s - 2 cv0 := load64(src, index0) cv1 := load64(src, index1) - cv = load64(src, s) lTable[hash7(cv0, lTableBits)] = uint32(index0) - lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1) - lTable[hash7(cv1, lTableBits)] = uint32(index1) - lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1) sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) - sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 1 + index1 -= 1 + cv = load64(src, s) + + // index every second long in between. + for index0 < index1 { + lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) + lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1) + index0 += 2 + index1 -= 2 + } } emitRemainder: @@ -260,8 +298,9 @@ emitRemainder: // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlockBetterSnappyGo(dst, src []byte) (d int) { // sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are @@ -402,21 +441,29 @@ func encodeBlockBetterSnappyGo(dst, src []byte) (d int) { // Do we have space for more, if not bail. return 0 } - // Index match start+1 (long) and start+2 (short) + + // Index short & long index0 := base + 1 - // Index match end-2 (long) and end-1 (short) index1 := s - 2 cv0 := load64(src, index0) cv1 := load64(src, index1) - cv = load64(src, s) lTable[hash7(cv0, lTableBits)] = uint32(index0) - lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1) - lTable[hash7(cv1, lTableBits)] = uint32(index1) - lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1) sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) - sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2) + + lTable[hash7(cv1, lTableBits)] = uint32(index1) sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) + index0 += 1 + index1 -= 1 + cv = load64(src, s) + + // index every second long in between. + for index0 < index1 { + lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) + lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1) + index0 += 2 + index1 -= 2 + } } emitRemainder: diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -12,6 +12,7 @@ import ( // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) func encodeBlock(dst, src []byte) (d int) { if len(src) < minNonLiteralBlockSize { @@ -25,6 +26,7 @@ func encodeBlock(dst, src []byte) (d int) { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) func encodeBlockBetter(dst, src []byte) (d int) { return encodeBlockBetterGo(dst, src) @@ -35,6 +37,7 @@ func encodeBlockBetter(dst, src []byte) (d int) { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) func encodeBlockBetterSnappy(dst, src []byte) (d int) { return encodeBlockBetterSnappyGo(dst, src) @@ -45,6 +48,7 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) { // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) func encodeBlockSnappy(dst, src []byte) (d int) { if len(src) < minNonLiteralBlockSize { @@ -56,6 +60,7 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // emitLiteral writes a literal chunk and returns the number of bytes written. // // It assumes that: +// // dst is long enough to hold the encoded bytes // 0 <= len(lit) && len(lit) <= math.MaxUint32 func emitLiteral(dst, lit []byte) int { @@ -146,6 +151,7 @@ func emitRepeat(dst []byte, offset, length int) int { // emitCopy writes a copy chunk and returns the number of bytes written. // // It assumes that: +// // dst is long enough to hold the encoded bytes // 1 <= offset && offset <= math.MaxUint32 // 4 <= length && length <= 1 << 24 @@ -214,6 +220,7 @@ func emitCopy(dst []byte, offset, length int) int { // emitCopyNoRepeat writes a copy chunk and returns the number of bytes written. // // It assumes that: +// // dst is long enough to hold the encoded bytes // 1 <= offset && offset <= math.MaxUint32 // 4 <= length && length <= 1 << 24 @@ -273,8 +280,8 @@ func emitCopyNoRepeat(dst []byte, offset, length int) int { // matchLen returns how many bytes match in a and b // // It assumes that: -// len(a) <= len(b) // +// len(a) <= len(b) func matchLen(a []byte, b []byte) int { b = b[:len(a)] var checked int diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -1,7 +1,6 @@ // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. //go:build !appengine && !noasm && gc && !noasm -// +build !appengine,!noasm,gc,!noasm package s2 @@ -150,8 +149,9 @@ func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int // emitLiteral writes a literal chunk and returns the number of bytes written. // // It assumes that: -// dst is long enough to hold the encoded bytes with margin of 0 bytes -// 0 <= len(lit) && len(lit) <= math.MaxUint32 +// +// dst is long enough to hold the encoded bytes with margin of 0 bytes +// 0 <= len(lit) && len(lit) <= math.MaxUint32 // //go:noescape func emitLiteral(dst []byte, lit []byte) int @@ -165,9 +165,10 @@ func emitRepeat(dst []byte, offset int, length int) int // emitCopy writes a copy chunk and returns the number of bytes written. // // It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= offset && offset <= math.MaxUint32 -// 4 <= length && length <= 1 << 24 +// +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= math.MaxUint32 +// 4 <= length && length <= 1 << 24 // //go:noescape func emitCopy(dst []byte, offset int, length int) int @@ -175,9 +176,10 @@ func emitCopy(dst []byte, offset int, length int) int // emitCopyNoRepeat writes a copy chunk and returns the number of bytes written. // // It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= offset && offset <= math.MaxUint32 -// 4 <= length && length <= 1 << 24 +// +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= math.MaxUint32 +// 4 <= length && length <= 1 << 24 // //go:noescape func emitCopyNoRepeat(dst []byte, offset int, length int) int @@ -185,7 +187,8 @@ func emitCopyNoRepeat(dst []byte, offset int, length int) int // matchLen returns how many bytes match in a and b // // It assumes that: -// len(a) <= len(b) +// +// len(a) <= len(b) // //go:noescape func matchLen(a []byte, b []byte) int diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -1,7 +1,6 @@ // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. //go:build !appengine && !noasm && gc && !noasm -// +build !appengine,!noasm,gc,!noasm #include "textflag.h" @@ -5743,9 +5742,9 @@ emit_literal_done_emit_remainder_encodeBlockAsm8B: // func encodeBetterBlockAsm(dst []byte, src []byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $327704-56 +TEXT ·encodeBetterBlockAsm(SB), $589848-56 MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX + MOVQ $0x00001200, CX LEAQ 24(SP), DX PXOR X0, X0 @@ -5797,27 +5796,37 @@ check_maxskip_cont_encodeBetterBlockAsm: MOVQ DI, R11 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI - MOVL 262168(SP)(R11*4), R8 + MOVL 524312(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) - MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVL CX, 524312(SP)(R11*4) + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm + +no_short_found_encodeBetterBlockAsm: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) @@ -6590,52 +6599,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm: match_nolit_dst_ok_encodeBetterBlockAsm: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 524312(SP)(R11*4) + MOVL R14, 524312(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x2f, R8 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: MOVQ src_len+32(FP), CX @@ -6815,9 +6821,9 @@ emit_literal_done_emit_remainder_encodeBetterBlockAsm: // func encodeBetterBlockAsm4MB(dst []byte, src []byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56 +TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX + MOVQ $0x00001200, CX LEAQ 24(SP), DX PXOR X0, X0 @@ -6869,27 +6875,37 @@ check_maxskip_cont_encodeBetterBlockAsm4MB: MOVQ DI, R11 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI - MOVL 262168(SP)(R11*4), R8 + MOVL 524312(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) - MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVL CX, 524312(SP)(R11*4) + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm4MB + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm4MB + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm4MB + +no_short_found_encodeBetterBlockAsm4MB: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm4MB + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm4MB + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) @@ -7600,52 +7616,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: match_nolit_dst_ok_encodeBetterBlockAsm4MB: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 524312(SP)(R11*4) + MOVL R14, 524312(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm4MB: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm4MB + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x2f, R8 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm4MB + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: MOVQ src_len+32(FP), CX @@ -7871,12 +7884,22 @@ search_loop_encodeBetterBlockAsm12B: MOVL 65560(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 65560(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm12B + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm12B + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm12B + +no_short_found_encodeBetterBlockAsm12B: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm12B + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm12B + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: SHRQ $0x08, DI @@ -8447,52 +8470,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm12B: match_nolit_dst_ok_encodeBetterBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x34, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 65560(SP)(R11*4) - MOVL R15, 65560(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 65560(SP)(R11*4) + MOVL R14, 65560(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm12B: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm12B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x32, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x34, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 65560(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm12B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: MOVQ src_len+32(FP), CX @@ -8707,12 +8727,22 @@ search_loop_encodeBetterBlockAsm10B: MOVL 16408(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 16408(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm10B + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm10B + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm10B + +no_short_found_encodeBetterBlockAsm10B: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm10B + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm10B + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: SHRQ $0x08, DI @@ -9283,52 +9313,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm10B: match_nolit_dst_ok_encodeBetterBlockAsm10B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x36, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 16408(SP)(R11*4) - MOVL R15, 16408(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 16408(SP)(R11*4) + MOVL R14, 16408(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm10B: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm10B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x34, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x36, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 16408(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm10B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: MOVQ src_len+32(FP), CX @@ -9543,12 +9570,22 @@ search_loop_encodeBetterBlockAsm8B: MOVL 4120(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 4120(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm8B + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm8B + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm8B + +no_short_found_encodeBetterBlockAsm8B: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm8B + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm8B + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: SHRQ $0x08, DI @@ -10105,52 +10142,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm8B: match_nolit_dst_ok_encodeBetterBlockAsm8B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x38, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x38, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 4120(SP)(R11*4) - MOVL R15, 4120(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 4120(SP)(R11*4) + MOVL R14, 4120(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm8B: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm8B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x36, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x38, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 4120(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm8B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: MOVQ src_len+32(FP), CX @@ -14287,9 +14321,9 @@ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: // func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56 +TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX + MOVQ $0x00001200, CX LEAQ 24(SP), DX PXOR X0, X0 @@ -14341,27 +14375,37 @@ check_maxskip_cont_encodeSnappyBetterBlockAsm: MOVQ DI, R11 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI - MOVL 262168(SP)(R11*4), R8 + MOVL 524312(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) - MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVL CX, 524312(SP)(R11*4) + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm + +no_short_found_encodeSnappyBetterBlockAsm: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) @@ -14685,52 +14729,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: match_nolit_dst_ok_encodeSnappyBetterBlockAsm: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 524312(SP)(R11*4) + MOVL R14, 524312(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x2f, R8 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: MOVQ src_len+32(FP), CX @@ -14964,12 +15005,22 @@ search_loop_encodeSnappyBetterBlockAsm64K: MOVL 262168(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm64K + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm64K + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm64K + +no_short_found_encodeSnappyBetterBlockAsm64K: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm64K + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm64K + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: SHRQ $0x08, DI @@ -15248,52 +15299,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 262168(SP)(R11*4) + MOVL R14, 262168(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm64K: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm64K + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x30, R8 SHLQ $0x08, R10 IMULQ SI, R10 SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm64K + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: MOVQ src_len+32(FP), CX @@ -15508,12 +15556,22 @@ search_loop_encodeSnappyBetterBlockAsm12B: MOVL 65560(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 65560(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm12B + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm12B + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm12B + +no_short_found_encodeSnappyBetterBlockAsm12B: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm12B + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm12B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: SHRQ $0x08, DI @@ -15792,52 +15850,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x34, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 65560(SP)(R11*4) - MOVL R15, 65560(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 65560(SP)(R11*4) + MOVL R14, 65560(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm12B: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm12B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x32, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x34, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 65560(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm12B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: MOVQ src_len+32(FP), CX @@ -16052,12 +16107,22 @@ search_loop_encodeSnappyBetterBlockAsm10B: MOVL 16408(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 16408(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm10B + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm10B + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm10B + +no_short_found_encodeSnappyBetterBlockAsm10B: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm10B + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm10B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: SHRQ $0x08, DI @@ -16336,52 +16401,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x36, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 16408(SP)(R11*4) - MOVL R15, 16408(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 16408(SP)(R11*4) + MOVL R14, 16408(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm10B: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm10B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x34, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x36, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 16408(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm10B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: MOVQ src_len+32(FP), CX @@ -16596,12 +16658,22 @@ search_loop_encodeSnappyBetterBlockAsm8B: MOVL 4120(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 4120(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm8B + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm8B + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm8B + +no_short_found_encodeSnappyBetterBlockAsm8B: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm8B + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm8B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: SHRQ $0x08, DI @@ -16878,52 +16950,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x38, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x38, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 4120(SP)(R11*4) - MOVL R15, 4120(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 4120(SP)(R11*4) + MOVL R14, 4120(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm8B: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm8B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x36, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x38, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 4120(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm8B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: MOVQ src_len+32(FP), CX diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md @@ -16,10 +16,17 @@ Package home: https://github.com/klauspost/cpuid ## installing -`go get -u github.com/klauspost/cpuid/v2` using modules. - +`go get -u github.com/klauspost/cpuid/v2` using modules. Drop `v2` for others. +### Homebrew + +For macOS/Linux users, you can install via [brew](https://brew.sh/) + +```sh +$ brew install cpuid +``` + ## example ```Go @@ -77,10 +84,14 @@ We have Streaming SIMD 2 Extensions The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features. A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler. +To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc. +This can be using with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported. + Note that for some cpu/os combinations some features will not be detected. `amd64` has rather good support and should work reliably on all platforms. -Note that hypervisors may not pass through all CPU features. +Note that hypervisors may not pass through all CPU features through to the guest OS, +so even if your host supports a feature it may not be visible on guests. ## arm64 feature detection @@ -253,6 +264,218 @@ Exit Code 0 Exit Code 1 ``` + +## Available flags + +### x86 & amd64 + +| Feature Flag | Description | +|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ADX | Intel ADX (Multi-Precision Add-Carry Instruction Extensions) | +| AESNI | Advanced Encryption Standard New Instructions | +| AMD3DNOW | AMD 3DNOW | +| AMD3DNOWEXT | AMD 3DNowExt | +| AMXBF16 | Tile computational operations on BFLOAT16 numbers | +| AMXINT8 | Tile computational operations on 8-bit integers | +| AMXFP16 | Tile computational operations on FP16 numbers | +| AMXTILE | Tile architecture | +| AVX | AVX functions | +| AVX2 | AVX2 functions | +| AVX512BF16 | AVX-512 BFLOAT16 Instructions | +| AVX512BITALG | AVX-512 Bit Algorithms | +| AVX512BW | AVX-512 Byte and Word Instructions | +| AVX512CD | AVX-512 Conflict Detection Instructions | +| AVX512DQ | AVX-512 Doubleword and Quadword Instructions | +| AVX512ER | AVX-512 Exponential and Reciprocal Instructions | +| AVX512F | AVX-512 Foundation | +| AVX512FP16 | AVX-512 FP16 Instructions | +| AVX512IFMA | AVX-512 Integer Fused Multiply-Add Instructions | +| AVX512PF | AVX-512 Prefetch Instructions | +| AVX512VBMI | AVX-512 Vector Bit Manipulation Instructions | +| AVX512VBMI2 | AVX-512 Vector Bit Manipulation Instructions, Version 2 | +| AVX512VL | AVX-512 Vector Length Extensions | +| AVX512VNNI | AVX-512 Vector Neural Network Instructions | +| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q | +| AVX512VPOPCNTDQ | AVX-512 Vector Population Count Doubleword and Quadword | +| AVXIFMA | AVX-IFMA instructions | +| AVXNECONVERT | AVX-NE-CONVERT instructions | +| AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one | +| AVXVNNI | AVX (VEX encoded) VNNI neural network instructions | +| AVXVNNIINT8 | AVX-VNNI-INT8 instructions | +| BMI1 | Bit Manipulation Instruction Set 1 | +| BMI2 | Bit Manipulation Instruction Set 2 | +| CETIBT | Intel CET Indirect Branch Tracking | +| CETSS | Intel CET Shadow Stack | +| CLDEMOTE | Cache Line Demote | +| CLMUL | Carry-less Multiplication | +| CLZERO | CLZERO instruction supported | +| CMOV | i686 CMOV | +| CMPCCXADD | CMPCCXADD instructions | +| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB | +| CMPXCHG8 | CMPXCHG8 instruction | +| CPBOOST | Core Performance Boost | +| CPPC | AMD: Collaborative Processor Performance Control | +| CX16 | CMPXCHG16B Instruction | +| EFER_LMSLE_UNS | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ | +| ENQCMD | Enqueue Command | +| ERMS | Enhanced REP MOVSB/STOSB | +| F16C | Half-precision floating-point conversion | +| FLUSH_L1D | Flush L1D cache | +| FMA3 | Intel FMA 3. Does not imply AVX. | +| FMA4 | Bulldozer FMA4 functions | +| FP128 | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide | +| FP256 | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide | +| FSRM | Fast Short Rep Mov | +| FXSR | FXSAVE, FXRESTOR instructions, CR4 bit 9 | +| FXSROPT | FXSAVE/FXRSTOR optimizations | +| GFNI | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. | +| HLE | Hardware Lock Elision | +| HRESET | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR | +| HTT | Hyperthreading (enabled) | +| HWA | Hardware assert supported. Indicates support for MSRC001_10 | +| HYBRID_CPU | This part has CPUs of more than one type. | +| HYPERVISOR | This bit has been reserved by Intel & AMD for use by hypervisors | +| IA32_ARCH_CAP | IA32_ARCH_CAPABILITIES MSR (Intel) | +| IA32_CORE_CAP | IA32_CORE_CAPABILITIES MSR | +| IBPB | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) | +| IBRS | AMD: Indirect Branch Restricted Speculation | +| IBRS_PREFERRED | AMD: IBRS is preferred over software solution | +| IBRS_PROVIDES_SMP | AMD: IBRS provides Same Mode Protection | +| IBS | Instruction Based Sampling (AMD) | +| IBSBRNTRGT | Instruction Based Sampling Feature (AMD) | +| IBSFETCHSAM | Instruction Based Sampling Feature (AMD) | +| IBSFFV | Instruction Based Sampling Feature (AMD) | +| IBSOPCNT | Instruction Based Sampling Feature (AMD) | +| IBSOPCNTEXT | Instruction Based Sampling Feature (AMD) | +| IBSOPSAM | Instruction Based Sampling Feature (AMD) | +| IBSRDWROPCNT | Instruction Based Sampling Feature (AMD) | +| IBSRIPINVALIDCHK | Instruction Based Sampling Feature (AMD) | +| IBS_FETCH_CTLX | AMD: IBS fetch control extended MSR supported | +| IBS_OPDATA4 | AMD: IBS op data 4 MSR supported | +| IBS_OPFUSE | AMD: Indicates support for IbsOpFuse | +| IBS_PREVENTHOST | Disallowing IBS use by the host supported | +| IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 | +| INT_WBINVD | WBINVD/WBNOINVD are interruptible. | +| INVLPGB | NVLPGB and TLBSYNC instruction supported | +| LAHF | LAHF/SAHF in long mode | +| LAM | If set, CPU supports Linear Address Masking | +| LBRVIRT | LBR virtualization | +| LZCNT | LZCNT instruction | +| MCAOVERFLOW | MCA overflow recovery support. | +| MCDT_NO | Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. | +| MCOMMIT | MCOMMIT instruction supported | +| MD_CLEAR | VERW clears CPU buffers | +| MMX | standard MMX | +| MMXEXT | SSE integer functions or AMD MMX ext | +| MOVBE | MOVBE instruction (big-endian) | +| MOVDIR64B | Move 64 Bytes as Direct Store | +| MOVDIRI | Move Doubleword as Direct Store | +| MOVSB_ZL | Fast Zero-Length MOVSB | +| MPX | Intel MPX (Memory Protection Extensions) | +| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | +| MSRIRC | Instruction Retired Counter MSR available | +| MSR_PAGEFLUSH | Page Flush MSR available | +| NRIPS | Indicates support for NRIP save on VMEXIT | +| NX | NX (No-Execute) bit | +| OSXSAVE | XSAVE enabled by OS | +| PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption | +| POPCNT | POPCNT instruction | +| PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled | +| PREFETCHI | PREFETCHIT0/1 instructions | +| PSFD | AMD: Predictive Store Forward Disable | +| RDPRU | RDPRU instruction supported | +| RDRAND | RDRAND instruction is available | +| RDSEED | RDSEED instruction is available | +| RDTSCP | RDTSCP Instruction | +| RTM | Restricted Transactional Memory | +| RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. | +| SERIALIZE | Serialize Instruction Execution | +| SEV | AMD Secure Encrypted Virtualization supported | +| SEV_64BIT | AMD SEV guest execution only allowed from a 64-bit host | +| SEV_ALTERNATIVE | AMD SEV Alternate Injection supported | +| SEV_DEBUGSWAP | Full debug state swap supported for SEV-ES guests | +| SEV_ES | AMD SEV Encrypted State supported | +| SEV_RESTRICTED | AMD SEV Restricted Injection supported | +| SEV_SNP | AMD SEV Secure Nested Paging supported | +| SGX | Software Guard Extensions | +| SGXLC | Software Guard Extensions Launch Control | +| SHA | Intel SHA Extensions | +| SME | AMD Secure Memory Encryption supported | +| SME_COHERENT | AMD Hardware cache coherency across encryption domains enforced | +| SPEC_CTRL_SSBD | Speculative Store Bypass Disable | +| SRBDS_CTRL | SRBDS mitigation MSR available | +| SSE | SSE functions | +| SSE2 | P4 SSE functions | +| SSE3 | Prescott SSE3 functions | +| SSE4 | Penryn SSE4.1 functions | +| SSE42 | Nehalem SSE4.2 functions | +| SSE4A | AMD Barcelona microarchitecture SSE4a instructions | +| SSSE3 | Conroe SSSE3 functions | +| STIBP | Single Thread Indirect Branch Predictors | +| STIBP_ALWAYSON | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On | +| STOSB_SHORT | Fast short STOSB | +| SUCCOR | Software uncorrectable error containment and recovery capability. | +| SVM | AMD Secure Virtual Machine | +| SVMDA | Indicates support for the SVM decode assists. | +| SVMFBASID | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control | +| SVML | AMD SVM lock. Indicates support for SVM-Lock. | +| SVMNP | AMD SVM nested paging | +| SVMPF | SVM pause intercept filter. Indicates support for the pause intercept filter | +| SVMPFT | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold | +| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. | +| SYSEE | SYSENTER and SYSEXIT instructions | +| TBM | AMD Trailing Bit Manipulation | +| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations | +| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. | +| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. | +| TSCRATEMSR | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 | +| TSXLDTRK | Intel TSX Suspend Load Address Tracking | +| VAES | Vector AES. AVX(512) versions requires additional checks. | +| VMCBCLEAN | VMCB clean bits. Indicates support for VMCB clean bits. | +| VMPL | AMD VM Permission Levels supported | +| VMSA_REGPROT | AMD VMSA Register Protection supported | +| VMX | Virtual Machine Extensions | +| VPCLMULQDQ | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. | +| VTE | AMD Virtual Transparent Encryption supported | +| WAITPKG | TPAUSE, UMONITOR, UMWAIT | +| WBNOINVD | Write Back and Do Not Invalidate Cache | +| X87 | FPU | +| XGETBV1 | Supports XGETBV with ECX = 1 | +| XOP | Bulldozer XOP functions | +| XSAVE | XSAVE, XRESTOR, XSETBV, XGETBV | +| XSAVEC | Supports XSAVEC and the compacted form of XRSTOR. | +| XSAVEOPT | XSAVEOPT available | +| XSAVES | Supports XSAVES/XRSTORS and IA32_XSS | + +# ARM features: + +| Feature Flag | Description | +|--------------|------------------------------------------------------------------| +| AESARM | AES instructions | +| ARMCPUID | Some CPU ID registers readable at user-level | +| ASIMD | Advanced SIMD | +| ASIMDDP | SIMD Dot Product | +| ASIMDHP | Advanced SIMD half-precision floating point | +| ASIMDRDM | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) | +| ATOMICS | Large System Extensions (LSE) | +| CRC32 | CRC32/CRC32C instructions | +| DCPOP | Data cache clean to Point of Persistence (DC CVAP) | +| EVTSTRM | Generic timer | +| FCMA | Floatin point complex number addition and multiplication | +| FP | Single-precision and double-precision floating point | +| FPHP | Half-precision floating point | +| GPA | Generic Pointer Authentication | +| JSCVT | Javascript-style double->int convert (FJCVTZS) | +| LRCPC | Weaker release consistency (LDAPR, etc) | +| PMULL | Polynomial Multiply instructions (PMULL/PMULL2) | +| SHA1 | SHA-1 instructions (SHA1C, etc) | +| SHA2 | SHA-2 instructions (SHA256H, etc) | +| SHA3 | SHA-3 instructions (EOR3, RAXI, XAR, BCAX) | +| SHA512 | SHA512 instructions | +| SM3 | SM3 instructions | +| SM4 | SM4 instructions | +| SVE | Scalable Vector Extension | + # license This code is published under an MIT license. See LICENSE file for more information. diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -73,6 +73,7 @@ const ( AMD3DNOW // AMD 3DNOW AMD3DNOWEXT // AMD 3DNowExt AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXFP16 // Tile computational operations on FP16 numbers AMXINT8 // Tile computational operations on 8-bit integers AMXTILE // Tile architecture AVX // AVX functions @@ -93,8 +94,11 @@ const ( AVX512VNNI // AVX-512 Vector Neural Network Instructions AVX512VP2INTERSECT // AVX-512 Intersect for D/Q AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + AVXIFMA // AVX-IFMA instructions + AVXNECONVERT // AVX-NE-CONVERT instructions AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one AVXVNNI // AVX (VEX encoded) VNNI neural network instructions + AVXVNNIINT8 // AVX-VNNI-INT8 instructions BMI1 // Bit Manipulation Instruction Set 1 BMI2 // Bit Manipulation Instruction Set 2 CETIBT // Intel CET Indirect Branch Tracking @@ -103,15 +107,22 @@ const ( CLMUL // Carry-less Multiplication CLZERO // CLZERO instruction supported CMOV // i686 CMOV + CMPCCXADD // CMPCCXADD instructions CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB CMPXCHG8 // CMPXCHG8 instruction CPBOOST // Core Performance Boost + CPPC // AMD: Collaborative Processor Performance Control CX16 // CMPXCHG16B Instruction + EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ ENQCMD // Enqueue Command ERMS // Enhanced REP MOVSB/STOSB F16C // Half-precision floating-point conversion + FLUSH_L1D // Flush L1D cache FMA3 // Intel FMA 3. Does not imply AVX. FMA4 // Bulldozer FMA4 functions + FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide + FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide + FSRM // Fast Short Rep Mov FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 FXSROPT // FXSAVE/FXRSTOR optimizations GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. @@ -119,8 +130,14 @@ const ( HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR HTT // Hyperthreading (enabled) HWA // Hardware assert supported. Indicates support for MSRC001_10 + HYBRID_CPU // This part has CPUs of more than one type. HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) + IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBRS // AMD: Indirect Branch Restricted Speculation + IBRS_PREFERRED // AMD: IBRS is preferred over software solution + IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection IBS // Instruction Based Sampling (AMD) IBSBRNTRGT // Instruction Based Sampling Feature (AMD) IBSFETCHSAM // Instruction Based Sampling Feature (AMD) @@ -130,7 +147,11 @@ const ( IBSOPSAM // Instruction Based Sampling Feature (AMD) IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported + IBS_OPDATA4 // AMD: IBS op data 4 MSR supported + IBS_OPFUSE // AMD: Indicates support for IbsOpFuse IBS_PREVENTHOST // Disallowing IBS use by the host supported + IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 INT_WBINVD // WBINVD/WBNOINVD are interruptible. INVLPGB // NVLPGB and TLBSYNC instruction supported LAHF // LAHF/SAHF in long mode @@ -138,13 +159,16 @@ const ( LBRVIRT // LBR virtualization LZCNT // LZCNT instruction MCAOVERFLOW // MCA overflow recovery support. + MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. MCOMMIT // MCOMMIT instruction supported + MD_CLEAR // VERW clears CPU buffers MMX // standard MMX MMXEXT // SSE integer functions or AMD MMX ext MOVBE // MOVBE instruction (big-endian) MOVDIR64B // Move 64 Bytes as Direct Store MOVDIRI // Move Doubleword as Direct Store MOVSB_ZL // Fast Zero-Length MOVSB + MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD MPX // Intel MPX (Memory Protection Extensions) MSRIRC // Instruction Retired Counter MSR available MSR_PAGEFLUSH // Page Flush MSR available @@ -153,6 +177,9 @@ const ( OSXSAVE // XSAVE enabled by OS PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption POPCNT // POPCNT instruction + PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled + PREFETCHI // PREFETCHIT0/1 instructions + PSFD // AMD: Predictive Store Forward Disable RDPRU // RDPRU instruction supported RDRAND // RDRAND instruction is available RDSEED // RDSEED instruction is available @@ -172,6 +199,8 @@ const ( SHA // Intel SHA Extensions SME // AMD Secure Memory Encryption supported SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced + SPEC_CTRL_SSBD // Speculative Store Bypass Disable + SRBDS_CTRL // SRBDS mitigation MSR available SSE // SSE functions SSE2 // P4 SSE functions SSE3 // Prescott SSE3 functions @@ -180,6 +209,7 @@ const ( SSE4A // AMD Barcelona microarchitecture SSE4a instructions SSSE3 // Conroe SSSE3 functions STIBP // Single Thread Indirect Branch Predictors + STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On STOSB_SHORT // Fast short STOSB SUCCOR // Software uncorrectable error containment and recovery capability. SVM // AMD Secure Virtual Machine @@ -192,8 +222,9 @@ const ( SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. SYSEE // SYSENTER and SYSEXIT instructions TBM // AMD Trailing Bit Manipulation - TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. + TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 TSXLDTRK // Intel TSX Suspend Load Address Tracking VAES // Vector AES. AVX(512) versions requires additional checks. @@ -358,7 +389,7 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool { // Has allows for checking a single feature. // Should be inlined by the compiler. -func (c CPUInfo) Has(id FeatureID) bool { +func (c *CPUInfo) Has(id FeatureID) bool { return c.featureSet.inSet(id) } @@ -372,26 +403,47 @@ func (c CPUInfo) AnyOf(ids ...FeatureID) bool { return false } +// Features contains several features combined for a fast check using +// CpuInfo.HasAll +type Features *flagSet + +// CombineFeatures allows to combine several features for a close to constant time lookup. +func CombineFeatures(ids ...FeatureID) Features { + var v flagSet + for _, id := range ids { + v.set(id) + } + return &v +} + +func (c *CPUInfo) HasAll(f Features) bool { + return c.featureSet.hasSetP(f) +} + // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels -var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2) -var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) -var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) -var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) +var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) +var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) +var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) +var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) +var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) // X64Level returns the microarchitecture level detected on the CPU. // If features are lacking or non x64 mode, 0 is returned. // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels func (c CPUInfo) X64Level() int { - if c.featureSet.hasSet(level4Features) { + if !c.featureSet.hasOneOf(oneOfLevel) { + return 0 + } + if c.featureSet.hasSetP(level4Features) { return 4 } - if c.featureSet.hasSet(level3Features) { + if c.featureSet.hasSetP(level3Features) { return 3 } - if c.featureSet.hasSet(level2Features) { + if c.featureSet.hasSetP(level2Features) { return 2 } - if c.featureSet.hasSet(level1Features) { + if c.featureSet.hasSetP(level1Features) { return 1 } return 0 @@ -555,7 +607,7 @@ const flagMask = flagBits - 1 // flagSet contains detected cpu features and characteristics in an array of flags type flagSet [(lastID + flagMask) / flagBits]flags -func (s flagSet) inSet(feat FeatureID) bool { +func (s *flagSet) inSet(feat FeatureID) bool { return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 } @@ -585,7 +637,17 @@ func (s *flagSet) or(other flagSet) { } // hasSet returns whether all features are present. -func (s flagSet) hasSet(other flagSet) bool { +func (s *flagSet) hasSet(other flagSet) bool { + for i, v := range other[:] { + if s[i]&v != v { + return false + } + } + return true +} + +// hasSet returns whether all features are present. +func (s *flagSet) hasSetP(other *flagSet) bool { for i, v := range other[:] { if s[i]&v != v { return false @@ -594,8 +656,18 @@ func (s flagSet) hasSet(other flagSet) bool { return true } +// hasOneOf returns whether one or more features are present. +func (s *flagSet) hasOneOf(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != 0 { + return true + } + } + return false +} + // nEnabled will return the number of enabled flags. -func (s flagSet) nEnabled() (n int) { +func (s *flagSet) nEnabled() (n int) { for _, v := range s[:] { n += bits.OnesCount64(uint64(v)) } @@ -1093,21 +1165,36 @@ func support() flagSet { fs.setIf(ecx&(1<<30) != 0, SGXLC) // CPUID.(EAX=7, ECX=0).EDX + fs.setIf(edx&(1<<4) != 0, FSRM) + fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) + fs.setIf(edx&(1<<10) != 0, MD_CLEAR) fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) fs.setIf(edx&(1<<14) != 0, SERIALIZE) + fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) fs.setIf(edx&(1<<16) != 0, TSXLDTRK) fs.setIf(edx&(1<<18) != 0, PCONFIG) fs.setIf(edx&(1<<20) != 0, CETIBT) fs.setIf(edx&(1<<26) != 0, IBPB) fs.setIf(edx&(1<<27) != 0, STIBP) + fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) + fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) + fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) + fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) + + // CPUID.(EAX=7, ECX=1).EDX + fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8) + fs.setIf(edx&(1<<5) != 0, AVXNECONVERT) + fs.setIf(edx&(1<<14) != 0, PREFETCHI) - // CPUID.(EAX=7, ECX=1) + // CPUID.(EAX=7, ECX=1).EAX eax1, _, _, _ := cpuidex(7, 1) fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) + fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) fs.setIf(eax1&(1<<22) != 0, HRESET) + fs.setIf(eax1&(1<<23) != 0, AVXIFMA) fs.setIf(eax1&(1<<26) != 0, LAM) // Only detect AVX-512 features if XGETBV is supported @@ -1145,9 +1232,15 @@ func support() flagSet { fs.setIf(edx&(1<<25) != 0, AMXINT8) // eax1 = CPUID.(EAX=7, ECX=1).EAX fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + fs.setIf(eax1&(1<<21) != 0, AMXFP16) } } + + // CPUID.(EAX=7, ECX=2) + _, _, _, edx = cpuidex(7, 2) + fs.setIf(edx&(1<<5) != 0, MCDT_NO) } + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) // EAX // Bit 00: XSAVEOPT is available. @@ -1212,9 +1305,21 @@ func support() flagSet { if maxExtendedFunction() >= 0x80000008 { _, b, _, _ := cpuid(0x80000008) + fs.setIf(b&(1<<28) != 0, PSFD) + fs.setIf(b&(1<<27) != 0, CPPC) + fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) + fs.setIf(b&(1<<23) != 0, PPIN) + fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) + fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) + fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) + fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) + fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) + fs.setIf(b&(1<<15) != 0, STIBP) + fs.setIf(b&(1<<14) != 0, IBRS) + fs.setIf((b&(1<<13)) != 0, INT_WBINVD) + fs.setIf(b&(1<<12) != 0, IBPB) fs.setIf((b&(1<<9)) != 0, WBNOINVD) fs.setIf((b&(1<<8)) != 0, MCOMMIT) - fs.setIf((b&(1<<13)) != 0, INT_WBINVD) fs.setIf((b&(1<<4)) != 0, RDPRU) fs.setIf((b&(1<<3)) != 0, INVLPGB) fs.setIf((b&(1<<1)) != 0, MSRIRC) @@ -1235,6 +1340,13 @@ func support() flagSet { fs.setIf((edx>>12)&1 == 1, SVMPFT) } + if maxExtendedFunction() >= 0x8000001a { + eax, _, _, _ := cpuid(0x8000001a) + fs.setIf((eax>>0)&1 == 1, FP128) + fs.setIf((eax>>1)&1 == 1, MOVU) + fs.setIf((eax>>2)&1 == 1, FP256) + } + if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { eax, _, _, _ := cpuid(0x8000001b) fs.setIf((eax>>0)&1 == 1, IBSFFV) @@ -1245,6 +1357,10 @@ func support() flagSet { fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) + fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) + fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) + fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. + fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) } if maxExtendedFunction() >= 0x8000001f && vend == AMD { diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go @@ -13,176 +13,207 @@ func _() { _ = x[AMD3DNOW-3] _ = x[AMD3DNOWEXT-4] _ = x[AMXBF16-5] - _ = x[AMXINT8-6] - _ = x[AMXTILE-7] - _ = x[AVX-8] - _ = x[AVX2-9] - _ = x[AVX512BF16-10] - _ = x[AVX512BITALG-11] - _ = x[AVX512BW-12] - _ = x[AVX512CD-13] - _ = x[AVX512DQ-14] - _ = x[AVX512ER-15] - _ = x[AVX512F-16] - _ = x[AVX512FP16-17] - _ = x[AVX512IFMA-18] - _ = x[AVX512PF-19] - _ = x[AVX512VBMI-20] - _ = x[AVX512VBMI2-21] - _ = x[AVX512VL-22] - _ = x[AVX512VNNI-23] - _ = x[AVX512VP2INTERSECT-24] - _ = x[AVX512VPOPCNTDQ-25] - _ = x[AVXSLOW-26] - _ = x[AVXVNNI-27] - _ = x[BMI1-28] - _ = x[BMI2-29] - _ = x[CETIBT-30] - _ = x[CETSS-31] - _ = x[CLDEMOTE-32] - _ = x[CLMUL-33] - _ = x[CLZERO-34] - _ = x[CMOV-35] - _ = x[CMPSB_SCADBS_SHORT-36] - _ = x[CMPXCHG8-37] - _ = x[CPBOOST-38] - _ = x[CX16-39] - _ = x[ENQCMD-40] - _ = x[ERMS-41] - _ = x[F16C-42] - _ = x[FMA3-43] - _ = x[FMA4-44] - _ = x[FXSR-45] - _ = x[FXSROPT-46] - _ = x[GFNI-47] - _ = x[HLE-48] - _ = x[HRESET-49] - _ = x[HTT-50] - _ = x[HWA-51] - _ = x[HYPERVISOR-52] - _ = x[IBPB-53] - _ = x[IBS-54] - _ = x[IBSBRNTRGT-55] - _ = x[IBSFETCHSAM-56] - _ = x[IBSFFV-57] - _ = x[IBSOPCNT-58] - _ = x[IBSOPCNTEXT-59] - _ = x[IBSOPSAM-60] - _ = x[IBSRDWROPCNT-61] - _ = x[IBSRIPINVALIDCHK-62] - _ = x[IBS_PREVENTHOST-63] - _ = x[INT_WBINVD-64] - _ = x[INVLPGB-65] - _ = x[LAHF-66] - _ = x[LAM-67] - _ = x[LBRVIRT-68] - _ = x[LZCNT-69] - _ = x[MCAOVERFLOW-70] - _ = x[MCOMMIT-71] - _ = x[MMX-72] - _ = x[MMXEXT-73] - _ = x[MOVBE-74] - _ = x[MOVDIR64B-75] - _ = x[MOVDIRI-76] - _ = x[MOVSB_ZL-77] - _ = x[MPX-78] - _ = x[MSRIRC-79] - _ = x[MSR_PAGEFLUSH-80] - _ = x[NRIPS-81] - _ = x[NX-82] - _ = x[OSXSAVE-83] - _ = x[PCONFIG-84] - _ = x[POPCNT-85] - _ = x[RDPRU-86] - _ = x[RDRAND-87] - _ = x[RDSEED-88] - _ = x[RDTSCP-89] - _ = x[RTM-90] - _ = x[RTM_ALWAYS_ABORT-91] - _ = x[SERIALIZE-92] - _ = x[SEV-93] - _ = x[SEV_64BIT-94] - _ = x[SEV_ALTERNATIVE-95] - _ = x[SEV_DEBUGSWAP-96] - _ = x[SEV_ES-97] - _ = x[SEV_RESTRICTED-98] - _ = x[SEV_SNP-99] - _ = x[SGX-100] - _ = x[SGXLC-101] - _ = x[SHA-102] - _ = x[SME-103] - _ = x[SME_COHERENT-104] - _ = x[SSE-105] - _ = x[SSE2-106] - _ = x[SSE3-107] - _ = x[SSE4-108] - _ = x[SSE42-109] - _ = x[SSE4A-110] - _ = x[SSSE3-111] - _ = x[STIBP-112] - _ = x[STOSB_SHORT-113] - _ = x[SUCCOR-114] - _ = x[SVM-115] - _ = x[SVMDA-116] - _ = x[SVMFBASID-117] - _ = x[SVML-118] - _ = x[SVMNP-119] - _ = x[SVMPF-120] - _ = x[SVMPFT-121] - _ = x[SYSCALL-122] - _ = x[SYSEE-123] - _ = x[TBM-124] - _ = x[TOPEXT-125] - _ = x[TME-126] - _ = x[TSCRATEMSR-127] - _ = x[TSXLDTRK-128] - _ = x[VAES-129] - _ = x[VMCBCLEAN-130] - _ = x[VMPL-131] - _ = x[VMSA_REGPROT-132] - _ = x[VMX-133] - _ = x[VPCLMULQDQ-134] - _ = x[VTE-135] - _ = x[WAITPKG-136] - _ = x[WBNOINVD-137] - _ = x[X87-138] - _ = x[XGETBV1-139] - _ = x[XOP-140] - _ = x[XSAVE-141] - _ = x[XSAVEC-142] - _ = x[XSAVEOPT-143] - _ = x[XSAVES-144] - _ = x[AESARM-145] - _ = x[ARMCPUID-146] - _ = x[ASIMD-147] - _ = x[ASIMDDP-148] - _ = x[ASIMDHP-149] - _ = x[ASIMDRDM-150] - _ = x[ATOMICS-151] - _ = x[CRC32-152] - _ = x[DCPOP-153] - _ = x[EVTSTRM-154] - _ = x[FCMA-155] - _ = x[FP-156] - _ = x[FPHP-157] - _ = x[GPA-158] - _ = x[JSCVT-159] - _ = x[LRCPC-160] - _ = x[PMULL-161] - _ = x[SHA1-162] - _ = x[SHA2-163] - _ = x[SHA3-164] - _ = x[SHA512-165] - _ = x[SM3-166] - _ = x[SM4-167] - _ = x[SVE-168] - _ = x[lastID-169] + _ = x[AMXFP16-6] + _ = x[AMXINT8-7] + _ = x[AMXTILE-8] + _ = x[AVX-9] + _ = x[AVX2-10] + _ = x[AVX512BF16-11] + _ = x[AVX512BITALG-12] + _ = x[AVX512BW-13] + _ = x[AVX512CD-14] + _ = x[AVX512DQ-15] + _ = x[AVX512ER-16] + _ = x[AVX512F-17] + _ = x[AVX512FP16-18] + _ = x[AVX512IFMA-19] + _ = x[AVX512PF-20] + _ = x[AVX512VBMI-21] + _ = x[AVX512VBMI2-22] + _ = x[AVX512VL-23] + _ = x[AVX512VNNI-24] + _ = x[AVX512VP2INTERSECT-25] + _ = x[AVX512VPOPCNTDQ-26] + _ = x[AVXIFMA-27] + _ = x[AVXNECONVERT-28] + _ = x[AVXSLOW-29] + _ = x[AVXVNNI-30] + _ = x[AVXVNNIINT8-31] + _ = x[BMI1-32] + _ = x[BMI2-33] + _ = x[CETIBT-34] + _ = x[CETSS-35] + _ = x[CLDEMOTE-36] + _ = x[CLMUL-37] + _ = x[CLZERO-38] + _ = x[CMOV-39] + _ = x[CMPCCXADD-40] + _ = x[CMPSB_SCADBS_SHORT-41] + _ = x[CMPXCHG8-42] + _ = x[CPBOOST-43] + _ = x[CPPC-44] + _ = x[CX16-45] + _ = x[EFER_LMSLE_UNS-46] + _ = x[ENQCMD-47] + _ = x[ERMS-48] + _ = x[F16C-49] + _ = x[FLUSH_L1D-50] + _ = x[FMA3-51] + _ = x[FMA4-52] + _ = x[FP128-53] + _ = x[FP256-54] + _ = x[FSRM-55] + _ = x[FXSR-56] + _ = x[FXSROPT-57] + _ = x[GFNI-58] + _ = x[HLE-59] + _ = x[HRESET-60] + _ = x[HTT-61] + _ = x[HWA-62] + _ = x[HYBRID_CPU-63] + _ = x[HYPERVISOR-64] + _ = x[IA32_ARCH_CAP-65] + _ = x[IA32_CORE_CAP-66] + _ = x[IBPB-67] + _ = x[IBRS-68] + _ = x[IBRS_PREFERRED-69] + _ = x[IBRS_PROVIDES_SMP-70] + _ = x[IBS-71] + _ = x[IBSBRNTRGT-72] + _ = x[IBSFETCHSAM-73] + _ = x[IBSFFV-74] + _ = x[IBSOPCNT-75] + _ = x[IBSOPCNTEXT-76] + _ = x[IBSOPSAM-77] + _ = x[IBSRDWROPCNT-78] + _ = x[IBSRIPINVALIDCHK-79] + _ = x[IBS_FETCH_CTLX-80] + _ = x[IBS_OPDATA4-81] + _ = x[IBS_OPFUSE-82] + _ = x[IBS_PREVENTHOST-83] + _ = x[IBS_ZEN4-84] + _ = x[INT_WBINVD-85] + _ = x[INVLPGB-86] + _ = x[LAHF-87] + _ = x[LAM-88] + _ = x[LBRVIRT-89] + _ = x[LZCNT-90] + _ = x[MCAOVERFLOW-91] + _ = x[MCDT_NO-92] + _ = x[MCOMMIT-93] + _ = x[MD_CLEAR-94] + _ = x[MMX-95] + _ = x[MMXEXT-96] + _ = x[MOVBE-97] + _ = x[MOVDIR64B-98] + _ = x[MOVDIRI-99] + _ = x[MOVSB_ZL-100] + _ = x[MOVU-101] + _ = x[MPX-102] + _ = x[MSRIRC-103] + _ = x[MSR_PAGEFLUSH-104] + _ = x[NRIPS-105] + _ = x[NX-106] + _ = x[OSXSAVE-107] + _ = x[PCONFIG-108] + _ = x[POPCNT-109] + _ = x[PPIN-110] + _ = x[PREFETCHI-111] + _ = x[PSFD-112] + _ = x[RDPRU-113] + _ = x[RDRAND-114] + _ = x[RDSEED-115] + _ = x[RDTSCP-116] + _ = x[RTM-117] + _ = x[RTM_ALWAYS_ABORT-118] + _ = x[SERIALIZE-119] + _ = x[SEV-120] + _ = x[SEV_64BIT-121] + _ = x[SEV_ALTERNATIVE-122] + _ = x[SEV_DEBUGSWAP-123] + _ = x[SEV_ES-124] + _ = x[SEV_RESTRICTED-125] + _ = x[SEV_SNP-126] + _ = x[SGX-127] + _ = x[SGXLC-128] + _ = x[SHA-129] + _ = x[SME-130] + _ = x[SME_COHERENT-131] + _ = x[SPEC_CTRL_SSBD-132] + _ = x[SRBDS_CTRL-133] + _ = x[SSE-134] + _ = x[SSE2-135] + _ = x[SSE3-136] + _ = x[SSE4-137] + _ = x[SSE42-138] + _ = x[SSE4A-139] + _ = x[SSSE3-140] + _ = x[STIBP-141] + _ = x[STIBP_ALWAYSON-142] + _ = x[STOSB_SHORT-143] + _ = x[SUCCOR-144] + _ = x[SVM-145] + _ = x[SVMDA-146] + _ = x[SVMFBASID-147] + _ = x[SVML-148] + _ = x[SVMNP-149] + _ = x[SVMPF-150] + _ = x[SVMPFT-151] + _ = x[SYSCALL-152] + _ = x[SYSEE-153] + _ = x[TBM-154] + _ = x[TLB_FLUSH_NESTED-155] + _ = x[TME-156] + _ = x[TOPEXT-157] + _ = x[TSCRATEMSR-158] + _ = x[TSXLDTRK-159] + _ = x[VAES-160] + _ = x[VMCBCLEAN-161] + _ = x[VMPL-162] + _ = x[VMSA_REGPROT-163] + _ = x[VMX-164] + _ = x[VPCLMULQDQ-165] + _ = x[VTE-166] + _ = x[WAITPKG-167] + _ = x[WBNOINVD-168] + _ = x[X87-169] + _ = x[XGETBV1-170] + _ = x[XOP-171] + _ = x[XSAVE-172] + _ = x[XSAVEC-173] + _ = x[XSAVEOPT-174] + _ = x[XSAVES-175] + _ = x[AESARM-176] + _ = x[ARMCPUID-177] + _ = x[ASIMD-178] + _ = x[ASIMDDP-179] + _ = x[ASIMDHP-180] + _ = x[ASIMDRDM-181] + _ = x[ATOMICS-182] + _ = x[CRC32-183] + _ = x[DCPOP-184] + _ = x[EVTSTRM-185] + _ = x[FCMA-186] + _ = x[FP-187] + _ = x[FPHP-188] + _ = x[GPA-189] + _ = x[JSCVT-190] + _ = x[LRCPC-191] + _ = x[PMULL-192] + _ = x[SHA1-193] + _ = x[SHA2-194] + _ = x[SHA3-195] + _ = x[SHA512-196] + _ = x[SM3-197] + _ = x[SM4-198] + _ = x[SVE-199] + _ = x[lastID-200] _ = x[firstID-0] } -const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4INT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" -var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122} +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 278, 282, 288, 293, 301, 306, 312, 316, 325, 343, 351, 358, 362, 366, 380, 386, 390, 394, 403, 407, 411, 416, 421, 425, 429, 436, 440, 443, 449, 452, 455, 465, 475, 488, 501, 505, 509, 523, 540, 543, 553, 564, 570, 578, 589, 597, 609, 625, 639, 650, 660, 675, 683, 693, 700, 704, 707, 714, 719, 730, 737, 744, 752, 755, 761, 766, 775, 782, 790, 794, 797, 803, 816, 821, 823, 830, 837, 843, 847, 856, 860, 865, 871, 877, 883, 886, 902, 911, 914, 923, 938, 951, 957, 971, 978, 981, 986, 989, 992, 1004, 1018, 1028, 1031, 1035, 1039, 1043, 1048, 1053, 1058, 1063, 1077, 1088, 1094, 1097, 1102, 1111, 1115, 1120, 1125, 1131, 1138, 1143, 1146, 1162, 1165, 1171, 1181, 1189, 1193, 1202, 1206, 1218, 1221, 1231, 1234, 1241, 1249, 1252, 1259, 1262, 1267, 1273, 1281, 1287, 1293, 1301, 1306, 1313, 1320, 1328, 1335, 1340, 1345, 1352, 1356, 1358, 1362, 1365, 1370, 1375, 1380, 1384, 1388, 1392, 1398, 1401, 1404, 1407, 1413} func (i FeatureID) String() string { if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go @@ -83,7 +83,7 @@ func tryToFillCPUInfoFomSysctl(c *CPUInfo) { c.Model = sysctlGetInt(0, "machdep.cpu.model") c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize") c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize") - c.Cache.L1D = sysctlGetInt64(-1, "hw.l1icachesize") + c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize") c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize") c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize") diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object.go b/vendor/github.com/minio/minio-go/v7/api-put-object.go @@ -93,6 +93,28 @@ type PutObjectOptions struct { // This can be used for faster uploads on non-seekable or slow-to-seek input. ConcurrentStreamParts bool Internal AdvancedPutOptions + + customHeaders http.Header +} + +// SetMatchETag if etag matches while PUT MinIO returns an error +// this is a MinIO specific extension to support optimistic locking +// semantics. +func (opts *PutObjectOptions) SetMatchETag(etag string) { + if opts.customHeaders == nil { + opts.customHeaders = http.Header{} + } + opts.customHeaders.Set("If-Match", "\""+etag+"\"") +} + +// SetMatchETagExcept if etag does not match while PUT MinIO returns an +// error this is a MinIO specific extension to support optimistic locking +// semantics. +func (opts *PutObjectOptions) SetMatchETagExcept(etag string) { + if opts.customHeaders == nil { + opts.customHeaders = http.Header{} + } + opts.customHeaders.Set("If-None-Match", "\""+etag+"\"") } // getNumThreads - gets the number of threads to be used in the multipart @@ -187,6 +209,12 @@ func (opts PutObjectOptions) Header() (header http.Header) { header.Set("x-amz-meta-"+k, v) } } + + // set any other additional custom headers. + for k, v := range opts.customHeaders { + header[k] = v + } + return } diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go @@ -118,7 +118,7 @@ type Options struct { // Global constants. const ( libraryName = "minio-go" - libraryVersion = "v7.0.48" + libraryVersion = "v7.0.49" ) // User Agent should always following the below style. diff --git a/vendor/modules.txt b/vendor/modules.txt @@ -163,8 +163,8 @@ github.com/dsoprea/go-png-image-structure/v2 ## explicit; go 1.12 github.com/dsoprea/go-utility/v2/filesystem github.com/dsoprea/go-utility/v2/image -# github.com/dustin/go-humanize v1.0.0 -## explicit +# github.com/dustin/go-humanize v1.0.1 +## explicit; go 1.16 github.com/dustin/go-humanize # github.com/fsnotify/fsnotify v1.6.0 ## explicit; go 1.16 @@ -324,14 +324,14 @@ github.com/json-iterator/go # github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 ## explicit github.com/kballard/go-shellquote -# github.com/klauspost/compress v1.15.9 -## explicit; go 1.16 +# github.com/klauspost/compress v1.15.15 +## explicit; go 1.17 github.com/klauspost/compress/flate github.com/klauspost/compress/gzip github.com/klauspost/compress/s2 github.com/klauspost/compress/snappy github.com/klauspost/compress/zlib -# github.com/klauspost/cpuid/v2 v2.1.1 +# github.com/klauspost/cpuid/v2 v2.2.3 ## explicit; go 1.15 github.com/klauspost/cpuid/v2 # github.com/leodido/go-urn v1.2.1 @@ -353,7 +353,7 @@ github.com/miekg/dns # github.com/minio/md5-simd v1.1.2 ## explicit; go 1.14 github.com/minio/md5-simd -# github.com/minio/minio-go/v7 v7.0.48 +# github.com/minio/minio-go/v7 v7.0.49 ## explicit; go 1.17 github.com/minio/minio-go/v7 github.com/minio/minio-go/v7/pkg/credentials