Bump github.com/containerd/stargz-snapshotter/estargz (#431)

Bumps [github.com/containerd/stargz-snapshotter/estargz](https://github.com/containerd/stargz-snapshotter) from 0.7.0 to 0.8.0. - [Release notes](https://github.com/containerd/stargz-snapshotter/releases) - [Commits](https://github.com/containerd/stargz-snapshotter/compare/v0.7.0...v0.8.0) --- updated-dependencies: - dependency-name: github.com/containerd/stargz-snapshotter/estargz dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-17 20:47:51 +02:00 · 2021-09-06 06:23:51 -04:00 · 2021-09-06 06:23:51 -04:00 · 7f145a7e10
commit 7f145a7e10
parent a965b407b2
54 changed files with 3853 additions and 3114 deletions
--- a/go.mod
+++ b/go.mod
@ -3,17 +3,15 @@ module github.com/google/ko
 go 1.15

 require (
-	github.com/containerd/stargz-snapshotter/estargz v0.7.0
+	github.com/containerd/stargz-snapshotter/estargz v0.8.0
 	github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
 	github.com/docker/docker v20.10.8+incompatible
 	github.com/dprotaso/go-yit v0.0.0-20191028211022-135eb7262960
 	github.com/evanphx/json-patch/v5 v5.5.0 // indirect
 	github.com/fsnotify/fsnotify v1.5.1
 	github.com/go-training/helloworld v0.0.0-20200225145412-ba5f4379d78b
-	github.com/golang/snappy v0.0.4 // indirect
 	github.com/google/go-cmp v0.5.6
 	github.com/google/go-containerregistry v0.6.0
-	github.com/klauspost/compress v1.13.1 // indirect
 	github.com/mattmoor/dep-notify v0.0.0-20190205035814-a45dec370a17
 	github.com/mattn/go-isatty v0.0.13 // indirect
 	github.com/opencontainers/image-spec v1.0.2-0.20210730191737-8e42a01fb1b7
--- a/go.sum
+++ b/go.sum
@ -185,8 +185,9 @@ github.com/containerd/imgcrypt v1.1.1/go.mod h1:xpLnwiQmEUJPvQoAapeb2SNCxz7Xr6PJ
 github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164/go.mod h1:+2wGSDGFYfE5+So4M5syatU0N0f0LbWpuqyMi4/BE8c=
 github.com/containerd/nri v0.0.0-20210316161719-dbaa18c31c14/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
 github.com/containerd/nri v0.1.0/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
-github.com/containerd/stargz-snapshotter/estargz v0.7.0 h1:1d/rydzTywc76lnjJb6qbPCiTiCwts49AzKps/Ecblw=
 github.com/containerd/stargz-snapshotter/estargz v0.7.0/go.mod h1:83VWDqHnurTKliEB0YvWMiCfLDwv4Cjj1X9Vk98GJZw=
+github.com/containerd/stargz-snapshotter/estargz v0.8.0 h1:oA1wx8kTFfImfsT5bScbrZd8gK+WtQnn15q82Djvm0Y=
+github.com/containerd/stargz-snapshotter/estargz v0.8.0/go.mod h1:mwIwuwb+D8FX2t45Trwi0hmWmZm5VW7zPP/rekwhWQU=
 github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
 github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
 github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8=
@ -366,8 +367,6 @@ github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx
 github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
 github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
-github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@ -483,8 +482,8 @@ github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYs
 github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
 github.com/klauspost/compress v1.13.0/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
-github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
-github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/klauspost/compress v1.13.5 h1:9O69jUPDcsT9fEm74W92rZL9FQY7rCdaXVneq+yyzl4=
+github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go
@ -26,7 +26,6 @@ import (
 	"archive/tar"
 	"bytes"
 	"compress/gzip"
-	"encoding/json"
 	"fmt"
 	"io"
 	"io/ioutil"
@ -48,6 +47,7 @@ type options struct {
 	compressionLevel       int
 	prioritizedFiles       []string
 	missedPrioritizedFiles *[]string
+	compression            Compression
 }

 type Option func(o *options) error
@ -95,6 +95,15 @@ func WithAllowPrioritizeNotFound(missedFiles *[]string) Option {
 	}
 }

+// WithCompression specifies compression algorithm to be used.
+// Default is gzip.
+func WithCompression(compression Compression) Option {
+	return func(o *options) error {
+		o.compression = compression
+		return nil
+	}
+}
+
 // Blob is an eStargz blob.
 type Blob struct {
 	io.ReadCloser
@ -126,6 +135,9 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
 			return nil, err
 		}
 	}
+	if opts.compression == nil {
+		opts.compression = newGzipCompressionWithLevel(opts.compressionLevel)
+	}
 	layerFiles := newTempFiles()
 	defer func() {
 		if rErr != nil {
@ -155,7 +167,7 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
 			if err != nil {
 				return err
 			}
-			sw := NewWriterLevel(esgzFile, opts.compressionLevel)
+			sw := NewWriterWithCompressor(esgzFile, opts.compression)
 			sw.ChunkSize = opts.chunkSize
 			if err := sw.AppendTar(readerFromEntries(parts...)); err != nil {
 				return err
@ -187,11 +199,12 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
 	diffID := digest.Canonical.Digester()
 	pr, pw := io.Pipe()
 	go func() {
-		r, err := gzip.NewReader(io.TeeReader(io.MultiReader(append(rs, tocAndFooter)...), pw))
+		r, err := opts.compression.Reader(io.TeeReader(io.MultiReader(append(rs, tocAndFooter)...), pw))
 		if err != nil {
 			pw.CloseWithError(err)
 			return
 		}
+		defer r.Close()
 		if _, err := io.Copy(diffID.Hash(), r); err != nil {
 			pw.CloseWithError(err)
 			return
@ -213,7 +226,7 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
 // Writers doesn't write TOC and footer to the underlying writers so they can be
 // combined into a single eStargz and tocAndFooter returned by this function can
 // be appended at the tail of that combined blob.
-func closeWithCombine(compressionLevel int, ws ...*Writer) (tocAndFooter io.Reader, tocDgst digest.Digest, err error) {
+func closeWithCombine(compressionLevel int, ws ...*Writer) (tocAndFooterR io.Reader, tocDgst digest.Digest, err error) {
 	if len(ws) == 0 {
 		return nil, "", fmt.Errorf("at least one writer must be passed")
 	}
@ -230,7 +243,7 @@ func closeWithCombine(compressionLevel int, ws ...*Writer) (tocAndFooter io.Read
 		}
 	}
 	var (
-		mtoc          = new(jtoc)
+		mtoc          = new(JTOC)
 		currentOffset int64
 	)
 	mtoc.Version = ws[0].toc.Version
@ -248,40 +261,16 @@ func closeWithCombine(compressionLevel int, ws ...*Writer) (tocAndFooter io.Read
 		currentOffset += w.cw.n
 	}

-	tocJSON, err := json.MarshalIndent(mtoc, "", "\t")
+	return tocAndFooter(ws[0].compressor, mtoc, currentOffset)
+}
+
+func tocAndFooter(compressor Compressor, toc *JTOC, offset int64) (io.Reader, digest.Digest, error) {
+	buf := new(bytes.Buffer)
+	tocDigest, err := compressor.WriteTOCAndFooter(buf, offset, toc, nil)
 	if err != nil {
 		return nil, "", err
 	}
-	pr, pw := io.Pipe()
-	go func() {
-		zw, _ := gzip.NewWriterLevel(pw, compressionLevel)
-		tw := tar.NewWriter(zw)
-		if err := tw.WriteHeader(&tar.Header{
-			Typeflag: tar.TypeReg,
-			Name:     TOCTarName,
-			Size:     int64(len(tocJSON)),
-		}); err != nil {
-			pw.CloseWithError(err)
-			return
-		}
-		if _, err := tw.Write(tocJSON); err != nil {
-			pw.CloseWithError(err)
-			return
-		}
-		if err := tw.Close(); err != nil {
-			pw.CloseWithError(err)
-			return
-		}
-		if err := zw.Close(); err != nil {
-			pw.CloseWithError(err)
-			return
-		}
-		pw.Close()
-	}()
-	return io.MultiReader(
-		pr,
-		bytes.NewReader(footerBytes(currentOffset)),
-	), digest.FromBytes(tocJSON), nil
+	return buf, tocDigest, nil
 }

 // divideEntries divides passed entries to the parts at least the number specified by the
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go
@ -28,8 +28,6 @@ import (
 	"bytes"
 	"compress/gzip"
 	"crypto/sha256"
-	"encoding/binary"
-	"encoding/json"
 	"fmt"
 	"hash"
 	"io"
@ -37,7 +35,6 @@ import (
 	"os"
 	"path"
 	"sort"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@ -50,7 +47,7 @@ import (
 // A Reader permits random access reads from a stargz file.
 type Reader struct {
 	sr        *io.SectionReader
-	toc       *jtoc
+	toc       *JTOC
 	tocDigest digest.Digest

 	// m stores all non-chunk entries, keyed by name.
@ -60,39 +57,116 @@ type Reader struct {
 	// are split up. For a file with a single chunk, it's only
 	// stored in m.
 	chunks map[string][]*TOCEntry
+
+	decompressor Decompressor
+}
+
+type openOpts struct {
+	tocOffset     int64
+	decompressors []Decompressor
+	telemetry     *Telemetry
+}
+
+// OpenOption is an option used during opening the layer
+type OpenOption func(o *openOpts) error
+
+// WithTOCOffset option specifies the offset of TOC
+func WithTOCOffset(tocOffset int64) OpenOption {
+	return func(o *openOpts) error {
+		o.tocOffset = tocOffset
+		return nil
+	}
+}
+
+// WithDecompressors option specifies decompressors to use.
+// Default is gzip-based decompressor.
+func WithDecompressors(decompressors ...Decompressor) OpenOption {
+	return func(o *openOpts) error {
+		o.decompressors = decompressors
+		return nil
+	}
+}
+
+// WithTelemetry option specifies the telemetry hooks
+func WithTelemetry(telemetry *Telemetry) OpenOption {
+	return func(o *openOpts) error {
+		o.telemetry = telemetry
+		return nil
+	}
+}
+
+// MeasureLatencyHook is a func which takes a start time and records the diff.
+type MeasureLatencyHook func(time.Time)
+
+// Telemetry is a struct which defines telemetry hooks. By implementing these hooks you should be able to record
+// the latency metrics of the respective steps of estargz open operation. To be used with estargz.Open(sr, estargz.WithTelemetry(...))
+type Telemetry struct {
+	GetFooterLatency      MeasureLatencyHook // measure time to get stargz footer (in milliseconds)
+	GetTocLatency         MeasureLatencyHook // measure time to GET TOC JSON (in milliseconds)
+	DeserializeTocLatency MeasureLatencyHook // measure time to deserialize TOC JSON (in milliseconds)
 }

 // Open opens a stargz file for reading.
+// The behaviour is configurable using options.
 //
 // Note that each entry name is normalized as the path that is relative to root.
-func Open(sr *io.SectionReader) (*Reader, error) {
-	tocOff, footerSize, err := OpenFooter(sr)
-	if err != nil {
-		return nil, errors.Wrapf(err, "error parsing footer")
+func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error) {
+	var opts openOpts
+	for _, o := range opt {
+		if err := o(&opts); err != nil {
+			return nil, err
+		}
 	}
-	tocTargz := make([]byte, sr.Size()-tocOff-footerSize)
-	if _, err := sr.ReadAt(tocTargz, tocOff); err != nil {
-		return nil, fmt.Errorf("error reading %d byte TOC targz: %v", len(tocTargz), err)
+
+	gzipCompressors := []Decompressor{new(GzipDecompressor), new(legacyGzipDecompressor)}
+	decompressors := append(gzipCompressors, opts.decompressors...)
+
+	// Determine the size to fetch. Try to fetch as many bytes as possible.
+	fetchSize := maxFooterSize(sr.Size(), decompressors...)
+	if maybeTocOffset := opts.tocOffset; maybeTocOffset > fetchSize {
+		if maybeTocOffset > sr.Size() {
+			return nil, fmt.Errorf("blob size %d is smaller than the toc offset", sr.Size())
+		}
+		fetchSize = sr.Size() - maybeTocOffset
 	}
-	zr, err := gzip.NewReader(bytes.NewReader(tocTargz))
-	if err != nil {
-		return nil, fmt.Errorf("malformed TOC gzip header: %v", err)
+
+	start := time.Now() // before getting layer footer
+	footer := make([]byte, fetchSize)
+	if _, err := sr.ReadAt(footer, sr.Size()-fetchSize); err != nil {
+		return nil, fmt.Errorf("error reading footer: %v", err)
 	}
-	zr.Multistream(false)
-	tr := tar.NewReader(zr)
-	h, err := tr.Next()
-	if err != nil {
-		return nil, fmt.Errorf("failed to find tar header in TOC gzip stream: %v", err)
+	if opts.telemetry != nil && opts.telemetry.GetFooterLatency != nil {
+		opts.telemetry.GetFooterLatency(start)
 	}
-	if h.Name != TOCTarName {
-		return nil, fmt.Errorf("TOC tar entry had name %q; expected %q", h.Name, TOCTarName)
+
+	var allErr []error
+	var found bool
+	var r *Reader
+	for _, d := range decompressors {
+		fSize := d.FooterSize()
+		fOffset := positive(int64(len(footer)) - fSize)
+		maybeTocBytes := footer[:fOffset]
+		tocOffset, tocSize, err := d.ParseFooter(footer[fOffset:])
+		if err != nil {
+			allErr = append(allErr, err)
+			continue
+		}
+		if tocSize <= 0 {
+			tocSize = sr.Size() - tocOffset - fSize
+		}
+		if tocSize < int64(len(maybeTocBytes)) {
+			maybeTocBytes = maybeTocBytes[:tocSize]
+		}
+		r, err = parseTOC(d, sr, tocOffset, tocSize, maybeTocBytes, opts)
+		if err == nil {
+			found = true
+			break
+		}
+		allErr = append(allErr, err)
 	}
-	dgstr := digest.Canonical.Digester()
-	toc := new(jtoc)
-	if err := json.NewDecoder(io.TeeReader(tr, dgstr.Hash())).Decode(&toc); err != nil {
-		return nil, fmt.Errorf("error decoding TOC JSON: %v", err)
+	if !found {
+		return nil, errorutil.Aggregate(allErr)
 	}
-	r := &Reader{sr: sr, toc: toc, tocDigest: dgstr.Digest()}
 	if err := r.initFields(); err != nil {
 		return nil, fmt.Errorf("failed to initialize fields of entries: %v", err)
 	}
@ -100,17 +174,26 @@ func Open(sr *io.SectionReader) (*Reader, error) {
 }

 // OpenFooter extracts and parses footer from the given blob.
+// It only supports gzip-based eStargz.
 func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr error) {
 	if sr.Size() < FooterSize && sr.Size() < legacyFooterSize {
 		return 0, 0, fmt.Errorf("blob size %d is smaller than the footer size", sr.Size())
 	}
-	// TODO: read a bigger chunk (1MB?) at once here to hopefully
-	// get the TOC + footer in one go.
 	var footer [FooterSize]byte
 	if _, err := sr.ReadAt(footer[:], sr.Size()-FooterSize); err != nil {
 		return 0, 0, fmt.Errorf("error reading footer: %v", err)
 	}
-	return parseFooter(footer[:])
+	var allErr []error
+	for _, d := range []Decompressor{new(GzipDecompressor), new(legacyGzipDecompressor)} {
+		fSize := d.FooterSize()
+		fOffset := positive(int64(len(footer)) - fSize)
+		tocOffset, _, err := d.ParseFooter(footer[fOffset:])
+		if err == nil {
+			return tocOffset, fSize, err
+		}
+		allErr = append(allErr, err)
+	}
+	return 0, 0, errorutil.Aggregate(allErr)
 }

 // initFields populates the Reader from r.toc after decoding it from
@ -252,33 +335,68 @@ func (r *Reader) VerifyTOC(tocDigest digest.Digest) (TOCEntryVerifier, error) {
 	if r.tocDigest != tocDigest {
 		return nil, fmt.Errorf("invalid TOC JSON %q; want %q", r.tocDigest, tocDigest)
 	}
-	digestMap := make(map[int64]digest.Digest) // map from chunk offset to the digest
+
+	chunkDigestMap := make(map[int64]digest.Digest) // map from chunk offset to the chunk digest
+	regDigestMap := make(map[int64]digest.Digest)   // map from chunk offset to the reg file digest
+	var chunkDigestMapIncomplete bool
+	var regDigestMapIncomplete bool
+	var containsChunk bool
 	for _, e := range r.toc.Entries {
-		if e.Type == "reg" || e.Type == "chunk" {
-			if e.Type == "reg" && e.Size == 0 {
+		if e.Type != "reg" && e.Type != "chunk" {
+			continue
+		}
+
+		// offset must be unique in stargz blob
+		_, dOK := chunkDigestMap[e.Offset]
+		_, rOK := regDigestMap[e.Offset]
+		if dOK || rOK {
+			return nil, fmt.Errorf("offset %d found twice", e.Offset)
+		}
+
+		if e.Type == "reg" {
+			if e.Size == 0 {
 				continue // ignores empty file
 			}

-			// offset must be unique in stargz blob
-			if _, ok := digestMap[e.Offset]; ok {
-				return nil, fmt.Errorf("offset %d found twice", e.Offset)
-			}
-
-			// all chunk entries must contain digest
-			if e.ChunkDigest == "" {
-				return nil, fmt.Errorf("ChunkDigest of %q(off=%d) not found in TOC JSON",
-					e.Name, e.Offset)
+			// record the digest of regular file payload
+			if e.Digest != "" {
+				d, err := digest.Parse(e.Digest)
+				if err != nil {
+					return nil, errors.Wrapf(err,
+						"failed to parse regular file digest %q", e.Digest)
+				}
+				regDigestMap[e.Offset] = d
+			} else {
+				regDigestMapIncomplete = true
 			}
+		} else {
+			containsChunk = true // this layer contains "chunk" entries.
+		}

+		// "reg" also can contain ChunkDigest (e.g. when "reg" is the first entry of
+		// chunked file)
+		if e.ChunkDigest != "" {
 			d, err := digest.Parse(e.ChunkDigest)
 			if err != nil {
-				return nil, errors.Wrapf(err, "failed to parse digest %q", e.ChunkDigest)
+				return nil, errors.Wrapf(err,
+					"failed to parse chunk digest %q", e.ChunkDigest)
 			}
-			digestMap[e.Offset] = d
+			chunkDigestMap[e.Offset] = d
+		} else {
+			chunkDigestMapIncomplete = true
 		}
 	}

-	return &verifier{digestMap: digestMap}, nil
+	if chunkDigestMapIncomplete {
+		// Though some chunk digests are not found, if this layer doesn't contain
+		// "chunk"s and all digest of "reg" files are recorded, we can use them instead.
+		if !containsChunk && !regDigestMapIncomplete {
+			return &verifier{digestMap: regDigestMap}, nil
+		}
+		return nil, fmt.Errorf("some ChunkDigest not found in TOC JSON")
+	}
+
+	return &verifier{digestMap: chunkDigestMap}, nil
 }

 // verifier is an implementation of TOCEntryVerifier which holds verifiers keyed by
@ -413,17 +531,17 @@ func (fr *fileReader) ReadAt(p []byte, off int64) (n int, err error) {
 	off -= ent.ChunkOffset

 	finalEnt := fr.ents[len(fr.ents)-1]
-	gzOff := ent.Offset
-	// gzBytesRemain is the number of compressed gzip bytes in this
-	// file remaining, over 1+ gzip chunks.
-	gzBytesRemain := finalEnt.NextOffset() - gzOff
+	compressedOff := ent.Offset
+	// compressedBytesRemain is the number of compressed bytes in this
+	// file remaining, over 1+ chunks.
+	compressedBytesRemain := finalEnt.NextOffset() - compressedOff

-	sr := io.NewSectionReader(fr.r.sr, gzOff, gzBytesRemain)
+	sr := io.NewSectionReader(fr.r.sr, compressedOff, compressedBytesRemain)

-	const maxGZread = 2 << 20
-	var bufSize = maxGZread
-	if gzBytesRemain < maxGZread {
-		bufSize = int(gzBytesRemain)
+	const maxRead = 2 << 20
+	var bufSize = maxRead
+	if compressedBytesRemain < maxRead {
+		bufSize = int(compressedBytesRemain)
 	}

 	br := bufio.NewReaderSize(sr, bufSize)
@ -431,14 +549,15 @@ func (fr *fileReader) ReadAt(p []byte, off int64) (n int, err error) {
 		return 0, fmt.Errorf("fileReader.ReadAt.peek: %v", err)
 	}

-	gz, err := gzip.NewReader(br)
+	dr, err := fr.r.decompressor.Reader(br)
 	if err != nil {
-		return 0, fmt.Errorf("fileReader.ReadAt.gzipNewReader: %v", err)
+		return 0, fmt.Errorf("fileReader.ReadAt.decompressor.Reader: %v", err)
 	}
-	if n, err := io.CopyN(ioutil.Discard, gz, off); n != off || err != nil {
+	defer dr.Close()
+	if n, err := io.CopyN(ioutil.Discard, dr, off); n != off || err != nil {
 		return 0, fmt.Errorf("discard of %d bytes = %v, %v", off, n, err)
 	}
-	return io.ReadFull(gz, p)
+	return io.ReadFull(dr, p)
 }

 // A Writer writes stargz files.
@ -447,14 +566,14 @@ func (fr *fileReader) ReadAt(p []byte, off int64) (n int, err error) {
 type Writer struct {
 	bw       *bufio.Writer
 	cw       *countWriter
-	toc      *jtoc
+	toc      *JTOC
 	diffHash hash.Hash // SHA-256 of uncompressed tar

-	closed           bool
-	gz               *gzip.Writer
-	lastUsername     map[int]string
-	lastGroupname    map[int]string
-	compressionLevel int
+	closed        bool
+	gz            io.WriteCloser
+	lastUsername  map[int]string
+	lastGroupname map[int]string
+	compressor    Compressor

 	// ChunkSize optionally controls the maximum number of bytes
 	// of data of a regular file that can be written in one gzip
@ -463,16 +582,16 @@ type Writer struct {
 	ChunkSize int
 }

-// currentGzipWriter writes to the current w.gz field, which can
+// currentCompressionWriter writes to the current w.gz field, which can
 // change throughout writing a tar entry.
 //
 // Additionally, it updates w's SHA-256 of the uncompressed bytes
 // of the tar file.
-type currentGzipWriter struct{ w *Writer }
+type currentCompressionWriter struct{ w *Writer }

-func (cgw currentGzipWriter) Write(p []byte) (int, error) {
-	cgw.w.diffHash.Write(p)
-	return cgw.w.gz.Write(p)
+func (ccw currentCompressionWriter) Write(p []byte) (int, error) {
+	ccw.w.diffHash.Write(p)
+	return ccw.w.gz.Write(p)
 }

 func (w *Writer) chunkSize() int {
@ -482,26 +601,34 @@ func (w *Writer) chunkSize() int {
 	return w.ChunkSize
 }

-// NewWriter returns a new stargz writer writing to w.
+// NewWriter returns a new stargz writer (gzip-based) writing to w.
 //
 // The writer must be closed to write its trailing table of contents.
 func NewWriter(w io.Writer) *Writer {
 	return NewWriterLevel(w, gzip.BestCompression)
 }

-// NewWriterLevel returns a new stargz writer writing to w.
+// NewWriterLevel returns a new stargz writer (gzip-based) writing to w.
 // The compression level is configurable.
 //
 // The writer must be closed to write its trailing table of contents.
 func NewWriterLevel(w io.Writer, compressionLevel int) *Writer {
+	return NewWriterWithCompressor(w, NewGzipCompressorWithLevel(compressionLevel))
+}
+
+// NewWriterWithCompressor returns a new stargz writer writing to w.
+// The compression method is configurable.
+//
+// The writer must be closed to write its trailing table of contents.
+func NewWriterWithCompressor(w io.Writer, c Compressor) *Writer {
 	bw := bufio.NewWriter(w)
 	cw := &countWriter{w: bw}
 	return &Writer{
-		bw:               bw,
-		cw:               cw,
-		toc:              &jtoc{Version: 1},
-		diffHash:         sha256.New(),
-		compressionLevel: compressionLevel,
+		bw:         bw,
+		cw:         cw,
+		toc:        &JTOC{Version: 1},
+		diffHash:   sha256.New(),
+		compressor: c,
 	}
 }

@ -517,42 +644,16 @@ func (w *Writer) Close() (digest.Digest, error) {
 		return "", err
 	}

-	// Write the TOC index.
-	tocOff := w.cw.n
-	w.gz, _ = gzip.NewWriterLevel(w.cw, w.compressionLevel)
-	tw := tar.NewWriter(currentGzipWriter{w})
-	tocJSON, err := json.MarshalIndent(w.toc, "", "\t")
+	// Write the TOC index and footer.
+	tocDigest, err := w.compressor.WriteTOCAndFooter(w.cw, w.cw.n, w.toc, w.diffHash)
 	if err != nil {
 		return "", err
 	}
-	if err := tw.WriteHeader(&tar.Header{
-		Typeflag: tar.TypeReg,
-		Name:     TOCTarName,
-		Size:     int64(len(tocJSON)),
-	}); err != nil {
-		return "", err
-	}
-	if _, err := tw.Write(tocJSON); err != nil {
-		return "", err
-	}
-
-	if err := tw.Close(); err != nil {
-		return "", err
-	}
-	if err := w.closeGz(); err != nil {
-		return "", err
-	}
-
-	// And a little footer with pointer to the TOC gzip stream.
-	if _, err := w.bw.Write(footerBytes(tocOff)); err != nil {
-		return "", err
-	}
-
 	if err := w.bw.Flush(); err != nil {
 		return "", err
 	}

-	return digest.FromBytes(tocJSON), nil
+	return tocDigest, nil
 }

 func (w *Writer) closeGz() error {
@ -584,10 +685,11 @@ func (w *Writer) nameIfChanged(mp *map[int]string, id int, name string) string {
 	return name
 }

-func (w *Writer) condOpenGz() {
+func (w *Writer) condOpenGz() (err error) {
 	if w.gz == nil {
-		w.gz, _ = gzip.NewWriterLevel(w.cw, w.compressionLevel)
+		w.gz, err = w.compressor.Writer(w.cw)
 	}
+	return
 }

 // AppendTar reads the tar or tar.gz file from r and appends
@ -639,8 +741,10 @@ func (w *Writer) AppendTar(r io.Reader) error {
 			ModTime3339: formatModtime(h.ModTime),
 			Xattrs:      xattrs,
 		}
-		w.condOpenGz()
-		tw := tar.NewWriter(currentGzipWriter{w})
+		if err := w.condOpenGz(); err != nil {
+			return err
+		}
+		tw := tar.NewWriter(currentCompressionWriter{w})
 		if err := tw.WriteHeader(h); err != nil {
 			return err
 		}
@ -699,7 +803,9 @@ func (w *Writer) AppendTar(r io.Reader) error {
 				ent.ChunkOffset = written
 				chunkDigest := digest.Canonical.Digester()

-				w.condOpenGz()
+				if err := w.condOpenGz(); err != nil {
+					return err
+				}

 				teeChunk := io.TeeReader(tee, chunkDigest.Hash())
 				if _, err := io.CopyN(tw, teeChunk, chunkSize); err != nil {
@ -732,83 +838,54 @@ func (w *Writer) DiffID() string {
 	return fmt.Sprintf("sha256:%x", w.diffHash.Sum(nil))
 }

-// footerBytes returns the 51 bytes footer.
-func footerBytes(tocOff int64) []byte {
-	buf := bytes.NewBuffer(make([]byte, 0, FooterSize))
-	gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression) // MUST be NoCompression to keep 51 bytes
-
-	// Extra header indicating the offset of TOCJSON
-	// https://tools.ietf.org/html/rfc1952#section-2.3.1.1
-	header := make([]byte, 4)
-	header[0], header[1] = 'S', 'G'
-	subfield := fmt.Sprintf("%016xSTARGZ", tocOff)
-	binary.LittleEndian.PutUint16(header[2:4], uint16(len(subfield))) // little-endian per RFC1952
-	gz.Header.Extra = append(header, []byte(subfield)...)
-	gz.Close()
-	if buf.Len() != FooterSize {
-		panic(fmt.Sprintf("footer buffer = %d, not %d", buf.Len(), FooterSize))
+func maxFooterSize(blobSize int64, decompressors ...Decompressor) (res int64) {
+	for _, d := range decompressors {
+		if s := d.FooterSize(); res < s && s <= blobSize {
+			res = s
+		}
 	}
-	return buf.Bytes()
+	return
 }

-func parseFooter(p []byte) (tocOffset int64, footerSize int64, rErr error) {
-	var allErr []error
+func parseTOC(d Decompressor, sr *io.SectionReader, tocOff, tocSize int64, tocBytes []byte, opts openOpts) (*Reader, error) {
+	if len(tocBytes) > 0 {
+		start := time.Now()
+		toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes))
+		if err == nil {
+			if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil {
+				opts.telemetry.DeserializeTocLatency(start)
+			}
+			return &Reader{
+				sr:           sr,
+				toc:          toc,
+				tocDigest:    tocDgst,
+				decompressor: d,
+			}, nil
+		}
+	}

-	tocOffset, err := parseEStargzFooter(p)
-	if err == nil {
-		return tocOffset, FooterSize, nil
+	start := time.Now()
+	tocBytes = make([]byte, tocSize)
+	if _, err := sr.ReadAt(tocBytes, tocOff); err != nil {
+		return nil, fmt.Errorf("error reading %d byte TOC targz: %v", len(tocBytes), err)
 	}
-	allErr = append(allErr, err)
-
-	pad := len(p) - legacyFooterSize
-	if pad < 0 {
-		pad = 0
+	if opts.telemetry != nil && opts.telemetry.GetTocLatency != nil {
+		opts.telemetry.GetTocLatency(start)
 	}
-	tocOffset, err = parseLegacyFooter(p[pad:])
-	if err == nil {
-		return tocOffset, legacyFooterSize, nil
-	}
-	return 0, 0, errorutil.Aggregate(append(allErr, err))
-}
-
-func parseEStargzFooter(p []byte) (tocOffset int64, err error) {
-	if len(p) != FooterSize {
-		return 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
-	}
-	zr, err := gzip.NewReader(bytes.NewReader(p))
+	start = time.Now()
+	toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes))
 	if err != nil {
-		return 0, err
+		return nil, err
 	}
-	extra := zr.Header.Extra
-	si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:]
-	if si1 != 'S' || si2 != 'G' {
-		return 0, fmt.Errorf("invalid subfield IDs: %q, %q; want E, S", si1, si2)
+	if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil {
+		opts.telemetry.DeserializeTocLatency(start)
 	}
-	if slen := binary.LittleEndian.Uint16(subfieldlen); slen != uint16(16+len("STARGZ")) {
-		return 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, 16+len("STARGZ"))
-	}
-	if string(subfield[16:]) != "STARGZ" {
-		return 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
-	}
-	return strconv.ParseInt(string(subfield[:16]), 16, 64)
-}
-
-func parseLegacyFooter(p []byte) (tocOffset int64, err error) {
-	if len(p) != legacyFooterSize {
-		return 0, fmt.Errorf("legacy: invalid length %d cannot be parsed", len(p))
-	}
-	zr, err := gzip.NewReader(bytes.NewReader(p))
-	if err != nil {
-		return 0, errors.Wrapf(err, "legacy: failed to get footer gzip reader")
-	}
-	extra := zr.Header.Extra
-	if len(extra) != 16+len("STARGZ") {
-		return 0, fmt.Errorf("legacy: invalid stargz's extra field size")
-	}
-	if string(extra[16:]) != "STARGZ" {
-		return 0, fmt.Errorf("legacy: magic string STARGZ not found")
-	}
-	return strconv.ParseInt(string(extra[:16]), 16, 64)
+	return &Reader{
+		sr:           sr,
+		toc:          toc,
+		tocDigest:    tocDgst,
+		decompressor: d,
+	}, nil
 }

 func formatModtime(t time.Time) string {
@ -847,3 +924,10 @@ func isGzip(br *bufio.Reader) bool {
 	peek, _ := br.Peek(3)
 	return len(peek) >= 3 && peek[0] == gzipID1 && peek[1] == gzipID2 && peek[2] == gzipDeflate
 }
+
+func positive(n int64) int64 {
+	if n < 0 {
+		return 0
+	}
+	return n
+}
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/go.mod
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/go.mod
@ -3,7 +3,7 @@ module github.com/containerd/stargz-snapshotter/estargz
 go 1.16

 require (
-	github.com/klauspost/compress v1.12.3
+	github.com/klauspost/compress v1.13.5
 	github.com/opencontainers/go-digest v1.0.0
 	github.com/pkg/errors v0.9.1
 	golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/go.sum
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/go.sum
@ -1,7 +1,5 @@
-github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
-github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/klauspost/compress v1.12.3 h1:G5AfA94pHPysR56qqrkO2pxEexdDzrpFJ6yt/VqWxVU=
-github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/klauspost/compress v1.13.5 h1:9O69jUPDcsT9fEm74W92rZL9FQY7rCdaXVneq+yyzl4=
+github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/gzip.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/gzip.go
@ -0,0 +1,216 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+/*
+   Copyright 2019 The Go Authors. All rights reserved.
+   Use of this source code is governed by a BSD-style
+   license that can be found in the LICENSE file.
+*/
+
+package estargz
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"hash"
+	"io"
+	"strconv"
+
+	digest "github.com/opencontainers/go-digest"
+	"github.com/pkg/errors"
+)
+
+// gzipCompression bundles the gzip compressor and decompressor by embedding
+// both, so a single value exposes the methods of each.
+type gzipCompression struct {
+	*gzipCompressor
+	*GzipDecompressor
+}
+
+// newGzipCompressionWithLevel returns a gzip-based Compression whose
+// compressor uses the given compression level.
+func newGzipCompressionWithLevel(level int) Compression {
+	c := &gzipCompressor{compressionLevel: level}
+	d := &GzipDecompressor{}
+	return &gzipCompression{gzipCompressor: c, GzipDecompressor: d}
+}
+
+// NewGzipCompressorWithLevel returns a gzip-based Compressor that compresses
+// with the given level.
+func NewGzipCompressorWithLevel(level int) Compressor {
+	gc := new(gzipCompressor)
+	gc.compressionLevel = level
+	return gc
+}
+
+// gzipCompressor holds the gzip compression level used by its Writer and
+// WriteTOCAndFooter methods.
+type gzipCompressor struct {
+	// compressionLevel is handed to gzip.NewWriterLevel for every stream.
+	compressionLevel int
+}
+
+// Writer returns a gzip writer on top of w that compresses at the configured
+// level. The error from gzip.NewWriterLevel (e.g. an invalid level) is
+// returned as is.
+func (gc *gzipCompressor) Writer(w io.Writer) (io.WriteCloser, error) {
+	return gzip.NewWriterLevel(w, gc.compressionLevel)
+}
+
+// WriteTOCAndFooter serializes toc as indented JSON, writes it to w as a single
+// tar entry named TOCTarName inside its own gzip stream, and then appends the
+// footer produced by gzipFooterBytes(off).
+//
+// When diffHash is non-nil, the uncompressed tar bytes of the TOC entry are
+// also written to it. The returned digest is computed over the TOC JSON bytes.
+// An error is returned if the TOC cannot be marshalled, if gzip rejects the
+// configured compression level, or if any write fails.
+func (gc *gzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (digest.Digest, error) {
+	tocJSON, err := json.MarshalIndent(toc, "", "\t")
+	if err != nil {
+		return "", err
+	}
+	// An invalid compression level yields a nil writer; report it here instead
+	// of dereferencing nil further down.
+	gz, err := gzip.NewWriterLevel(w, gc.compressionLevel)
+	if err != nil {
+		return "", err
+	}
+	gw := io.Writer(gz)
+	if diffHash != nil {
+		gw = io.MultiWriter(gz, diffHash)
+	}
+	tw := tar.NewWriter(gw)
+	if err := tw.WriteHeader(&tar.Header{
+		Typeflag: tar.TypeReg,
+		Name:     TOCTarName,
+		Size:     int64(len(tocJSON)),
+	}); err != nil {
+		return "", err
+	}
+	if _, err := tw.Write(tocJSON); err != nil {
+		return "", err
+	}
+
+	// Close the tar writer first so its trailer goes through gzip, then close
+	// gzip so the stream is complete before the footer is appended.
+	if err := tw.Close(); err != nil {
+		return "", err
+	}
+	if err := gz.Close(); err != nil {
+		return "", err
+	}
+	if _, err := w.Write(gzipFooterBytes(off)); err != nil {
+		return "", err
+	}
+	return digest.FromBytes(tocJSON), nil
+}
+
+// gzipFooterBytes builds the eStargz footer: a gzip stream with no payload
+// whose header Extra field records tocOff.
+//
+// The Extra field is a single subfield with the IDs 'S' and 'G', a
+// little-endian 16-bit payload length, and a payload made of tocOff formatted
+// as 16 hex digits followed by "STARGZ". It panics if the encoded footer is
+// not exactly FooterSize bytes long.
+func gzipFooterBytes(tocOff int64) []byte {
+	// Extra header indicating the offset of TOCJSON
+	// https://tools.ietf.org/html/rfc1952#section-2.3.1.1
+	payload := fmt.Sprintf("%016xSTARGZ", tocOff)
+	extra := make([]byte, 4, 4+len(payload))
+	extra[0], extra[1] = 'S', 'G'
+	binary.LittleEndian.PutUint16(extra[2:4], uint16(len(payload))) // little-endian per RFC1952
+	extra = append(extra, payload...)
+
+	out := bytes.NewBuffer(make([]byte, 0, FooterSize))
+	zw, _ := gzip.NewWriterLevel(out, gzip.NoCompression) // MUST be NoCompression to keep 51 bytes
+	zw.Header.Extra = extra
+	zw.Close()
+	if n := out.Len(); n != FooterSize {
+		panic(fmt.Sprintf("footer buffer = %d, not %d", n, FooterSize))
+	}
+	return out.Bytes()
+}
+
+// GzipDecompressor provides the Reader, ParseTOC, ParseFooter and FooterSize
+// methods for gzip-based eStargz blobs. It carries no state.
+type GzipDecompressor struct{}
+
+// Reader returns a gzip reader that decompresses the data read from r.
+func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
+	return gzip.NewReader(r)
+}
+
+// ParseTOC parses the TOC read from r by delegating to parseTOCEStargz and
+// returns the decoded TOC together with its digest.
+func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
+	return parseTOCEStargz(r)
+}
+
+// ParseFooter extracts the TOC offset from an eStargz footer.
+//
+// p must be exactly FooterSize bytes long and must be a gzip stream whose
+// header Extra field is one subfield: the IDs 'S' and 'G', a little-endian
+// 16-bit length, and a payload made of the offset as 16 hex digits followed by
+// the magic string "STARGZ". The returned tocSize is always 0.
+//
+// A footer that does not have this shape is reported as an error.
+func (gz *GzipDecompressor) ParseFooter(p []byte) (tocOffset, tocSize int64, err error) {
+	if len(p) != FooterSize {
+		return 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
+	}
+	zr, err := gzip.NewReader(bytes.NewReader(p))
+	if err != nil {
+		return 0, 0, err
+	}
+	defer zr.Close()
+
+	const (
+		subfieldHeaderLen = 4  // SI1, SI2 and the 2-byte length
+		offsetHexLen      = 16 // TOC offset as zero-padded hex digits
+		magic             = "STARGZ"
+	)
+	extra := zr.Header.Extra
+	// The footer is untrusted input: verify the size before slicing so that a
+	// gzip header with a missing or short Extra field cannot cause a panic.
+	if want := subfieldHeaderLen + offsetHexLen + len(magic); len(extra) != want {
+		return 0, 0, fmt.Errorf("invalid length of extra field %d; want %d", len(extra), want)
+	}
+	si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:]
+	if si1 != 'S' || si2 != 'G' {
+		return 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want S, G", si1, si2)
+	}
+	if slen := binary.LittleEndian.Uint16(subfieldlen); int(slen) != offsetHexLen+len(magic) {
+		return 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, offsetHexLen+len(magic))
+	}
+	if string(subfield[offsetHexLen:]) != magic {
+		return 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
+	}
+	tocOffset, err = strconv.ParseInt(string(subfield[:offsetHexLen]), 16, 64)
+	if err != nil {
+		return 0, 0, errors.Wrapf(err, "failed to parse toc offset")
+	}
+	return tocOffset, 0, nil
+}
+
+// FooterSize returns the package-level FooterSize value, the footer length
+// that ParseFooter accepts.
+func (gz *GzipDecompressor) FooterSize() int64 {
+	return FooterSize
+}
+
+// legacyGzipDecompressor provides the same set of methods as GzipDecompressor
+// but parses the legacy footer layout of legacyFooterSize bytes, whose gzip
+// Extra field holds the offset and magic directly, without a subfield header.
+type legacyGzipDecompressor struct{}
+
+// Reader returns a gzip reader that decompresses the data read from r.
+func (gz *legacyGzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
+	return gzip.NewReader(r)
+}
+
+// ParseTOC parses the TOC read from r by delegating to parseTOCEStargz and
+// returns the decoded TOC together with its digest.
+func (gz *legacyGzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
+	return parseTOCEStargz(r)
+}
+
+// ParseFooter extracts the TOC offset from a legacy footer.
+//
+// p must be exactly legacyFooterSize bytes long and must be a gzip stream
+// whose header Extra field consists of the offset as 16 hex digits followed by
+// the magic string "STARGZ". The returned tocSize is always 0.
+func (gz *legacyGzipDecompressor) ParseFooter(p []byte) (tocOffset, tocSize int64, err error) {
+	if n := len(p); n != legacyFooterSize {
+		return 0, 0, fmt.Errorf("legacy: invalid length %d cannot be parsed", n)
+	}
+	zr, err := gzip.NewReader(bytes.NewReader(p))
+	if err != nil {
+		return 0, 0, errors.Wrapf(err, "legacy: failed to get footer gzip reader")
+	}
+	defer zr.Close()
+
+	const magic = "STARGZ"
+	extra := zr.Header.Extra
+	if len(extra) != 16+len(magic) {
+		return 0, 0, fmt.Errorf("legacy: invalid stargz's extra field size")
+	}
+	// The length check above guarantees both slices are in range.
+	offsetHex, suffix := string(extra[:16]), string(extra[16:])
+	if suffix != magic {
+		return 0, 0, fmt.Errorf("legacy: magic string STARGZ not found")
+	}
+	off, perr := strconv.ParseInt(offsetHex, 16, 64)
+	if perr != nil {
+		return 0, 0, errors.Wrapf(perr, "legacy: failed to parse toc offset")
+	}
+	return off, 0, nil
+}
+
+// FooterSize returns the package-level legacyFooterSize value, the footer
+// length that ParseFooter accepts.
+func (gz *legacyGzipDecompressor) FooterSize() int64 {
+	return legacyFooterSize
+}
+
+// parseTOCEStargz reads a gzip-compressed tar stream from r, expects its first
+// entry to be named TOCTarName, and decodes that entry's contents as the JSON
+// TOC.
+//
+// Only the first gzip member is read. The returned digest is computed over the
+// bytes consumed while decoding the JSON. On success the returned TOC is never
+// nil, even when the entry contains the JSON literal null.
+func parseTOCEStargz(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
+	zr, err := gzip.NewReader(r)
+	if err != nil {
+		return nil, "", fmt.Errorf("malformed TOC gzip header: %w", err)
+	}
+	defer zr.Close()
+	// Stop at the end of the TOC member instead of continuing into whatever
+	// gzip members follow it in the blob.
+	zr.Multistream(false)
+	tr := tar.NewReader(zr)
+	h, err := tr.Next()
+	if err != nil {
+		return nil, "", fmt.Errorf("failed to find tar header in TOC gzip stream: %w", err)
+	}
+	if h.Name != TOCTarName {
+		return nil, "", fmt.Errorf("TOC tar entry had name %q; expected %q", h.Name, TOCTarName)
+	}
+	dgstr := digest.Canonical.Digester()
+	toc = new(JTOC)
+	// Decode into the allocated struct rather than into the address of the
+	// pointer: with a **JTOC target, a JSON null would reset toc to nil and a
+	// nil TOC would be returned without an error.
+	if err := json.NewDecoder(io.TeeReader(tr, dgstr.Hash())).Decode(toc); err != nil {
+		return nil, "", fmt.Errorf("error decoding TOC JSON: %w", err)
+	}
+	return toc, dgstr.Digest(), nil
+}
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/types.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/types.go
@ -24,6 +24,8 @@ package estargz

 import (
 	"archive/tar"
+	"hash"
+	"io"
 	"os"
 	"path"
 	"time"
@ -90,8 +92,8 @@ const (
 	landmarkContents = 0xf
 )

-// jtoc is the JSON-serialized table of contents index of the files in the stargz file.
-type jtoc struct {
+// JTOC is the JSON-serialized table of contents index of the files in the stargz file.
+type JTOC struct {
 	Version int         `json:"version"`
 	Entries []*TOCEntry `json:"entries"`
 }
@ -262,3 +264,51 @@ type TOCEntryVerifier interface {
 	// contents of the specified TOCEntry.
 	Verifier(ce *TOCEntry) (digest.Verifier, error)
 }
+
+// Compression provides the compression helpers to be used for creating and
+// parsing eStargz. This package provides gzip-based Compression by default,
+// but any compression algorithm (e.g. zstd) can be used as long as it
+// implements Compression.
+type Compression interface {
+	Compressor
+	Decompressor
+}
+
+// Compressor represents the helper methods to be used for creating eStargz.
+type Compressor interface {
+	// Writer returns WriteCloser to be used for writing a chunk to eStargz.
+	// Every time a chunk is written, the WriteCloser is closed and Writer is
+	// called again for writing the next chunk.
+	Writer(w io.Writer) (io.WriteCloser, error)
+
+	// WriteTOCAndFooter is called to write JTOC to the passed Writer.
+	// diffHash calculates the DiffID (uncompressed sha256 hash) of the blob.
+	// WriteTOCAndFooter can optionally write anything that affects DiffID calculation
+	// (e.g. uncompressed TOC JSON).
+	//
+	// This function returns tocDgst that represents the digest of TOC that will be used
+	// to verify this blob when it's parsed.
+	WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error)
+}
+
+// Decompressor represents the helper methods to be used for parsing eStargz.
+type Decompressor interface {
+	// Reader returns ReadCloser to be used for decompressing file payload.
+	Reader(r io.Reader) (io.ReadCloser, error)
+
+	// FooterSize returns the size of the footer of this blob.
+	FooterSize() int64
+
+	// ParseFooter parses the footer and returns the offset and (compressed) size of TOC.
+	//
+	// Here, tocSize is optional. If tocSize <= 0, it's by default the size of the range
+	// from tocOffset until the beginning of the footer (blob size - tocOff - FooterSize).
+	ParseFooter(p []byte) (tocOffset, tocSize int64, err error)
+
+	// ParseTOC parses TOC from the passed reader. The reader provides the partial contents
+	// of the underlying blob that has the range specified by ParseFooter method.
+	//
+	// This function returns tocDgst that represents the digest of TOC that will be used
+	// to verify this blob. This must match the value returned from
+	// Compressor.WriteTOCAndFooter that is used when creating this blob.
+	ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
+}
--- a/vendor/github.com/golang/snappy/.gitignore
+++ b/vendor/github.com/golang/snappy/.gitignore
@ -1,16 +0,0 @@
-cmd/snappytool/snappytool
-testdata/bench
-
-# These explicitly listed benchmark data files are for an obsolete version of
-# snappy_test.go.
-testdata/alice29.txt
-testdata/asyoulik.txt
-testdata/fireworks.jpeg
-testdata/geo.protodata
-testdata/html
-testdata/html_x_4
-testdata/kppkn.gtb
-testdata/lcet10.txt
-testdata/paper-100k.pdf
-testdata/plrabn12.txt
-testdata/urls.10K
--- a/vendor/github.com/golang/snappy/AUTHORS
+++ b/vendor/github.com/golang/snappy/AUTHORS
@ -1,18 +0,0 @@
-# This is the official list of Snappy-Go authors for copyright purposes.
-# This file is distinct from the CONTRIBUTORS files.
-# See the latter for an explanation.
-
-# Names should be added to this file as
-#	Name or Organization <email address>
-# The email address is not required for organizations.
-
-# Please keep the list sorted.
-
-Amazon.com, Inc
-Damian Gryski <dgryski@gmail.com>
-Eric Buth <eric@topos.com>
-Google Inc.
-Jan Mercl <0xjnml@gmail.com>
-Klaus Post <klauspost@gmail.com>
-Rodolfo Carvalho <rhcarvalho@gmail.com>
-Sebastien Binet <seb.binet@gmail.com>
--- a/vendor/github.com/golang/snappy/CONTRIBUTORS
+++ b/vendor/github.com/golang/snappy/CONTRIBUTORS
@ -1,41 +0,0 @@
-# This is the official list of people who can contribute
-# (and typically have contributed) code to the Snappy-Go repository.
-# The AUTHORS file lists the copyright holders; this file
-# lists people.  For example, Google employees are listed here
-# but not in AUTHORS, because Google holds the copyright.
-#
-# The submission process automatically checks to make sure
-# that people submitting code are listed in this file (by email address).
-#
-# Names should be added to this file only after verifying that
-# the individual or the individual's organization has agreed to
-# the appropriate Contributor License Agreement, found here:
-#
-#     http://code.google.com/legal/individual-cla-v1.0.html
-#     http://code.google.com/legal/corporate-cla-v1.0.html
-#
-# The agreement for individuals can be filled out on the web.
-#
-# When adding J Random Contributor's name to this file,
-# either J's name or J's organization's name should be
-# added to the AUTHORS file, depending on whether the
-# individual or corporate CLA was used.
-
-# Names should be added to this file like so:
-#     Name <email address>
-
-# Please keep the list sorted.
-
-Alex Legg <alexlegg@google.com>
-Damian Gryski <dgryski@gmail.com>
-Eric Buth <eric@topos.com>
-Jan Mercl <0xjnml@gmail.com>
-Jonathan Swinney <jswinney@amazon.com>
-Kai Backman <kaib@golang.org>
-Klaus Post <klauspost@gmail.com>
-Marc-Antoine Ruel <maruel@chromium.org>
-Nigel Tao <nigeltao@golang.org>
-Rob Pike <r@golang.org>
-Rodolfo Carvalho <rhcarvalho@gmail.com>
-Russ Cox <rsc@golang.org>
-Sebastien Binet <seb.binet@gmail.com>
--- a/vendor/github.com/golang/snappy/README
+++ b/vendor/github.com/golang/snappy/README
@ -1,107 +0,0 @@
-The Snappy compression format in the Go programming language.
-
-To download and install from source:
-$ go get github.com/golang/snappy
-
-Unless otherwise noted, the Snappy-Go source files are distributed
-under the BSD-style license found in the LICENSE file.
-
-
-
-Benchmarks.
-
-The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
-or so files, the same set used by the C++ Snappy code (github.com/google/snappy
-and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
-3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
-
-"go test -test.bench=."
-
-_UFlat0-8         2.19GB/s ± 0%  html
-_UFlat1-8         1.41GB/s ± 0%  urls
-_UFlat2-8         23.5GB/s ± 2%  jpg
-_UFlat3-8         1.91GB/s ± 0%  jpg_200
-_UFlat4-8         14.0GB/s ± 1%  pdf
-_UFlat5-8         1.97GB/s ± 0%  html4
-_UFlat6-8          814MB/s ± 0%  txt1
-_UFlat7-8          785MB/s ± 0%  txt2
-_UFlat8-8          857MB/s ± 0%  txt3
-_UFlat9-8          719MB/s ± 1%  txt4
-_UFlat10-8        2.84GB/s ± 0%  pb
-_UFlat11-8        1.05GB/s ± 0%  gaviota
-
-_ZFlat0-8         1.04GB/s ± 0%  html
-_ZFlat1-8          534MB/s ± 0%  urls
-_ZFlat2-8         15.7GB/s ± 1%  jpg
-_ZFlat3-8          740MB/s ± 3%  jpg_200
-_ZFlat4-8         9.20GB/s ± 1%  pdf
-_ZFlat5-8          991MB/s ± 0%  html4
-_ZFlat6-8          379MB/s ± 0%  txt1
-_ZFlat7-8          352MB/s ± 0%  txt2
-_ZFlat8-8          396MB/s ± 1%  txt3
-_ZFlat9-8          327MB/s ± 1%  txt4
-_ZFlat10-8        1.33GB/s ± 1%  pb
-_ZFlat11-8         605MB/s ± 1%  gaviota
-
-
-
-"go test -test.bench=. -tags=noasm"
-
-_UFlat0-8          621MB/s ± 2%  html
-_UFlat1-8          494MB/s ± 1%  urls
-_UFlat2-8         23.2GB/s ± 1%  jpg
-_UFlat3-8         1.12GB/s ± 1%  jpg_200
-_UFlat4-8         4.35GB/s ± 1%  pdf
-_UFlat5-8          609MB/s ± 0%  html4
-_UFlat6-8          296MB/s ± 0%  txt1
-_UFlat7-8          288MB/s ± 0%  txt2
-_UFlat8-8          309MB/s ± 1%  txt3
-_UFlat9-8          280MB/s ± 1%  txt4
-_UFlat10-8         753MB/s ± 0%  pb
-_UFlat11-8         400MB/s ± 0%  gaviota
-
-_ZFlat0-8          409MB/s ± 1%  html
-_ZFlat1-8          250MB/s ± 1%  urls
-_ZFlat2-8         12.3GB/s ± 1%  jpg
-_ZFlat3-8          132MB/s ± 0%  jpg_200
-_ZFlat4-8         2.92GB/s ± 0%  pdf
-_ZFlat5-8          405MB/s ± 1%  html4
-_ZFlat6-8          179MB/s ± 1%  txt1
-_ZFlat7-8          170MB/s ± 1%  txt2
-_ZFlat8-8          189MB/s ± 1%  txt3
-_ZFlat9-8          164MB/s ± 1%  txt4
-_ZFlat10-8         479MB/s ± 1%  pb
-_ZFlat11-8         270MB/s ± 1%  gaviota
-
-
-
-For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
-are the numbers from C++ Snappy's
-
-make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
-
-BM_UFlat/0     2.4GB/s  html
-BM_UFlat/1     1.4GB/s  urls
-BM_UFlat/2    21.8GB/s  jpg
-BM_UFlat/3     1.5GB/s  jpg_200
-BM_UFlat/4    13.3GB/s  pdf
-BM_UFlat/5     2.1GB/s  html4
-BM_UFlat/6     1.0GB/s  txt1
-BM_UFlat/7   959.4MB/s  txt2
-BM_UFlat/8     1.0GB/s  txt3
-BM_UFlat/9   864.5MB/s  txt4
-BM_UFlat/10    2.9GB/s  pb
-BM_UFlat/11    1.2GB/s  gaviota
-
-BM_ZFlat/0   944.3MB/s  html (22.31 %)
-BM_ZFlat/1   501.6MB/s  urls (47.78 %)
-BM_ZFlat/2    14.3GB/s  jpg (99.95 %)
-BM_ZFlat/3   538.3MB/s  jpg_200 (73.00 %)
-BM_ZFlat/4     8.3GB/s  pdf (83.30 %)
-BM_ZFlat/5   903.5MB/s  html4 (22.52 %)
-BM_ZFlat/6   336.0MB/s  txt1 (57.88 %)
-BM_ZFlat/7   312.3MB/s  txt2 (61.91 %)
-BM_ZFlat/8   353.1MB/s  txt3 (54.99 %)
-BM_ZFlat/9   289.9MB/s  txt4 (66.26 %)
-BM_ZFlat/10    1.2GB/s  pb (19.68 %)
-BM_ZFlat/11  527.4MB/s  gaviota (37.72 %)
--- a/vendor/github.com/golang/snappy/decode_amd64.s
+++ b/vendor/github.com/golang/snappy/decode_amd64.s
@ -1,490 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-//
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-//	- AX	scratch
-//	- BX	scratch
-//	- CX	length or x
-//	- DX	offset
-//	- SI	&src[s]
-//	- DI	&dst[d]
-//	+ R8	dst_base
-//	+ R9	dst_len
-//	+ R10	dst_base + dst_len
-//	+ R11	src_base
-//	+ R12	src_len
-//	+ R13	src_base + src_len
-//	- R14	used by doCopy
-//	- R15	used by doCopy
-//
-// The registers R8-R13 (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly DI - R8,  and len(dst)-d is R10 - DI.
-// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
-TEXT ·decode(SB), NOSPLIT, $48-56
-	// Initialize SI, DI and R8-R13.
-	MOVQ dst_base+0(FP), R8
-	MOVQ dst_len+8(FP), R9
-	MOVQ R8, DI
-	MOVQ R8, R10
-	ADDQ R9, R10
-	MOVQ src_base+24(FP), R11
-	MOVQ src_len+32(FP), R12
-	MOVQ R11, SI
-	MOVQ R11, R13
-	ADDQ R12, R13
-
-loop:
-	// for s < len(src)
-	CMPQ SI, R13
-	JEQ  end
-
-	// CX = uint32(src[s])
-	//
-	// switch src[s] & 0x03
-	MOVBLZX (SI), CX
-	MOVL    CX, BX
-	ANDL    $3, BX
-	CMPL    BX, $1
-	JAE     tagCopy
-
-	// ----------------------------------------
-	// The code below handles literal tags.
-
-	// case tagLiteral:
-	// x := uint32(src[s] >> 2)
-	// switch
-	SHRL $2, CX
-	CMPL CX, $60
-	JAE  tagLit60Plus
-
-	// case x < 60:
-	// s++
-	INCQ SI
-
-doLit:
-	// This is the end of the inner "switch", when we have a literal tag.
-	//
-	// We assume that CX == x and x fits in a uint32, where x is the variable
-	// used in the pure Go decode_other.go code.
-
-	// length = int(x) + 1
-	//
-	// Unlike the pure Go code, we don't need to check if length <= 0 because
-	// CX can hold 64 bits, so the increment cannot overflow.
-	INCQ CX
-
-	// Prepare to check if copying length bytes will run past the end of dst or
-	// src.
-	//
-	// AX = len(dst) - d
-	// BX = len(src) - s
-	MOVQ R10, AX
-	SUBQ DI, AX
-	MOVQ R13, BX
-	SUBQ SI, BX
-
-	// !!! Try a faster technique for short (16 or fewer bytes) copies.
-	//
-	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
-	//   goto callMemmove // Fall back on calling runtime·memmove.
-	// }
-	//
-	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
-	// against 21 instead of 16, because it cannot assume that all of its input
-	// is contiguous in memory and so it needs to leave enough source bytes to
-	// read the next tag without refilling buffers, but Go's Decode assumes
-	// contiguousness (the src argument is a []byte).
-	CMPQ CX, $16
-	JGT  callMemmove
-	CMPQ AX, $16
-	JLT  callMemmove
-	CMPQ BX, $16
-	JLT  callMemmove
-
-	// !!! Implement the copy from src to dst as a 16-byte load and store.
-	// (Decode's documentation says that dst and src must not overlap.)
-	//
-	// This always copies 16 bytes, instead of only length bytes, but that's
-	// OK. If the input is a valid Snappy encoding then subsequent iterations
-	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
-	// non-nil error), so the overrun will be ignored.
-	//
-	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
-	// 16-byte loads and stores. This technique probably wouldn't be as
-	// effective on architectures that are fussier about alignment.
-	MOVOU 0(SI), X0
-	MOVOU X0, 0(DI)
-
-	// d += length
-	// s += length
-	ADDQ CX, DI
-	ADDQ CX, SI
-	JMP  loop
-
-callMemmove:
-	// if length > len(dst)-d || length > len(src)-s { etc }
-	CMPQ CX, AX
-	JGT  errCorrupt
-	CMPQ CX, BX
-	JGT  errCorrupt
-
-	// copy(dst[d:], src[s:s+length])
-	//
-	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
-	// DI, SI and CX as arguments. Coincidentally, we also need to spill those
-	// three registers to the stack, to save local variables across the CALL.
-	MOVQ DI, 0(SP)
-	MOVQ SI, 8(SP)
-	MOVQ CX, 16(SP)
-	MOVQ DI, 24(SP)
-	MOVQ SI, 32(SP)
-	MOVQ CX, 40(SP)
-	CALL runtime·memmove(SB)
-
-	// Restore local variables: unspill registers from the stack and
-	// re-calculate R8-R13.
-	MOVQ 24(SP), DI
-	MOVQ 32(SP), SI
-	MOVQ 40(SP), CX
-	MOVQ dst_base+0(FP), R8
-	MOVQ dst_len+8(FP), R9
-	MOVQ R8, R10
-	ADDQ R9, R10
-	MOVQ src_base+24(FP), R11
-	MOVQ src_len+32(FP), R12
-	MOVQ R11, R13
-	ADDQ R12, R13
-
-	// d += length
-	// s += length
-	ADDQ CX, DI
-	ADDQ CX, SI
-	JMP  loop
-
-tagLit60Plus:
-	// !!! This fragment does the
-	//
-	// s += x - 58; if uint(s) > uint(len(src)) { etc }
-	//
-	// checks. In the asm version, we code it once instead of once per switch case.
-	ADDQ CX, SI
-	SUBQ $58, SI
-	MOVQ SI, BX
-	SUBQ R11, BX
-	CMPQ BX, R12
-	JA   errCorrupt
-
-	// case x == 60:
-	CMPL CX, $61
-	JEQ  tagLit61
-	JA   tagLit62Plus
-
-	// x = uint32(src[s-1])
-	MOVBLZX -1(SI), CX
-	JMP     doLit
-
-tagLit61:
-	// case x == 61:
-	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
-	MOVWLZX -2(SI), CX
-	JMP     doLit
-
-tagLit62Plus:
-	CMPL CX, $62
-	JA   tagLit63
-
-	// case x == 62:
-	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
-	MOVWLZX -3(SI), CX
-	MOVBLZX -1(SI), BX
-	SHLL    $16, BX
-	ORL     BX, CX
-	JMP     doLit
-
-tagLit63:
-	// case x == 63:
-	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
-	MOVL -4(SI), CX
-	JMP  doLit
-
-// The code above handles literal tags.
-// ----------------------------------------
-// The code below handles copy tags.
-
-tagCopy4:
-	// case tagCopy4:
-	// s += 5
-	ADDQ $5, SI
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVQ SI, BX
-	SUBQ R11, BX
-	CMPQ BX, R12
-	JA   errCorrupt
-
-	// length = 1 + int(src[s-5])>>2
-	SHRQ $2, CX
-	INCQ CX
-
-	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
-	MOVLQZX -4(SI), DX
-	JMP     doCopy
-
-tagCopy2:
-	// case tagCopy2:
-	// s += 3
-	ADDQ $3, SI
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVQ SI, BX
-	SUBQ R11, BX
-	CMPQ BX, R12
-	JA   errCorrupt
-
-	// length = 1 + int(src[s-3])>>2
-	SHRQ $2, CX
-	INCQ CX
-
-	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-	MOVWQZX -2(SI), DX
-	JMP     doCopy
-
-tagCopy:
-	// We have a copy tag. We assume that:
-	//	- BX == src[s] & 0x03
-	//	- CX == src[s]
-	CMPQ BX, $2
-	JEQ  tagCopy2
-	JA   tagCopy4
-
-	// case tagCopy1:
-	// s += 2
-	ADDQ $2, SI
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVQ SI, BX
-	SUBQ R11, BX
-	CMPQ BX, R12
-	JA   errCorrupt
-
-	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-	MOVQ    CX, DX
-	ANDQ    $0xe0, DX
-	SHLQ    $3, DX
-	MOVBQZX -1(SI), BX
-	ORQ     BX, DX
-
-	// length = 4 + int(src[s-2])>>2&0x7
-	SHRQ $2, CX
-	ANDQ $7, CX
-	ADDQ $4, CX
-
-doCopy:
-	// This is the end of the outer "switch", when we have a copy tag.
-	//
-	// We assume that:
-	//	- CX == length && CX > 0
-	//	- DX == offset
-
-	// if offset <= 0 { etc }
-	CMPQ DX, $0
-	JLE  errCorrupt
-
-	// if d < offset { etc }
-	MOVQ DI, BX
-	SUBQ R8, BX
-	CMPQ BX, DX
-	JLT  errCorrupt
-
-	// if length > len(dst)-d { etc }
-	MOVQ R10, BX
-	SUBQ DI, BX
-	CMPQ CX, BX
-	JGT  errCorrupt
-
-	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
-	//
-	// Set:
-	//	- R14 = len(dst)-d
-	//	- R15 = &dst[d-offset]
-	MOVQ R10, R14
-	SUBQ DI, R14
-	MOVQ DI, R15
-	SUBQ DX, R15
-
-	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
-	//
-	// First, try using two 8-byte load/stores, similar to the doLit technique
-	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
-	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
-	// and not one 16-byte load/store, and the first store has to be before the
-	// second load, due to the overlap if offset is in the range [8, 16).
-	//
-	// if length > 16 || offset < 8 || len(dst)-d < 16 {
-	//   goto slowForwardCopy
-	// }
-	// copy 16 bytes
-	// d += length
-	CMPQ CX, $16
-	JGT  slowForwardCopy
-	CMPQ DX, $8
-	JLT  slowForwardCopy
-	CMPQ R14, $16
-	JLT  slowForwardCopy
-	MOVQ 0(R15), AX
-	MOVQ AX, 0(DI)
-	MOVQ 8(R15), BX
-	MOVQ BX, 8(DI)
-	ADDQ CX, DI
-	JMP  loop
-
-slowForwardCopy:
-	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
-	// can still try 8-byte load stores, provided we can overrun up to 10 extra
-	// bytes. As above, the overrun will be fixed up by subsequent iterations
-	// of the outermost loop.
-	//
-	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
-	// commentary says:
-	//
-	// ----
-	//
-	// The main part of this loop is a simple copy of eight bytes at a time
-	// until we've copied (at least) the requested amount of bytes.  However,
-	// if d and d-offset are less than eight bytes apart (indicating a
-	// repeating pattern of length < 8), we first need to expand the pattern in
-	// order to get the correct results. For instance, if the buffer looks like
-	// this, with the eight-byte <d-offset> and <d> patterns marked as
-	// intervals:
-	//
-	//    abxxxxxxxxxxxx
-	//    [------]           d-offset
-	//      [------]         d
-	//
-	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
-	// once, after which we can move <d> two bytes without moving <d-offset>:
-	//
-	//    ababxxxxxxxxxx
-	//    [------]           d-offset
-	//        [------]       d
-	//
-	// and repeat the exercise until the two no longer overlap.
-	//
-	// This allows us to do very well in the special case of one single byte
-	// repeated many times, without taking a big hit for more general cases.
-	//
-	// The worst case of extra writing past the end of the match occurs when
-	// offset == 1 and length == 1; the last copy will read from byte positions
-	// [0..7] and write to [4..11], whereas it was only supposed to write to
-	// position 1. Thus, ten excess bytes.
-	//
-	// ----
-	//
-	// That "10 byte overrun" worst case is confirmed by Go's
-	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
-	// and finishSlowForwardCopy algorithm.
-	//
-	// if length > len(dst)-d-10 {
-	//   goto verySlowForwardCopy
-	// }
-	SUBQ $10, R14
-	CMPQ CX, R14
-	JGT  verySlowForwardCopy
-
-makeOffsetAtLeast8:
-	// !!! As above, expand the pattern so that offset >= 8 and we can use
-	// 8-byte load/stores.
-	//
-	// for offset < 8 {
-	//   copy 8 bytes from dst[d-offset:] to dst[d:]
-	//   length -= offset
-	//   d      += offset
-	//   offset += offset
-	//   // The two previous lines together means that d-offset, and therefore
-	//   // R15, is unchanged.
-	// }
-	CMPQ DX, $8
-	JGE  fixUpSlowForwardCopy
-	MOVQ (R15), BX
-	MOVQ BX, (DI)
-	SUBQ DX, CX
-	ADDQ DX, DI
-	ADDQ DX, DX
-	JMP  makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
-	// !!! Add length (which might be negative now) to d (implied by DI being
-	// &dst[d]) so that d ends up at the right place when we jump back to the
-	// top of the loop. Before we do that, though, we save DI to AX so that, if
-	// length is positive, copying the remaining length bytes will write to the
-	// right place.
-	MOVQ DI, AX
-	ADDQ CX, DI
-
-finishSlowForwardCopy:
-	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
-	// length means that we overrun, but as above, that will be fixed up by
-	// subsequent iterations of the outermost loop.
-	CMPQ CX, $0
-	JLE  loop
-	MOVQ (R15), BX
-	MOVQ BX, (AX)
-	ADDQ $8, R15
-	ADDQ $8, AX
-	SUBQ $8, CX
-	JMP  finishSlowForwardCopy
-
-verySlowForwardCopy:
-	// verySlowForwardCopy is a simple implementation of forward copy. In C
-	// parlance, this is a do/while loop instead of a while loop, since we know
-	// that length > 0. In Go syntax:
-	//
-	// for {
-	//   dst[d] = dst[d - offset]
-	//   d++
-	//   length--
-	//   if length == 0 {
-	//     break
-	//   }
-	// }
-	MOVB (R15), BX
-	MOVB BX, (DI)
-	INCQ R15
-	INCQ DI
-	DECQ CX
-	JNZ  verySlowForwardCopy
-	JMP  loop
-
-// The code above handles copy tags.
-// ----------------------------------------
-
-end:
-	// This is the end of the "for s < len(src)".
-	//
-	// if d != len(dst) { etc }
-	CMPQ DI, R10
-	JNE  errCorrupt
-
-	// return 0
-	MOVQ $0, ret+48(FP)
-	RET
-
-errCorrupt:
-	// return decodeErrCodeCorrupt
-	MOVQ $1, ret+48(FP)
-	RET
--- a/vendor/github.com/golang/snappy/decode_arm64.s
+++ b/vendor/github.com/golang/snappy/decode_arm64.s
@ -1,494 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-// decode decodes the Snappy-encoded src into dst; it returns 0 on success and 1 (decodeErrCodeCorrupt) on corrupt input.
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-//	- R1, R2	scratch (R1 only ever holds an immediate constant)
-//	- R3	scratch
-//	- R4	length or x
-//	- R5	offset
-//	- R6	&src[s]
-//	- R7	&dst[d]
-//	+ R8	dst_base
-//	+ R9	dst_len
-//	+ R10	dst_base + dst_len
-//	+ R11	src_base
-//	+ R12	src_len
-//	+ R13	src_base + src_len
-//	- R14	used by doCopy (and by the 16-byte literal fast path in doLit)
-//	- R15	used by doCopy (and by the 16-byte literal fast path in doLit)
-//
-// The registers R8-R13 (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly R7 - R8,  and len(dst)-d is R10 - R7.
-// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
-TEXT ·decode(SB), NOSPLIT, $56-56
-	// Initialize R6, R7 and R8-R13.
-	MOVD dst_base+0(FP), R8
-	MOVD dst_len+8(FP), R9
-	MOVD R8, R7
-	MOVD R8, R10
-	ADD  R9, R10, R10
-	MOVD src_base+24(FP), R11
-	MOVD src_len+32(FP), R12
-	MOVD R11, R6
-	MOVD R11, R13
-	ADD  R12, R13, R13
-
-loop:
-	// for s < len(src)
-	CMP R13, R6
-	BEQ end // s == len(src): all input has been consumed
-
-	// R4 = uint32(src[s])
-	//
-	// switch src[s] & 0x03
-	MOVBU (R6), R4
-	MOVW  R4, R3
-	ANDW  $3, R3
-	MOVW  $1, R1
-	CMPW  R1, R3
-	BGE   tagCopy // tag bits 1, 2 and 3 are copy tags; 0 (literal) falls through
-
-	// ----------------------------------------
-	// The code below handles literal tags.
-
-	// case tagLiteral:
-	// x := uint32(src[s] >> 2)
-	// switch
-	MOVW $60, R1
-	LSRW $2, R4, R4
-	CMPW R4, R1
-	BLS  tagLit60Plus // taken when x >= 60
-
-	// case x < 60:
-	// s++
-	ADD $1, R6, R6
-
-doLit:
-	// This is the end of the inner "switch", when we have a literal tag.
-	//
-	// We assume that R4 == x and x fits in a uint32, where x is the variable
-	// used in the pure Go decode_other.go code.
-
-	// length = int(x) + 1
-	//
-	// Unlike the pure Go code, we don't need to check if length <= 0 because
-	// R4 can hold 64 bits, so the increment cannot overflow.
-	ADD $1, R4, R4
-
-	// Prepare to check if copying length bytes will run past the end of dst or
-	// src.
-	//
-	// R2 = len(dst) - d
-	// R3 = len(src) - s
-	MOVD R10, R2
-	SUB  R7, R2, R2
-	MOVD R13, R3
-	SUB  R6, R3, R3
-
-	// !!! Try a faster technique for short (16 or fewer bytes) copies.
-	//
-	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
-	//   goto callMemmove // Fall back on calling runtime·memmove.
-	// }
-	//
-	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
-	// against 21 instead of 16, because it cannot assume that all of its input
-	// is contiguous in memory and so it needs to leave enough source bytes to
-	// read the next tag without refilling buffers, but Go's Decode assumes
-	// contiguousness (the src argument is a []byte).
-	CMP $16, R4
-	BGT callMemmove
-	CMP $16, R2
-	BLT callMemmove
-	CMP $16, R3
-	BLT callMemmove
-
-	// !!! Implement the copy from src to dst as a 16-byte load and store.
-	// (Decode's documentation says that dst and src must not overlap.)
-	//
-	// This always copies 16 bytes, instead of only length bytes, but that's
-	// OK. If the input is a valid Snappy encoding then subsequent iterations
-	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
-	// non-nil error), so the overrun will be ignored.
-	//
-	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
-	// 16-byte loads and stores. This technique probably wouldn't be as
-	// effective on architectures that are fussier about alignment.
-	LDP 0(R6), (R14, R15)
-	STP (R14, R15), 0(R7)
-
-	// d += length
-	// s += length
-	ADD R4, R7, R7
-	ADD R4, R6, R6
-	B   loop
-
-callMemmove:
-	// if length > len(dst)-d || length > len(src)-s { etc }
-	CMP R2, R4
-	BGT errCorrupt
-	CMP R3, R4
-	BGT errCorrupt
-
-	// copy(dst[d:], src[s:s+length])
-	//
-	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
-	// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
-	// three registers to the stack, to save local variables across the CALL.
-	MOVD R7, 8(RSP)
-	MOVD R6, 16(RSP)
-	MOVD R4, 24(RSP)
-	MOVD R7, 32(RSP) // the second copy, at 32..48(RSP), is what gets reloaded after the CALL
-	MOVD R6, 40(RSP)
-	MOVD R4, 48(RSP)
-	CALL runtime·memmove(SB)
-
-	// Restore local variables: unspill registers from the stack and
-	// re-calculate R8-R13.
-	MOVD 32(RSP), R7
-	MOVD 40(RSP), R6
-	MOVD 48(RSP), R4
-	MOVD dst_base+0(FP), R8
-	MOVD dst_len+8(FP), R9
-	MOVD R8, R10
-	ADD  R9, R10, R10
-	MOVD src_base+24(FP), R11
-	MOVD src_len+32(FP), R12
-	MOVD R11, R13
-	ADD  R12, R13, R13
-
-	// d += length
-	// s += length
-	ADD R4, R7, R7
-	ADD R4, R6, R6
-	B   loop
-
-tagLit60Plus:
-	// !!! This fragment does the
-	//
-	// s += x - 58; if uint(s) > uint(len(src)) { etc }
-	//
-	// checks. In the asm version, we code it once instead of once per switch case. As x is 60..63 here, s advances by 2..5 bytes.
-	ADD  R4, R6, R6
-	SUB  $58, R6, R6
-	MOVD R6, R3
-	SUB  R11, R3, R3
-	CMP  R12, R3
-	BGT  errCorrupt // NOTE(review): BGT is a signed compare, whereas the Go check quoted above is unsigned
-
-	// case x == 60:
-	MOVW $61, R1
-	CMPW R1, R4
-	BEQ  tagLit61
-	BGT  tagLit62Plus
-
-	// x = uint32(src[s-1])
-	MOVBU -1(R6), R4
-	B     doLit
-
-tagLit61:
-	// case x == 61:
-	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
-	MOVHU -2(R6), R4
-	B     doLit
-
-tagLit62Plus:
-	CMPW $62, R4
-	BHI  tagLit63
-
-	// case x == 62:
-	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
-	MOVHU -3(R6), R4
-	MOVBU -1(R6), R3
-	ORR   R3<<16, R4
-	B     doLit
-
-tagLit63:
-	// case x == 63:
-	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
-	MOVWU -4(R6), R4
-	B     doLit
-
-	// The code above handles literal tags.
-	// ----------------------------------------
-	// The code below handles copy tags.
-
-tagCopy4:
-	// case tagCopy4:
-	// s += 5
-	ADD $5, R6, R6
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVD R6, R3
-	SUB  R11, R3, R3
-	CMP  R12, R3
-	BGT  errCorrupt
-
-	// length = 1 + int(src[s-5])>>2
-	MOVD $1, R1
-	ADD  R4>>2, R1, R4
-
-	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
-	MOVWU -4(R6), R5
-	B     doCopy
-
-tagCopy2:
-	// case tagCopy2:
-	// s += 3
-	ADD $3, R6, R6
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVD R6, R3
-	SUB  R11, R3, R3
-	CMP  R12, R3
-	BGT  errCorrupt
-
-	// length = 1 + int(src[s-3])>>2
-	MOVD $1, R1
-	ADD  R4>>2, R1, R4
-
-	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-	MOVHU -2(R6), R5
-	B     doCopy
-
-tagCopy:
-	// We have a copy tag. We assume that:
-	//	- R3 == src[s] & 0x03
-	//	- R4 == src[s]
-	CMP $2, R3
-	BEQ tagCopy2
-	BGT tagCopy4
-
-	// case tagCopy1:
-	// s += 2
-	ADD $2, R6, R6
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVD R6, R3
-	SUB  R11, R3, R3
-	CMP  R12, R3
-	BGT  errCorrupt
-
-	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-	MOVD  R4, R5
-	AND   $0xe0, R5
-	MOVBU -1(R6), R3
-	ORR   R5<<3, R3, R5
-
-	// length = 4 + int(src[s-2])>>2&0x7
-	MOVD $7, R1
-	AND  R4>>2, R1, R4
-	ADD  $4, R4, R4
-
-doCopy:
-	// This is the end of the outer "switch", when we have a copy tag.
-	//
-	// We assume that:
-	//	- R4 == length && R4 > 0
-	//	- R5 == offset
-
-	// if offset <= 0 { etc }
-	MOVD $0, R1
-	CMP  R1, R5
-	BLE  errCorrupt
-
-	// if d < offset { etc }
-	MOVD R7, R3
-	SUB  R8, R3, R3
-	CMP  R5, R3
-	BLT  errCorrupt
-
-	// if length > len(dst)-d { etc }
-	MOVD R10, R3
-	SUB  R7, R3, R3
-	CMP  R3, R4
-	BGT  errCorrupt
-
-	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
-	//
-	// Set:
-	//	- R14 = len(dst)-d
-	//	- R15 = &dst[d-offset]
-	MOVD R10, R14
-	SUB  R7, R14, R14
-	MOVD R7, R15
-	SUB  R5, R15, R15
-
-	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
-	//
-	// First, try using two 8-byte load/stores, similar to the doLit technique
-	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
-	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
-	// and not one 16-byte load/store, and the first store has to be before the
-	// second load, due to the overlap if offset is in the range [8, 16).
-	//
-	// if length > 16 || offset < 8 || len(dst)-d < 16 {
-	//   goto slowForwardCopy
-	// }
-	// copy 16 bytes
-	// d += length
-	CMP  $16, R4
-	BGT  slowForwardCopy
-	CMP  $8, R5
-	BLT  slowForwardCopy
-	CMP  $16, R14
-	BLT  slowForwardCopy
-	MOVD 0(R15), R2
-	MOVD R2, 0(R7)
-	MOVD 8(R15), R3
-	MOVD R3, 8(R7)
-	ADD  R4, R7, R7
-	B    loop
-
-slowForwardCopy:
-	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
-	// can still try 8-byte load/stores, provided we can overrun up to 10 extra
-	// bytes. As above, the overrun will be fixed up by subsequent iterations
-	// of the outermost loop.
-	//
-	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
-	// commentary says:
-	//
-	// ----
-	//
-	// The main part of this loop is a simple copy of eight bytes at a time
-	// until we've copied (at least) the requested amount of bytes.  However,
-	// if d and d-offset are less than eight bytes apart (indicating a
-	// repeating pattern of length < 8), we first need to expand the pattern in
-	// order to get the correct results. For instance, if the buffer looks like
-	// this, with the eight-byte <d-offset> and <d> patterns marked as
-	// intervals:
-	//
-	//    abxxxxxxxxxxxx
-	//    [------]           d-offset
-	//      [------]         d
-	//
-	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
-	// once, after which we can move <d> two bytes without moving <d-offset>:
-	//
-	//    ababxxxxxxxxxx
-	//    [------]           d-offset
-	//        [------]       d
-	//
-	// and repeat the exercise until the two no longer overlap.
-	//
-	// This allows us to do very well in the special case of one single byte
-	// repeated many times, without taking a big hit for more general cases.
-	//
-	// The worst case of extra writing past the end of the match occurs when
-	// offset == 1 and length == 1; the last copy will read from byte positions
-	// [0..7] and write to [4..11], whereas it was only supposed to write to
-	// position 1. Thus, ten excess bytes.
-	//
-	// ----
-	//
-	// That "10 byte overrun" worst case is confirmed by Go's
-	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
-	// and finishSlowForwardCopy algorithm.
-	//
-	// if length > len(dst)-d-10 {
-	//   goto verySlowForwardCopy
-	// }
-	SUB $10, R14, R14
-	CMP R14, R4
-	BGT verySlowForwardCopy
-
-makeOffsetAtLeast8:
-	// !!! As above, expand the pattern so that offset >= 8 and we can use
-	// 8-byte load/stores.
-	//
-	// for offset < 8 {
-	//   copy 8 bytes from dst[d-offset:] to dst[d:]
-	//   length -= offset
-	//   d      += offset
-	//   offset += offset
-	//   // The two previous lines together mean that d-offset, and therefore
-	//   // R15, is unchanged.
-	// }
-	CMP  $8, R5
-	BGE  fixUpSlowForwardCopy
-	MOVD (R15), R3
-	MOVD R3, (R7)
-	SUB  R5, R4, R4
-	ADD  R5, R7, R7
-	ADD  R5, R5, R5
-	B    makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
-	// !!! Add length (which might be negative now) to d (implied by R7 being
-	// &dst[d]) so that d ends up at the right place when we jump back to the
-	// top of the loop. Before we do that, though, we save R7 to R2 so that, if
-	// length is positive, copying the remaining length bytes will write to the
-	// right place.
-	MOVD R7, R2
-	ADD  R4, R7, R7
-
-finishSlowForwardCopy:
-	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
-	// length means that we overrun, but as above, that will be fixed up by
-	// subsequent iterations of the outermost loop.
-	MOVD $0, R1
-	CMP  R1, R4
-	BLE  loop
-	MOVD (R15), R3
-	MOVD R3, (R2)
-	ADD  $8, R15, R15
-	ADD  $8, R2, R2
-	SUB  $8, R4, R4
-	B    finishSlowForwardCopy
-
-verySlowForwardCopy:
-	// verySlowForwardCopy is a simple implementation of forward copy. In C
-	// parlance, this is a do/while loop instead of a while loop, since we know
-	// that length > 0. In Go syntax:
-	//
-	// for {
-	//   dst[d] = dst[d - offset]
-	//   d++
-	//   length--
-	//   if length == 0 {
-	//     break
-	//   }
-	// }
-	MOVB (R15), R3
-	MOVB R3, (R7)
-	ADD  $1, R15, R15
-	ADD  $1, R7, R7
-	SUB  $1, R4, R4
-	CBNZ R4, verySlowForwardCopy
-	B    loop
-
-	// The code above handles copy tags.
-	// ----------------------------------------
-
-end:
-	// This is the end of the "for s < len(src)".
-	//
-	// if d != len(dst) { etc }
-	CMP R10, R7
-	BNE errCorrupt // the decoded length does not match len(dst)
-
-	// return 0
-	MOVD $0, ret+48(FP)
-	RET
-
-errCorrupt:
-	// return decodeErrCodeCorrupt
-	MOVD $1, R2
-	MOVD R2, ret+48(FP)
-	RET
--- a/vendor/github.com/golang/snappy/decode_asm.go
+++ b/vendor/github.com/golang/snappy/decode_asm.go
@ -1,15 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-// +build amd64 arm64
-
-package snappy
-
-// decode has the same semantics as in decode_other.go. It is declared without a body because it is implemented in assembly (amd64 and arm64, per this file's build tags).
-//
-//go:noescape
-func decode(dst, src []byte) int
--- a/vendor/github.com/golang/snappy/encode_amd64.s
+++ b/vendor/github.com/golang/snappy/encode_amd64.s
@ -1,730 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
-// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
-// https://github.com/golang/snappy/issues/29
-//
-// As a workaround, the package was built with a known good assembler, and
-// those instructions were disassembled by "objdump -d" to yield the
-//	4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
-// style comments, in AT&T asm syntax. Note that rsp here is a physical
-// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
-// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
-// fine on Go 1.6.
-
-// The asm code generally follows the pure Go code in encode_other.go, except
-// where marked with a "!!!".
-
-// ----------------------------------------------------------------------------
-
-// func emitLiteral(dst, lit []byte) int
-// emitLiteral writes a literal tag for lit, followed by lit itself, to dst and returns the number of bytes written.
-// All local variables fit into registers. The register allocation:
-//	- AX	len(lit)
-//	- BX	n
-//	- DX	return value
-//	- DI	&dst[i]
-//	- R10	&lit[0]
-//
-// The 24 bytes of stack space is to call runtime·memmove.
-//
-// The unusual register allocation of local variables, such as R10 for the
-// source pointer, matches the allocation used at the call site in encodeBlock,
-// which makes it easier to manually inline this function.
-TEXT ·emitLiteral(SB), NOSPLIT, $24-56
-	MOVQ dst_base+0(FP), DI
-	MOVQ lit_base+24(FP), R10
-	MOVQ lit_len+32(FP), AX
-	MOVQ AX, DX
-	MOVL AX, BX
-	SUBL $1, BX // n = len(lit) - 1
-
-	CMPL BX, $60
-	JLT  oneByte
-	CMPL BX, $256
-	JLT  twoBytes
-
-threeBytes:
-	MOVB $0xf4, 0(DI) // tag byte 61<<2: a 2-byte little-endian n follows
-	MOVW BX, 1(DI)    // only the low 16 bits of n are stored; presumably callers keep n < 65536 — verify
-	ADDQ $3, DI
-	ADDQ $3, DX
-	JMP  memmove
-
-twoBytes:
-	MOVB $0xf0, 0(DI) // tag byte 60<<2: a 1-byte n follows
-	MOVB BX, 1(DI)
-	ADDQ $2, DI
-	ADDQ $2, DX
-	JMP  memmove
-
-oneByte:
-	SHLB $2, BX // n < 60 is stored directly in the tag byte as n<<2
-	MOVB BX, 0(DI)
-	ADDQ $1, DI
-	ADDQ $1, DX
-
-memmove:
-	MOVQ DX, ret+48(FP) // the return value, len(lit) plus the tag size, is stored before the CALL
-
-	// copy(dst[i:], lit)
-	//
-	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
-	// DI, R10 and AX as arguments.
-	MOVQ DI, 0(SP)
-	MOVQ R10, 8(SP)
-	MOVQ AX, 16(SP)
-	CALL runtime·memmove(SB)
-	RET
-
-// ----------------------------------------------------------------------------
-
-// func emitCopy(dst []byte, offset, length int) int
-// emitCopy writes one or more copy tags encoding (offset, length) to dst and returns the number of bytes written.
-// All local variables fit into registers. The register allocation:
-//	- AX	length
-//	- SI	&dst[0]
-//	- DI	&dst[i]
-//	- R11	offset
-//
-// The unusual register allocation of local variables, such as R11 for the
-// offset, matches the allocation used at the call site in encodeBlock, which
-// makes it easier to manually inline this function.
-TEXT ·emitCopy(SB), NOSPLIT, $0-48
-	MOVQ dst_base+0(FP), DI
-	MOVQ DI, SI
-	MOVQ offset+24(FP), R11
-	MOVQ length+32(FP), AX
-
-loop0:
-	// for length >= 68 { etc }
-	CMPL AX, $68
-	JLT  step1
-
-	// Emit a length 64 copy, encoded as 3 bytes.
-	MOVB $0xfe, 0(DI) // tag byte: (64-1)<<2 | 2
-	MOVW R11, 1(DI)   // 16-bit offset
-	ADDQ $3, DI
-	SUBL $64, AX
-	JMP  loop0
-
-step1:
-	// if length > 64 { etc }
-	CMPL AX, $64
-	JLE  step2
-
-	// Emit a length 60 copy, encoded as 3 bytes.
-	MOVB $0xee, 0(DI) // tag byte: (60-1)<<2 | 2
-	MOVW R11, 1(DI)
-	ADDQ $3, DI
-	SUBL $60, AX
-
-step2:
-	// if length >= 12 || offset >= 2048 { goto step3 }
-	CMPL AX, $12
-	JGE  step3
-	CMPL R11, $2048
-	JGE  step3
-
-	// Emit the remaining copy, encoded as 2 bytes: the tag is (offset>>8)<<5 | (length-4)<<2 | 1, followed by the low byte of offset.
-	MOVB R11, 1(DI)
-	SHRL $8, R11
-	SHLB $5, R11
-	SUBB $4, AX
-	SHLB $2, AX
-	ORB  AX, R11
-	ORB  $1, R11
-	MOVB R11, 0(DI)
-	ADDQ $2, DI
-
-	// Return the number of bytes written.
-	SUBQ SI, DI
-	MOVQ DI, ret+40(FP)
-	RET
-
-step3:
-	// Emit the remaining copy, encoded as 3 bytes: the tag is (length-1)<<2 | 2, followed by the 16-bit offset.
-	SUBL $1, AX
-	SHLB $2, AX
-	ORB  $2, AX
-	MOVB AX, 0(DI)
-	MOVW R11, 1(DI)
-	ADDQ $3, DI
-
-	// Return the number of bytes written.
-	SUBQ SI, DI
-	MOVQ DI, ret+40(FP)
-	RET
-
-// ----------------------------------------------------------------------------
-
-// func extendMatch(src []byte, i, j int) int
-// extendMatch advances i and j in lock step while src[i] == src[j] and j < len(src), and returns the resulting j.
-// All local variables fit into registers. The register allocation:
-//	- DX	&src[0]
-//	- SI	&src[j]
-//	- R13	&src[len(src) - 8]
-//	- R14	&src[len(src)]
-//	- R15	&src[i]
-//
-// The unusual register allocation of local variables, such as R15 for a source
-// pointer, matches the allocation used at the call site in encodeBlock, which
-// makes it easier to manually inline this function.
-TEXT ·extendMatch(SB), NOSPLIT, $0-48
-	MOVQ src_base+0(FP), DX
-	MOVQ src_len+8(FP), R14
-	MOVQ i+24(FP), R15
-	MOVQ j+32(FP), SI
-	ADDQ DX, R14
-	ADDQ DX, R15
-	ADDQ DX, SI
-	MOVQ R14, R13
-	SUBQ $8, R13
-
-cmp8:
-	// As long as we are 8 or more bytes before the end of src, we can load and
-	// compare 8 bytes at a time. If those 8 bytes are equal, repeat. NOTE(review): only SI (&src[j]) is bounds-checked; this assumes i <= j — confirm against callers.
-	CMPQ SI, R13
-	JA   cmp1
-	MOVQ (R15), AX
-	MOVQ (SI), BX
-	CMPQ AX, BX
-	JNE  bsf
-	ADDQ $8, R15
-	ADDQ $8, SI
-	JMP  cmp8
-
-bsf:
-	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
-	// the index of the first byte that differs. The BSF instruction finds the
-	// least significant 1 bit, the amd64 architecture is little-endian, and
-	// the shift by 3 converts a bit index to a byte index.
-	XORQ AX, BX
-	BSFQ BX, BX
-	SHRQ $3, BX
-	ADDQ BX, SI
-
-	// Convert from &src[ret] to ret.
-	SUBQ DX, SI
-	MOVQ SI, ret+40(FP)
-	RET
-
-cmp1:
-	// In src's tail, compare 1 byte at a time.
-	CMPQ SI, R14
-	JAE  extendMatchEnd
-	MOVB (R15), AX
-	MOVB (SI), BX
-	CMPB AX, BX
-	JNE  extendMatchEnd
-	ADDQ $1, R15
-	ADDQ $1, SI
-	JMP  cmp1
-
-extendMatchEnd:
-	// Convert from &src[ret] to ret.
-	SUBQ DX, SI
-	MOVQ SI, ret+40(FP)
-	RET
-
-// ----------------------------------------------------------------------------
-
-// func encodeBlock(dst, src []byte) (d int)
-// encodeBlock encodes src into dst as a sequence of literal and copy tags and returns d, the number of bytes written to dst.
-// All local variables fit into registers, other than "var table". The register
-// allocation:
-//	- AX	.	.
-//	- BX	.	.
-//	- CX	56	shift (note that amd64 shifts by non-immediates must use CX).
-//	- DX	64	&src[0], tableSize
-//	- SI	72	&src[s]
-//	- DI	80	&dst[d]
-//	- R9	88	sLimit
-//	- R10	.	&src[nextEmit]
-//	- R11	96	prevHash, currHash, nextHash, offset
-//	- R12	104	&src[base], skip
-//	- R13	.	&src[nextS], &src[len(src) - 8]
-//	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
-//	- R15	112	candidate
-//
-// The second column (56, 64, etc) is the stack offset to spill the registers
-// when calling other functions. We could pack this slightly tighter, but it's
-// simpler to have a dedicated spill map independent of the function called.
-//
-// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
-// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
-// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
-TEXT ·encodeBlock(SB), 0, $32888-56
-	MOVQ dst_base+0(FP), DI
-	MOVQ src_base+24(FP), SI
-	MOVQ src_len+32(FP), R14
-
-	// shift, tableSize := uint32(32-8), 1<<8
-	MOVQ $24, CX
-	MOVQ $256, DX
-
-calcShift:
-	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
-	//	shift--
-	// }
-	CMPQ DX, $16384
-	JGE  varTable
-	CMPQ DX, R14
-	JGE  varTable
-	SUBQ $1, CX
-	SHLQ $1, DX
-	JMP  calcShift
-
-varTable:
-	// var table [maxTableSize]uint16
-	//
-	// In the asm code, unlike the Go code, we can zero-initialize only the
-	// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
-	// writes 16 bytes, so we can do only tableSize/8 writes instead of the
-	// 2048 writes that would zero-initialize all of table's 32768 bytes.
-	SHRQ $3, DX
-	LEAQ table-32768(SP), BX
-	PXOR X0, X0
-
-memclr:
-	MOVOU X0, 0(BX)
-	ADDQ  $16, BX
-	SUBQ  $1, DX
-	JNZ   memclr
-
-	// !!! DX = &src[0]
-	MOVQ SI, DX
-
-	// sLimit := len(src) - inputMargin
-	MOVQ R14, R9
-	SUBQ $15, R9 // inputMargin is 15 in this implementation
-
-	// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
-	// change for the rest of the function.
-	MOVQ CX, 56(SP)
-	MOVQ DX, 64(SP)
-	MOVQ R9, 88(SP)
-
-	// nextEmit := 0
-	MOVQ DX, R10
-
-	// s := 1
-	ADDQ $1, SI
-
-	// nextHash := hash(load32(src, s), shift)
-	MOVL  0(SI), R11
-	IMULL $0x1e35a7bd, R11
-	SHRL  CX, R11
-
-outer:
-	// for { etc }
-
-	// skip := 32
-	MOVQ $32, R12
-
-	// nextS := s
-	MOVQ SI, R13
-
-	// candidate := 0
-	MOVQ $0, R15
-
-inner0:
-	// for { etc }
-
-	// s := nextS
-	MOVQ R13, SI
-
-	// bytesBetweenHashLookups := skip >> 5
-	MOVQ R12, R14
-	SHRQ $5, R14
-
-	// nextS = s + bytesBetweenHashLookups
-	ADDQ R14, R13
-
-	// skip += bytesBetweenHashLookups
-	ADDQ R14, R12
-
-	// if nextS > sLimit { goto emitRemainder }
-	MOVQ R13, AX
-	SUBQ DX, AX
-	CMPQ AX, R9
-	JA   emitRemainder
-
-	// candidate = int(table[nextHash])
-	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
-	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
-	BYTE $0x4e
-	BYTE $0x0f
-	BYTE $0xb7
-	BYTE $0x7c
-	BYTE $0x5c
-	BYTE $0x78
-
-	// table[nextHash] = uint16(s)
-	MOVQ SI, AX
-	SUBQ DX, AX
-
-	// XXX: MOVW AX, table-32768(SP)(R11*2)
-	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
-	BYTE $0x66
-	BYTE $0x42
-	BYTE $0x89
-	BYTE $0x44
-	BYTE $0x5c
-	BYTE $0x78
-
-	// nextHash = hash(load32(src, nextS), shift)
-	MOVL  0(R13), R11
-	IMULL $0x1e35a7bd, R11
-	SHRL  CX, R11
-
-	// if load32(src, s) != load32(src, candidate) { continue } break
-	MOVL 0(SI), AX
-	MOVL (DX)(R15*1), BX
-	CMPL AX, BX
-	JNE  inner0
-
-fourByteMatch:
-	// As per the encode_other.go code:
-	//
-	// A 4-byte match has been found. We'll later see etc.
-
-	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
-	// on inputMargin in encode.go.
-	MOVQ SI, AX
-	SUBQ R10, AX
-	CMPQ AX, $16
-	JLE  emitLiteralFastPath
-
-	// ----------------------------------------
-	// Begin inline of the emitLiteral call.
-	//
-	// d += emitLiteral(dst[d:], src[nextEmit:s])
-
-	MOVL AX, BX
-	SUBL $1, BX
-
-	CMPL BX, $60
-	JLT  inlineEmitLiteralOneByte
-	CMPL BX, $256
-	JLT  inlineEmitLiteralTwoBytes
-
-inlineEmitLiteralThreeBytes:
-	MOVB $0xf4, 0(DI)
-	MOVW BX, 1(DI)
-	ADDQ $3, DI
-	JMP  inlineEmitLiteralMemmove
-
-inlineEmitLiteralTwoBytes:
-	MOVB $0xf0, 0(DI)
-	MOVB BX, 1(DI)
-	ADDQ $2, DI
-	JMP  inlineEmitLiteralMemmove
-
-inlineEmitLiteralOneByte:
-	SHLB $2, BX
-	MOVB BX, 0(DI)
-	ADDQ $1, DI
-
-inlineEmitLiteralMemmove:
-	// Spill local variables (registers) onto the stack; call; unspill.
-	//
-	// copy(dst[i:], lit)
-	//
-	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
-	// DI, R10 and AX as arguments.
-	MOVQ DI, 0(SP)
-	MOVQ R10, 8(SP)
-	MOVQ AX, 16(SP)
-	ADDQ AX, DI              // Finish the "d +=" part of "d += emitLiteral(etc)".
-	MOVQ SI, 72(SP)
-	MOVQ DI, 80(SP)
-	MOVQ R15, 112(SP)
-	CALL runtime·memmove(SB)
-	MOVQ 56(SP), CX
-	MOVQ 64(SP), DX
-	MOVQ 72(SP), SI
-	MOVQ 80(SP), DI
-	MOVQ 88(SP), R9
-	MOVQ 112(SP), R15
-	JMP  inner1
-
-inlineEmitLiteralEnd:
-	// End inline of the emitLiteral call. (No branch in this function targets this label; the memmove path above jumps straight to inner1.)
-	// ----------------------------------------
-
-emitLiteralFastPath:
-	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
-	MOVB AX, BX
-	SUBB $1, BX
-	SHLB $2, BX
-	MOVB BX, (DI)
-	ADDQ $1, DI
-
-	// !!! Implement the copy from lit to dst as a 16-byte load and store.
-	// (Encode's documentation says that dst and src must not overlap.)
-	//
-	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
-	// OK. Subsequent iterations will fix up the overrun.
-	//
-	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
-	// 16-byte loads and stores. This technique probably wouldn't be as
-	// effective on architectures that are fussier about alignment.
-	MOVOU 0(R10), X0
-	MOVOU X0, 0(DI)
-	ADDQ  AX, DI
-
-inner1:
-	// for { etc }
-
-	// base := s
-	MOVQ SI, R12
-
-	// !!! offset := base - candidate
-	MOVQ R12, R11
-	SUBQ R15, R11
-	SUBQ DX, R11
-
-	// ----------------------------------------
-	// Begin inline of the extendMatch call.
-	//
-	// s = extendMatch(src, candidate+4, s+4)
-
-	// !!! R14 = &src[len(src)]
-	MOVQ src_len+32(FP), R14
-	ADDQ DX, R14
-
-	// !!! R13 = &src[len(src) - 8]
-	MOVQ R14, R13
-	SUBQ $8, R13
-
-	// !!! R15 = &src[candidate + 4]
-	ADDQ $4, R15
-	ADDQ DX, R15
-
-	// !!! s += 4
-	ADDQ $4, SI
-
-inlineExtendMatchCmp8:
-	// As long as we are 8 or more bytes before the end of src, we can load and
-	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
-	CMPQ SI, R13
-	JA   inlineExtendMatchCmp1
-	MOVQ (R15), AX
-	MOVQ (SI), BX
-	CMPQ AX, BX
-	JNE  inlineExtendMatchBSF
-	ADDQ $8, R15
-	ADDQ $8, SI
-	JMP  inlineExtendMatchCmp8
-
-inlineExtendMatchBSF:
-	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
-	// the index of the first byte that differs. The BSF instruction finds the
-	// least significant 1 bit, the amd64 architecture is little-endian, and
-	// the shift by 3 converts a bit index to a byte index.
-	XORQ AX, BX
-	BSFQ BX, BX
-	SHRQ $3, BX
-	ADDQ BX, SI
-	JMP  inlineExtendMatchEnd
-
-inlineExtendMatchCmp1:
-	// In src's tail, compare 1 byte at a time.
-	CMPQ SI, R14
-	JAE  inlineExtendMatchEnd
-	MOVB (R15), AX
-	MOVB (SI), BX
-	CMPB AX, BX
-	JNE  inlineExtendMatchEnd
-	ADDQ $1, R15
-	ADDQ $1, SI
-	JMP  inlineExtendMatchCmp1
-
-inlineExtendMatchEnd:
-	// End inline of the extendMatch call.
-	// ----------------------------------------
-
-	// ----------------------------------------
-	// Begin inline of the emitCopy call.
-	//
-	// d += emitCopy(dst[d:], base-candidate, s-base)
-
-	// !!! length := s - base
-	MOVQ SI, AX
-	SUBQ R12, AX
-
-inlineEmitCopyLoop0:
-	// for length >= 68 { etc }
-	CMPL AX, $68
-	JLT  inlineEmitCopyStep1
-
-	// Emit a length 64 copy, encoded as 3 bytes.
-	MOVB $0xfe, 0(DI)
-	MOVW R11, 1(DI)
-	ADDQ $3, DI
-	SUBL $64, AX
-	JMP  inlineEmitCopyLoop0
-
-inlineEmitCopyStep1:
-	// if length > 64 { etc }
-	CMPL AX, $64
-	JLE  inlineEmitCopyStep2
-
-	// Emit a length 60 copy, encoded as 3 bytes.
-	MOVB $0xee, 0(DI)
-	MOVW R11, 1(DI)
-	ADDQ $3, DI
-	SUBL $60, AX
-
-inlineEmitCopyStep2:
-	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
-	CMPL AX, $12
-	JGE  inlineEmitCopyStep3
-	CMPL R11, $2048
-	JGE  inlineEmitCopyStep3
-
-	// Emit the remaining copy, encoded as 2 bytes.
-	MOVB R11, 1(DI)
-	SHRL $8, R11
-	SHLB $5, R11
-	SUBB $4, AX
-	SHLB $2, AX
-	ORB  AX, R11
-	ORB  $1, R11
-	MOVB R11, 0(DI)
-	ADDQ $2, DI
-	JMP  inlineEmitCopyEnd
-
-inlineEmitCopyStep3:
-	// Emit the remaining copy, encoded as 3 bytes.
-	SUBL $1, AX
-	SHLB $2, AX
-	ORB  $2, AX
-	MOVB AX, 0(DI)
-	MOVW R11, 1(DI)
-	ADDQ $3, DI
-
-inlineEmitCopyEnd:
-	// End inline of the emitCopy call.
-	// ----------------------------------------
-
-	// nextEmit = s
-	MOVQ SI, R10
-
-	// if s >= sLimit { goto emitRemainder }
-	MOVQ SI, AX
-	SUBQ DX, AX
-	CMPQ AX, R9
-	JAE  emitRemainder
-
-	// As per the encode_other.go code:
-	//
-	// We could immediately etc.
-
-	// x := load64(src, s-1)
-	MOVQ -1(SI), R14
-
-	// prevHash := hash(uint32(x>>0), shift)
-	MOVL  R14, R11
-	IMULL $0x1e35a7bd, R11
-	SHRL  CX, R11
-
-	// table[prevHash] = uint16(s-1)
-	MOVQ SI, AX
-	SUBQ DX, AX
-	SUBQ $1, AX
-
-	// XXX: MOVW AX, table-32768(SP)(R11*2)
-	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
-	BYTE $0x66
-	BYTE $0x42
-	BYTE $0x89
-	BYTE $0x44
-	BYTE $0x5c
-	BYTE $0x78
-
-	// currHash := hash(uint32(x>>8), shift)
-	SHRQ  $8, R14
-	MOVL  R14, R11
-	IMULL $0x1e35a7bd, R11
-	SHRL  CX, R11
-
-	// candidate = int(table[currHash])
-	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
-	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
-	BYTE $0x4e
-	BYTE $0x0f
-	BYTE $0xb7
-	BYTE $0x7c
-	BYTE $0x5c
-	BYTE $0x78
-
-	// table[currHash] = uint16(s)
-	ADDQ $1, AX
-
-	// XXX: MOVW AX, table-32768(SP)(R11*2)
-	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
-	BYTE $0x66
-	BYTE $0x42
-	BYTE $0x89
-	BYTE $0x44
-	BYTE $0x5c
-	BYTE $0x78
-
-	// if uint32(x>>8) == load32(src, candidate) { continue }
-	MOVL (DX)(R15*1), BX
-	CMPL R14, BX
-	JEQ  inner1
-
-	// nextHash = hash(uint32(x>>16), shift)
-	SHRQ  $8, R14
-	MOVL  R14, R11
-	IMULL $0x1e35a7bd, R11
-	SHRL  CX, R11
-
-	// s++
-	ADDQ $1, SI
-
-	// break out of the inner1 for loop, i.e. continue the outer loop.
-	JMP outer
-
-emitRemainder:
-	// if nextEmit < len(src) { etc }
-	MOVQ src_len+32(FP), AX
-	ADDQ DX, AX
-	CMPQ R10, AX
-	JEQ  encodeBlockEnd
-
-	// d += emitLiteral(dst[d:], src[nextEmit:])
-	//
-	// Push args.
-	MOVQ DI, 0(SP)
-	MOVQ $0, 8(SP)   // Unnecessary, as the callee ignores it, but conservative.
-	MOVQ $0, 16(SP)  // Unnecessary, as the callee ignores it, but conservative.
-	MOVQ R10, 24(SP)
-	SUBQ R10, AX
-	MOVQ AX, 32(SP)
-	MOVQ AX, 40(SP)  // Unnecessary, as the callee ignores it, but conservative.
-
-	// Spill local variables (registers) onto the stack; call; unspill.
-	MOVQ DI, 80(SP)
-	CALL ·emitLiteral(SB)
-	MOVQ 80(SP), DI
-
-	// Finish the "d +=" part of "d += emitLiteral(etc)". 48(SP) is the callee's return-value slot (ret+48(FP) in emitLiteral).
-	ADDQ 48(SP), DI
-
-encodeBlockEnd:
-	MOVQ dst_base+0(FP), AX
-	SUBQ AX, DI
-	MOVQ DI, d+48(FP)
-	RET
--- a/vendor/github.com/golang/snappy/encode_arm64.s
+++ b/vendor/github.com/golang/snappy/encode_arm64.s
@ -1,722 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-// The asm code generally follows the pure Go code in encode_other.go, except
-// where marked with a "!!!".
-
-// ----------------------------------------------------------------------------
-
-// func emitLiteral(dst, lit []byte) int
-//
-// All local variables fit into registers. The register allocation:
-//	- R3	len(lit)
-//	- R4	n
-//	- R6	return value
-//	- R8	&dst[i]
-//	- R10	&lit[0]
-//
-// The 32 bytes of stack space are for the call to runtime·memmove.
-//
-// The unusual register allocation of local variables, such as R10 for the
-// source pointer, matches the allocation used at the call site in encodeBlock,
-// which makes it easier to manually inline this function.
-TEXT ·emitLiteral(SB), NOSPLIT, $32-56
-	MOVD dst_base+0(FP), R8
-	MOVD lit_base+24(FP), R10
-	MOVD lit_len+32(FP), R3
-	MOVD R3, R6
-	MOVW R3, R4
-	SUBW $1, R4, R4
-
-	CMPW $60, R4
-	BLT  oneByte
-	CMPW $256, R4
-	BLT  twoBytes
-
-threeBytes:
-	MOVD $0xf4, R2
-	MOVB R2, 0(R8)
-	MOVW R4, 1(R8)
-	ADD  $3, R8, R8
-	ADD  $3, R6, R6
-	B    memmove
-
-twoBytes:
-	MOVD $0xf0, R2
-	MOVB R2, 0(R8)
-	MOVB R4, 1(R8)
-	ADD  $2, R8, R8
-	ADD  $2, R6, R6
-	B    memmove
-
-oneByte:
-	LSLW $2, R4, R4
-	MOVB R4, 0(R8)
-	ADD  $1, R8, R8
-	ADD  $1, R6, R6
-
-memmove:
-	MOVD R6, ret+48(FP)
-
-	// copy(dst[i:], lit)
-	//
-	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
-	// R8, R10 and R3 as arguments.
-	MOVD R8, 8(RSP)
-	MOVD R10, 16(RSP)
-	MOVD R3, 24(RSP)
-	CALL runtime·memmove(SB)
-	RET
-
-// ----------------------------------------------------------------------------
-
-// func emitCopy(dst []byte, offset, length int) int
-//
-// All local variables fit into registers. The register allocation:
-//	- R3	length
-//	- R7	&dst[0]
-//	- R8	&dst[i]
-//	- R11	offset
-//
-// The unusual register allocation of local variables, such as R11 for the
-// offset, matches the allocation used at the call site in encodeBlock, which
-// makes it easier to manually inline this function.
-TEXT ·emitCopy(SB), NOSPLIT, $0-48
-	MOVD dst_base+0(FP), R8
-	MOVD R8, R7
-	MOVD offset+24(FP), R11
-	MOVD length+32(FP), R3
-
-loop0:
-	// for length >= 68 { etc }
-	CMPW $68, R3
-	BLT  step1
-
-	// Emit a length 64 copy, encoded as 3 bytes.
-	MOVD $0xfe, R2
-	MOVB R2, 0(R8)
-	MOVW R11, 1(R8)
-	ADD  $3, R8, R8
-	SUB  $64, R3, R3
-	B    loop0
-
-step1:
-	// if length > 64 { etc }
-	CMP $64, R3
-	BLE step2
-
-	// Emit a length 60 copy, encoded as 3 bytes.
-	MOVD $0xee, R2
-	MOVB R2, 0(R8)
-	MOVW R11, 1(R8)
-	ADD  $3, R8, R8
-	SUB  $60, R3, R3
-
-step2:
-	// if length >= 12 || offset >= 2048 { goto step3 }
-	CMP  $12, R3
-	BGE  step3
-	CMPW $2048, R11
-	BGE  step3
-
-	// Emit the remaining copy, encoded as 2 bytes.
-	MOVB R11, 1(R8)
-	LSRW $3, R11, R11
-	AND  $0xe0, R11, R11
-	SUB  $4, R3, R3
-	LSLW $2, R3
-	AND  $0xff, R3, R3
-	ORRW R3, R11, R11
-	ORRW $1, R11, R11
-	MOVB R11, 0(R8)
-	ADD  $2, R8, R8
-
-	// Return the number of bytes written.
-	SUB  R7, R8, R8
-	MOVD R8, ret+40(FP)
-	RET
-
-step3:
-	// Emit the remaining copy, encoded as 3 bytes.
-	SUB  $1, R3, R3
-	AND  $0xff, R3, R3
-	LSLW $2, R3, R3
-	ORRW $2, R3, R3
-	MOVB R3, 0(R8)
-	MOVW R11, 1(R8)
-	ADD  $3, R8, R8
-
-	// Return the number of bytes written.
-	SUB  R7, R8, R8
-	MOVD R8, ret+40(FP)
-	RET
-
-// ----------------------------------------------------------------------------
-
-// func extendMatch(src []byte, i, j int) int
-//
-// All local variables fit into registers. The register allocation:
-//	- R6	&src[0]
-//	- R7	&src[j]
-//	- R13	&src[len(src) - 8]
-//	- R14	&src[len(src)]
-//	- R15	&src[i]
-//
-// The unusual register allocation of local variables, such as R15 for a source
-// pointer, matches the allocation used at the call site in encodeBlock, which
-// makes it easier to manually inline this function.
-TEXT ·extendMatch(SB), NOSPLIT, $0-48
-	MOVD src_base+0(FP), R6
-	MOVD src_len+8(FP), R14
-	MOVD i+24(FP), R15
-	MOVD j+32(FP), R7
-	ADD  R6, R14, R14
-	ADD  R6, R15, R15
-	ADD  R6, R7, R7
-	MOVD R14, R13
-	SUB  $8, R13, R13
-
-cmp8:
-	// As long as we are 8 or more bytes before the end of src, we can load and
-	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
-	CMP  R13, R7
-	BHI  cmp1
-	MOVD (R15), R3
-	MOVD (R7), R4
-	CMP  R4, R3
-	BNE  bsf
-	ADD  $8, R15, R15
-	ADD  $8, R7, R7
-	B    cmp8
-
-bsf:
-	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
-	// the index of the first byte that differs.
-	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
-	// combination of which finds the least significant bit which is set.
-	// The arm64 architecture is little-endian, and the shift by 3 converts
-	// a bit index to a byte index.
-	EOR  R3, R4, R4
-	RBIT R4, R4
-	CLZ  R4, R4
-	ADD  R4>>3, R7, R7
-
-	// Convert from &src[ret] to ret.
-	SUB  R6, R7, R7
-	MOVD R7, ret+40(FP)
-	RET
-
-cmp1:
-	// In src's tail, compare 1 byte at a time.
-	CMP  R7, R14
-	BLS  extendMatchEnd
-	MOVB (R15), R3
-	MOVB (R7), R4
-	CMP  R4, R3
-	BNE  extendMatchEnd
-	ADD  $1, R15, R15
-	ADD  $1, R7, R7
-	B    cmp1
-
-extendMatchEnd:
-	// Convert from &src[ret] to ret.
-	SUB  R6, R7, R7
-	MOVD R7, ret+40(FP)
-	RET
-
-// ----------------------------------------------------------------------------
-
-// func encodeBlock(dst, src []byte) (d int)
-//
-// All local variables fit into registers, other than "var table". The register
-// allocation:
-//	- R3	.	.
-//	- R4	.	.
-//	- R5	64	shift
-//	- R6	72	&src[0], tableSize
-//	- R7	80	&src[s]
-//	- R8	88	&dst[d]
-//	- R9	96	sLimit
-//	- R10	.	&src[nextEmit]
-//	- R11	104	prevHash, currHash, nextHash, offset
-//	- R12	112	&src[base], skip
-//	- R13	.	&src[nextS], &src[len(src) - 8]
-//	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
-//	- R15	120	candidate
-//	- R16	.	hash constant, 0x1e35a7bd
-//	- R17	.	&table
-//	- .  	128	table
-//
-// The second column (64, 72, etc) is the stack offset to spill the registers
-// when calling other functions. We could pack this slightly tighter, but it's
-// simpler to have a dedicated spill map independent of the function called.
-//
-// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
-// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
-// local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
-TEXT ·encodeBlock(SB), 0, $32896-56
-	MOVD dst_base+0(FP), R8
-	MOVD src_base+24(FP), R7
-	MOVD src_len+32(FP), R14
-
-	// shift, tableSize := uint32(32-8), 1<<8
-	MOVD  $24, R5
-	MOVD  $256, R6
-	MOVW  $0xa7bd, R16
-	MOVKW $(0x1e35<<16), R16
-
-calcShift:
-	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
-	//	shift--
-	// }
-	MOVD $16384, R2
-	CMP  R2, R6
-	BGE  varTable
-	CMP  R14, R6
-	BGE  varTable
-	SUB  $1, R5, R5
-	LSL  $1, R6, R6
-	B    calcShift
-
-varTable:
-	// var table [maxTableSize]uint16
-	//
-	// In the asm code, unlike the Go code, we can zero-initialize only the
-	// first tableSize elements. Each uint16 element is 2 bytes and each
-	// iteration writes 64 bytes, so we need only tableSize/32 iterations
-	// instead of the 512 iterations that would zero-initialize all of table's
-	// 32768 bytes. This clear could overrun the first tableSize elements, but
-	// it won't overrun the allocated stack size.
-	ADD  $128, RSP, R17
-	MOVD R17, R4
-
-	// !!! R6 = &table[tableSize]
-	ADD R6<<1, R17, R6
-
-memclr:
-	STP.P (ZR, ZR), 64(R4)
-	STP   (ZR, ZR), -48(R4)
-	STP   (ZR, ZR), -32(R4)
-	STP   (ZR, ZR), -16(R4)
-	CMP   R4, R6
-	BHI   memclr
-
-	// !!! R6 = &src[0]
-	MOVD R7, R6
-
-	// sLimit := len(src) - inputMargin
-	MOVD R14, R9
-	SUB  $15, R9, R9
-
-	// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
-	// change for the rest of the function.
-	MOVD R5, 64(RSP)
-	MOVD R6, 72(RSP)
-	MOVD R9, 96(RSP)
-
-	// nextEmit := 0
-	MOVD R6, R10
-
-	// s := 1
-	ADD $1, R7, R7
-
-	// nextHash := hash(load32(src, s), shift)
-	MOVW 0(R7), R11
-	MULW R16, R11, R11
-	LSRW R5, R11, R11
-
-outer:
-	// for { etc }
-
-	// skip := 32
-	MOVD $32, R12
-
-	// nextS := s
-	MOVD R7, R13
-
-	// candidate := 0
-	MOVD $0, R15
-
-inner0:
-	// for { etc }
-
-	// s := nextS
-	MOVD R13, R7
-
-	// bytesBetweenHashLookups := skip >> 5
-	MOVD R12, R14
-	LSR  $5, R14, R14
-
-	// nextS = s + bytesBetweenHashLookups
-	ADD R14, R13, R13
-
-	// skip += bytesBetweenHashLookups
-	ADD R14, R12, R12
-
-	// if nextS > sLimit { goto emitRemainder }
-	MOVD R13, R3
-	SUB  R6, R3, R3
-	CMP  R9, R3
-	BHI  emitRemainder
-
-	// candidate = int(table[nextHash])
-	MOVHU 0(R17)(R11<<1), R15
-
-	// table[nextHash] = uint16(s)
-	MOVD R7, R3
-	SUB  R6, R3, R3
-
-	MOVH R3, 0(R17)(R11<<1)
-
-	// nextHash = hash(load32(src, nextS), shift)
-	MOVW 0(R13), R11
-	MULW R16, R11
-	LSRW R5, R11, R11
-
-	// if load32(src, s) != load32(src, candidate) { continue } break
-	MOVW 0(R7), R3
-	MOVW (R6)(R15), R4
-	CMPW R4, R3
-	BNE  inner0
-
-fourByteMatch:
-	// As per the encode_other.go code:
-	//
-	// A 4-byte match has been found. We'll later see etc.
-
-	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
-	// on inputMargin in encode.go.
-	MOVD R7, R3
-	SUB  R10, R3, R3
-	CMP  $16, R3
-	BLE  emitLiteralFastPath
-
-	// ----------------------------------------
-	// Begin inline of the emitLiteral call.
-	//
-	// d += emitLiteral(dst[d:], src[nextEmit:s])
-
-	MOVW R3, R4
-	SUBW $1, R4, R4
-
-	MOVW $60, R2
-	CMPW R2, R4
-	BLT  inlineEmitLiteralOneByte
-	MOVW $256, R2
-	CMPW R2, R4
-	BLT  inlineEmitLiteralTwoBytes
-
-inlineEmitLiteralThreeBytes:
-	MOVD $0xf4, R1
-	MOVB R1, 0(R8)
-	MOVW R4, 1(R8)
-	ADD  $3, R8, R8
-	B    inlineEmitLiteralMemmove
-
-inlineEmitLiteralTwoBytes:
-	MOVD $0xf0, R1
-	MOVB R1, 0(R8)
-	MOVB R4, 1(R8)
-	ADD  $2, R8, R8
-	B    inlineEmitLiteralMemmove
-
-inlineEmitLiteralOneByte:
-	LSLW $2, R4, R4
-	MOVB R4, 0(R8)
-	ADD  $1, R8, R8
-
-inlineEmitLiteralMemmove:
-	// Spill local variables (registers) onto the stack; call; unspill.
-	//
-	// copy(dst[i:], lit)
-	//
-	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
-	// R8, R10 and R3 as arguments.
-	MOVD R8, 8(RSP)
-	MOVD R10, 16(RSP)
-	MOVD R3, 24(RSP)
-
-	// Finish the "d +=" part of "d += emitLiteral(etc)".
-	ADD   R3, R8, R8
-	MOVD  R7, 80(RSP)
-	MOVD  R8, 88(RSP)
-	MOVD  R15, 120(RSP)
-	CALL  runtime·memmove(SB)
-	MOVD  64(RSP), R5
-	MOVD  72(RSP), R6
-	MOVD  80(RSP), R7
-	MOVD  88(RSP), R8
-	MOVD  96(RSP), R9
-	MOVD  120(RSP), R15
-	ADD   $128, RSP, R17
-	MOVW  $0xa7bd, R16
-	MOVKW $(0x1e35<<16), R16
-	B     inner1
-
-inlineEmitLiteralEnd:
-	// End inline of the emitLiteral call.
-	// ----------------------------------------
-
-emitLiteralFastPath:
-	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
-	MOVB R3, R4
-	SUBW $1, R4, R4
-	AND  $0xff, R4, R4
-	LSLW $2, R4, R4
-	MOVB R4, (R8)
-	ADD  $1, R8, R8
-
-	// !!! Implement the copy from lit to dst as a 16-byte load and store.
-	// (Encode's documentation says that dst and src must not overlap.)
-	//
-	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
-	// OK. Subsequent iterations will fix up the overrun.
-	//
-	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
-	// 16-byte loads and stores. This technique probably wouldn't be as
-	// effective on architectures that are fussier about alignment.
-	LDP 0(R10), (R0, R1)
-	STP (R0, R1), 0(R8)
-	ADD R3, R8, R8
-
-inner1:
-	// for { etc }
-
-	// base := s
-	MOVD R7, R12
-
-	// !!! offset := base - candidate
-	MOVD R12, R11
-	SUB  R15, R11, R11
-	SUB  R6, R11, R11
-
-	// ----------------------------------------
-	// Begin inline of the extendMatch call.
-	//
-	// s = extendMatch(src, candidate+4, s+4)
-
-	// !!! R14 = &src[len(src)]
-	MOVD src_len+32(FP), R14
-	ADD  R6, R14, R14
-
-	// !!! R13 = &src[len(src) - 8]
-	MOVD R14, R13
-	SUB  $8, R13, R13
-
-	// !!! R15 = &src[candidate + 4]
-	ADD $4, R15, R15
-	ADD R6, R15, R15
-
-	// !!! s += 4
-	ADD $4, R7, R7
-
-inlineExtendMatchCmp8:
-	// As long as we are 8 or more bytes before the end of src, we can load and
-	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
-	CMP  R13, R7
-	BHI  inlineExtendMatchCmp1
-	MOVD (R15), R3
-	MOVD (R7), R4
-	CMP  R4, R3
-	BNE  inlineExtendMatchBSF
-	ADD  $8, R15, R15
-	ADD  $8, R7, R7
-	B    inlineExtendMatchCmp8
-
-inlineExtendMatchBSF:
-	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
-	// the index of the first byte that differs.
-	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
-	// combination of which finds the least significant bit which is set.
-	// The arm64 architecture is little-endian, and the shift by 3 converts
-	// a bit index to a byte index.
-	EOR  R3, R4, R4
-	RBIT R4, R4
-	CLZ  R4, R4
-	ADD  R4>>3, R7, R7
-	B    inlineExtendMatchEnd
-
-inlineExtendMatchCmp1:
-	// In src's tail, compare 1 byte at a time.
-	CMP  R7, R14
-	BLS  inlineExtendMatchEnd
-	MOVB (R15), R3
-	MOVB (R7), R4
-	CMP  R4, R3
-	BNE  inlineExtendMatchEnd
-	ADD  $1, R15, R15
-	ADD  $1, R7, R7
-	B    inlineExtendMatchCmp1
-
-inlineExtendMatchEnd:
-	// End inline of the extendMatch call.
-	// ----------------------------------------
-
-	// ----------------------------------------
-	// Begin inline of the emitCopy call.
-	//
-	// d += emitCopy(dst[d:], base-candidate, s-base)
-
-	// !!! length := s - base
-	MOVD R7, R3
-	SUB  R12, R3, R3
-
-inlineEmitCopyLoop0:
-	// for length >= 68 { etc }
-	MOVW $68, R2
-	CMPW R2, R3
-	BLT  inlineEmitCopyStep1
-
-	// Emit a length 64 copy, encoded as 3 bytes.
-	MOVD $0xfe, R1
-	MOVB R1, 0(R8)
-	MOVW R11, 1(R8)
-	ADD  $3, R8, R8
-	SUBW $64, R3, R3
-	B    inlineEmitCopyLoop0
-
-inlineEmitCopyStep1:
-	// if length > 64 { etc }
-	MOVW $64, R2
-	CMPW R2, R3
-	BLE  inlineEmitCopyStep2
-
-	// Emit a length 60 copy, encoded as 3 bytes.
-	MOVD $0xee, R1
-	MOVB R1, 0(R8)
-	MOVW R11, 1(R8)
-	ADD  $3, R8, R8
-	SUBW $60, R3, R3
-
-inlineEmitCopyStep2:
-	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
-	MOVW $12, R2
-	CMPW R2, R3
-	BGE  inlineEmitCopyStep3
-	MOVW $2048, R2
-	CMPW R2, R11
-	BGE  inlineEmitCopyStep3
-
-	// Emit the remaining copy, encoded as 2 bytes.
-	MOVB R11, 1(R8)
-	LSRW $8, R11, R11
-	LSLW $5, R11, R11
-	SUBW $4, R3, R3
-	AND  $0xff, R3, R3
-	LSLW $2, R3, R3
-	ORRW R3, R11, R11
-	ORRW $1, R11, R11
-	MOVB R11, 0(R8)
-	ADD  $2, R8, R8
-	B    inlineEmitCopyEnd
-
-inlineEmitCopyStep3:
-	// Emit the remaining copy, encoded as 3 bytes.
-	SUBW $1, R3, R3
-	LSLW $2, R3, R3
-	ORRW $2, R3, R3
-	MOVB R3, 0(R8)
-	MOVW R11, 1(R8)
-	ADD  $3, R8, R8
-
-inlineEmitCopyEnd:
-	// End inline of the emitCopy call.
-	// ----------------------------------------
-
-	// nextEmit = s
-	MOVD R7, R10
-
-	// if s >= sLimit { goto emitRemainder }
-	MOVD R7, R3
-	SUB  R6, R3, R3
-	CMP  R3, R9
-	BLS  emitRemainder
-
-	// As per the encode_other.go code:
-	//
-	// We could immediately etc.
-
-	// x := load64(src, s-1)
-	MOVD -1(R7), R14
-
-	// prevHash := hash(uint32(x>>0), shift)
-	MOVW R14, R11
-	MULW R16, R11, R11
-	LSRW R5, R11, R11
-
-	// table[prevHash] = uint16(s-1)
-	MOVD R7, R3
-	SUB  R6, R3, R3
-	SUB  $1, R3, R3
-
-	MOVHU R3, 0(R17)(R11<<1)
-
-	// currHash := hash(uint32(x>>8), shift)
-	LSR  $8, R14, R14
-	MOVW R14, R11
-	MULW R16, R11, R11
-	LSRW R5, R11, R11
-
-	// candidate = int(table[currHash])
-	MOVHU 0(R17)(R11<<1), R15
-
-	// table[currHash] = uint16(s)
-	ADD   $1, R3, R3
-	MOVHU R3, 0(R17)(R11<<1)
-
-	// if uint32(x>>8) == load32(src, candidate) { continue }
-	MOVW (R6)(R15), R4
-	CMPW R4, R14
-	BEQ  inner1
-
-	// nextHash = hash(uint32(x>>16), shift)
-	LSR  $8, R14, R14
-	MOVW R14, R11
-	MULW R16, R11, R11
-	LSRW R5, R11, R11
-
-	// s++
-	ADD $1, R7, R7
-
-	// break out of the inner1 for loop, i.e. continue the outer loop.
-	B outer
-
-emitRemainder:
-	// if nextEmit < len(src) { etc }
-	MOVD src_len+32(FP), R3
-	ADD  R6, R3, R3
-	CMP  R3, R10
-	BEQ  encodeBlockEnd
-
-	// d += emitLiteral(dst[d:], src[nextEmit:])
-	//
-	// Push args.
-	MOVD R8, 8(RSP)
-	MOVD $0, 16(RSP)  // Unnecessary, as the callee ignores it, but conservative.
-	MOVD $0, 24(RSP)  // Unnecessary, as the callee ignores it, but conservative.
-	MOVD R10, 32(RSP)
-	SUB  R10, R3, R3
-	MOVD R3, 40(RSP)
-	MOVD R3, 48(RSP)  // Unnecessary, as the callee ignores it, but conservative.
-
-	// Spill local variables (registers) onto the stack; call; unspill.
-	MOVD R8, 88(RSP)
-	CALL ·emitLiteral(SB)
-	MOVD 88(RSP), R8
-
-	// Finish the "d +=" part of "d += emitLiteral(etc)".
-	MOVD 56(RSP), R1
-	ADD  R1, R8, R8
-
-encodeBlockEnd:
-	MOVD dst_base+0(FP), R3
-	SUB  R3, R8, R8
-	MOVD R8, d+48(FP)
-	RET
--- a/vendor/github.com/golang/snappy/encode_asm.go
+++ b/vendor/github.com/golang/snappy/encode_asm.go
@ -1,30 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-// +build amd64 arm64
-
-package snappy
-
-// emitLiteral has the same semantics as in encode_other.go.
-//
-//go:noescape
-func emitLiteral(dst, lit []byte) int
-
-// emitCopy has the same semantics as in encode_other.go.
-//
-//go:noescape
-func emitCopy(dst []byte, offset, length int) int
-
-// extendMatch has the same semantics as in encode_other.go.
-//
-//go:noescape
-func extendMatch(src []byte, i, j int) int
-
-// encodeBlock has the same semantics as in encode_other.go.
-//
-//go:noescape
-func encodeBlock(dst, src []byte) (d int)
--- a/vendor/github.com/golang/snappy/go.mod
+++ b/vendor/github.com/golang/snappy/go.mod
@ -1 +0,0 @@
-module github.com/golang/snappy
--- a/vendor/github.com/klauspost/compress/.gitattributes
+++ b/vendor/github.com/klauspost/compress/.gitattributes
@ -0,0 +1,2 @@
+* -text
+*.bin -text -diff
--- a/vendor/github.com/klauspost/compress/.gitignore
+++ b/vendor/github.com/klauspost/compress/.gitignore
@ -0,0 +1,25 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+/s2/cmd/_s2sx/sfx-exe
--- a/vendor/github.com/klauspost/compress/.goreleaser.yml
+++ b/vendor/github.com/klauspost/compress/.goreleaser.yml
@ -0,0 +1,137 @@
+# GoReleaser configuration for the s2 command-line tools (s2c, s2d and s2sx).
+# Make sure to check the documentation at https://goreleaser.com
+before:
+  hooks:
+    - ./gen.sh
+
+builds:
+  -
+    id: "s2c"
+    binary: s2c
+    main: ./s2/cmd/s2c/main.go
+    flags:
+      - -trimpath
+    env:
+      - CGO_ENABLED=0
+    goos:
+      - aix
+      - linux
+      - freebsd
+      - netbsd
+      - windows
+      - darwin
+    goarch:
+      - 386
+      - amd64
+      - arm
+      - arm64
+      - ppc64
+      - ppc64le
+      - mips64
+      - mips64le
+    goarm:
+      - 7
+  -
+    id: "s2d"
+    binary: s2d
+    main: ./s2/cmd/s2d/main.go
+    flags:
+      - -trimpath
+    env:
+      - CGO_ENABLED=0
+    goos:
+      - aix
+      - linux
+      - freebsd
+      - netbsd
+      - windows
+      - darwin
+    goarch:
+      - 386
+      - amd64
+      - arm
+      - arm64
+      - ppc64
+      - ppc64le
+      - mips64
+      - mips64le
+    goarm:
+      - 7
+  -
+    id: "s2sx"
+    binary: s2sx
+    main: ./s2/cmd/_s2sx/main.go
+    flags:
+      - -modfile=s2sx.mod
+      - -trimpath
+    env:
+      - CGO_ENABLED=0
+    goos:
+      - aix
+      - linux
+      - freebsd
+      - netbsd
+      - windows
+      - darwin
+    goarch:
+      - 386
+      - amd64
+      - arm
+      - arm64
+      - ppc64
+      - ppc64le
+      - mips64
+      - mips64le
+    goarm:
+      - 7
+
+archives:
+  -
+    id: s2-binaries
+    name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
+    replacements:
+      aix: AIX
+      darwin: OSX
+      linux: Linux
+      windows: Windows
+      386: i386
+      amd64: x86_64
+      freebsd: FreeBSD
+      netbsd: NetBSD
+    format_overrides:
+      - goos: windows
+        format: zip
+    files:
+      - unpack/*
+      - s2/LICENSE
+      - s2/README.md
+checksum:
+  name_template: 'checksums.txt'
+snapshot:
+  name_template: "{{ .Tag }}-next"
+changelog:
+  sort: asc
+  filters:
+    exclude:
+    - '^doc:'
+    - '^docs:'
+    - '^test:'
+    - '^tests:'
+    - '^Update\sREADME.md'
+
+nfpms:
+  -
+    file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    vendor: Klaus Post
+    homepage: https://github.com/klauspost/compress
+    maintainer: Klaus Post <klauspost@gmail.com>
+    description: S2 Compression Tool
+    license: BSD 3-Clause
+    formats:
+      - deb
+      - rpm
+    replacements:
+      darwin: Darwin
+      linux: Linux
+      freebsd: FreeBSD
+      amd64: x86_64
--- a/vendor/github.com/klauspost/compress/LICENSE
+++ b/vendor/github.com/klauspost/compress/LICENSE
@ -26,3 +26,267 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+------------------
+
+Files: gzhttp/*
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2016-2017 The New York Times Company
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+------------------
+
+Files: s2/cmd/internal/readahead/*
+
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+---------------------
+Files: snappy/*
+Files: internal/snapref/*
+
+Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@ -0,0 +1,430 @@
+# compress
+
+This package provides various compression algorithms.
+
+* [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression in pure Go.
+* [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) is a high performance replacement for Snappy.
+* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib).
+* [snappy](https://github.com/klauspost/compress/tree/master/snappy) is a drop-in replacement for `github.com/golang/snappy` offering better compression and concurrent streams.
+* [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding.
+* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently.
+* [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation.
+* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here.
+
+[![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories)
+[![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml)
+[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge)
+
+# changelog
+
+* Aug 12, 2021 (v1.13.4)
+	* Add [snappy replacement package](https://github.com/klauspost/compress/tree/master/snappy).
+	* zstd: Fix incorrect encoding in "best" mode [#415](https://github.com/klauspost/compress/pull/415)
+
+* Aug 3, 2021 (v1.13.3) 
+
+	* zstd: Improve Best compression [#404](https://github.com/klauspost/compress/pull/404)
+	* zstd: Fix WriteTo error forwarding [#411](https://github.com/klauspost/compress/pull/411)
+	* gzhttp: Return http.HandlerFunc instead of http.Handler. Unlikely breaking change. [#406](https://github.com/klauspost/compress/pull/406)
+	* s2sx: Fix max size error [#399](https://github.com/klauspost/compress/pull/399)
+	* zstd: Add optional stream content size on reset [#401](https://github.com/klauspost/compress/pull/401)
+	* zstd: use SpeedBestCompression for level >= 10 [#410](https://github.com/klauspost/compress/pull/410)
+
+* Jun 14, 2021 (v1.13.1)
+
+	* s2: Add full Snappy output support  [#396](https://github.com/klauspost/compress/pull/396)
+	* zstd: Add configurable [Decoder window](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithDecoderMaxWindow) size [#394](https://github.com/klauspost/compress/pull/394)
+	* gzhttp: Add header to skip compression  [#389](https://github.com/klauspost/compress/pull/389)
+	* s2: Improve speed with bigger output margin  [#395](https://github.com/klauspost/compress/pull/395)
+
+* Jun 3, 2021 (v1.13.0)
+	* Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors.
+	* zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382)
+	* zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380)
+
+* May 25, 2021 (v1.12.3)
+	* deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374)
+	* deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375)
+	* zstd: Forward read errors [#373](https://github.com/klauspost/compress/pull/373) 
+
+* Apr 27, 2021 (v1.12.2)
+	* zstd: Improve better/best compression [#360](https://github.com/klauspost/compress/pull/360) [#364](https://github.com/klauspost/compress/pull/364) [#365](https://github.com/klauspost/compress/pull/365)
+	* zstd: Add helpers to compress/decompress zstd inside zip files [#363](https://github.com/klauspost/compress/pull/363)
+	* deflate: Improve level 5+6 compression [#367](https://github.com/klauspost/compress/pull/367)
+	* s2: Improve better/best compression [#358](https://github.com/klauspost/compress/pull/358) [#359](https://github.com/klauspost/compress/pull/358)
+	* s2: Load after checking src limit on amd64. [#362](https://github.com/klauspost/compress/pull/362)
+	* s2sx: Limit max executable size [#368](https://github.com/klauspost/compress/pull/368) 
+
+* Apr 14, 2021 (v1.12.1)
+	* snappy package removed. Upstream added as dependency.
+	* s2: Better compression in "best" mode [#353](https://github.com/klauspost/compress/pull/353)
+	* s2sx: Add stdin input and detect pre-compressed from signature [#352](https://github.com/klauspost/compress/pull/352)
+	* s2c/s2d: Add http as possible input [#348](https://github.com/klauspost/compress/pull/348)
+	* s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352)
+	* zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346)
+	* s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349)
+
+* Mar 26, 2021 (v1.11.13)
+	* zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345)
+	* zstd: Add [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) encoder option [#336](https://github.com/klauspost/compress/pull/336)
+	* deflate: Improve entropy compression [#338](https://github.com/klauspost/compress/pull/338)
+	* s2: Clean up and minor performance improvement in best [#341](https://github.com/klauspost/compress/pull/341)
+
+* Mar 5, 2021 (v1.11.12)
+	* s2: Add `s2sx` binary that creates [self extracting archives](https://github.com/klauspost/compress/tree/master/s2#s2sx-self-extracting-archives).
+	* s2: Speed up decompression on non-assembly platforms [#328](https://github.com/klauspost/compress/pull/328)
+
+* Mar 1, 2021 (v1.11.9)
+	* s2: Add ARM64 decompression assembly. Around 2x output speed. [#324](https://github.com/klauspost/compress/pull/324)
+	* s2: Improve "better" speed and efficiency. [#325](https://github.com/klauspost/compress/pull/325)
+	* s2: Fix binaries.
+
+* Feb 25, 2021 (v1.11.8)
+	* s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended.
+	* s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315)
+	* s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322)
+	* zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314)
+	* zip: Fix zip64 headers. [#313](https://github.com/klauspost/compress/pull/313)
+  
+* Jan 14, 2021 (v1.11.7)
+	* Use Bytes() interface to get bytes across packages. [#309](https://github.com/klauspost/compress/pull/309)
+	* s2: Add 'best' compression option.  [#310](https://github.com/klauspost/compress/pull/310)
+	* s2: Add ReaderMaxBlockSize, changes `s2.NewReader` signature to include varargs. [#311](https://github.com/klauspost/compress/pull/311)
+	* s2: Fix crash on small better buffers. [#308](https://github.com/klauspost/compress/pull/308)
+	* s2: Clean up decoder. [#312](https://github.com/klauspost/compress/pull/312)
+
+* Jan 7, 2021 (v1.11.6)
+	* zstd: Make decoder allocations smaller [#306](https://github.com/klauspost/compress/pull/306)
+	* zstd: Free Decoder resources when Reset is called with a nil io.Reader  [#305](https://github.com/klauspost/compress/pull/305)
+
+* Dec 20, 2020 (v1.11.4)
+	* zstd: Add Best compression mode [#304](https://github.com/klauspost/compress/pull/304)
+	* Add header decoder [#299](https://github.com/klauspost/compress/pull/299)
+	* s2: Add uncompressed stream option [#297](https://github.com/klauspost/compress/pull/297)
+	* Simplify/speed up small blocks with known max size. [#300](https://github.com/klauspost/compress/pull/300)
+	* zstd: Always reset literal dict encoder [#303](https://github.com/klauspost/compress/pull/303)
+
+* Nov 15, 2020 (v1.11.3)
+	* inflate: 10-15% faster decompression  [#293](https://github.com/klauspost/compress/pull/293)
+	* zstd: Tweak DecodeAll default allocation [#295](https://github.com/klauspost/compress/pull/295)
+
+* Oct 11, 2020 (v1.11.2)
+	* s2: Fix out of bounds read in "better" block compression [#291](https://github.com/klauspost/compress/pull/291)
+
+* Oct 1, 2020 (v1.11.1)
+	* zstd: Set allLitEntropy true in default configuration [#286](https://github.com/klauspost/compress/pull/286)
+
+* Sept 8, 2020 (v1.11.0)
+	* zstd: Add experimental compression [dictionaries](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) [#281](https://github.com/klauspost/compress/pull/281)
+	* zstd: Fix mixed Write and ReadFrom calls [#282](https://github.com/klauspost/compress/pull/282)
+	* inflate/gz: Limit variable shifts, ~5% faster decompression [#274](https://github.com/klauspost/compress/pull/274)
+
+<details>
+	<summary>See changes prior to v1.11.0</summary>
+ 
+* July 8, 2020 (v1.10.11) 
+	* zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278)
+	* huff0: Also populate compression table when reading decoding table. [#275](https://github.com/klauspost/compress/pull/275)
+	
+* June 23, 2020 (v1.10.10) 
+	* zstd: Skip entropy compression in fastest mode when no matches. [#270](https://github.com/klauspost/compress/pull/270)
+	
+* June 16, 2020 (v1.10.9): 
+	* zstd: API change for specifying dictionaries. See [#268](https://github.com/klauspost/compress/pull/268)
+	* zip: update CreateHeaderRaw to handle zip64 fields. [#266](https://github.com/klauspost/compress/pull/266)
+	* Fuzzit tests removed. The service has been purchased and is no longer available.
+	
+* June 5, 2020 (v1.10.8): 
+	* 1.15x faster zstd block decompression. [#265](https://github.com/klauspost/compress/pull/265)
+	
+* June 1, 2020 (v1.10.7): 
+	* Added zstd decompression [dictionary support](https://github.com/klauspost/compress/tree/master/zstd#dictionaries)
+	* Increase zstd decompression speed up to 1.19x.  [#259](https://github.com/klauspost/compress/pull/259)
+	* Remove internal reset call in zstd compression and reduce allocations. [#263](https://github.com/klauspost/compress/pull/263)
+	
+* May 21, 2020: (v1.10.6) 
+	* zstd: Reduce allocations while decoding. [#258](https://github.com/klauspost/compress/pull/258), [#252](https://github.com/klauspost/compress/pull/252)
+	* zstd: Stricter decompression checks.
+	
+* April 12, 2020: (v1.10.5)
+	* s2-commands: Flush output when receiving SIGINT. [#239](https://github.com/klauspost/compress/pull/239)
+	
+* Apr 8, 2020: (v1.10.4) 
+	* zstd: Minor/special case optimizations. [#251](https://github.com/klauspost/compress/pull/251),  [#250](https://github.com/klauspost/compress/pull/250),  [#249](https://github.com/klauspost/compress/pull/249),  [#247](https://github.com/klauspost/compress/pull/247)
+* Mar 11, 2020: (v1.10.3) 
+	* s2: Use S2 encoder in pure Go mode for Snappy output as well. [#245](https://github.com/klauspost/compress/pull/245)
+	* s2: Fix pure Go block encoder. [#244](https://github.com/klauspost/compress/pull/244)
+	* zstd: Added "better compression" mode. [#240](https://github.com/klauspost/compress/pull/240)
+	* zstd: Improve speed of fastest compression mode by 5-10% [#241](https://github.com/klauspost/compress/pull/241)
+	* zstd: Skip creating encoders when not needed. [#238](https://github.com/klauspost/compress/pull/238)
+	
+* Feb 27, 2020: (v1.10.2) 
+	* Close to 50% speedup in inflate (gzip/zip decompression). [#236](https://github.com/klauspost/compress/pull/236) [#234](https://github.com/klauspost/compress/pull/234) [#232](https://github.com/klauspost/compress/pull/232)
+	* Reduce deflate level 1-6 memory usage up to 59%. [#227](https://github.com/klauspost/compress/pull/227)
+	
+* Feb 18, 2020: (v1.10.1)
+	* Fix zstd crash when resetting multiple times without sending data. [#226](https://github.com/klauspost/compress/pull/226)
+	* deflate: Fix dictionary use on level 1-6. [#224](https://github.com/klauspost/compress/pull/224)
+	* Remove deflate writer reference when closing. [#224](https://github.com/klauspost/compress/pull/224)
+	
+* Feb 4, 2020: (v1.10.0) 
+	* Add optional dictionary to [stateless deflate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc#StatelessDeflate). Breaking change, send `nil` for previous behaviour. [#216](https://github.com/klauspost/compress/pull/216)
+	* Fix buffer overflow on repeated small block deflate.  [#218](https://github.com/klauspost/compress/pull/218)
+	* Allow copying content from an existing ZIP file without decompressing+compressing. [#214](https://github.com/klauspost/compress/pull/214)
+	* Added [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) AMD64 assembler and various optimizations. Stream speed >10GB/s.  [#186](https://github.com/klauspost/compress/pull/186)
+
+</details>
+
+<details>
+	<summary>See changes prior to v1.10.0</summary>
+
+* Jan 20,2020 (v1.9.8) Optimize gzip/deflate with better size estimates and faster table generation. [#207](https://github.com/klauspost/compress/pull/207) by [luyu6056](https://github.com/luyu6056),  [#206](https://github.com/klauspost/compress/pull/206).
+* Jan 11, 2020: S2 Encode/Decode will use provided buffer if capacity is big enough. [#204](https://github.com/klauspost/compress/pull/204) 
+* Jan 5, 2020: (v1.9.7) Fix another zstd regression in v1.9.5 - v1.9.6 removed.
+* Jan 4, 2020: (v1.9.6) Regression in v1.9.5 fixed causing corrupt zstd encodes in rare cases.
+* Jan 4, 2020: Faster IO in [s2c + s2d commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) compression/decompression. [#192](https://github.com/klauspost/compress/pull/192)
+* Dec 29, 2019: Removed v1.9.5 since fuzz tests showed a compatibility problem with the reference zstandard decoder.
+* Dec 29, 2019: (v1.9.5) zstd: 10-20% faster block compression. [#199](https://github.com/klauspost/compress/pull/199)
+* Dec 29, 2019: [zip](https://godoc.org/github.com/klauspost/compress/zip) package updated with latest Go features
+* Dec 29, 2019: zstd: Single segment flag conditions tweaked. [#197](https://github.com/klauspost/compress/pull/197)
+* Dec 18, 2019: s2: Faster compression when ReadFrom is used. [#198](https://github.com/klauspost/compress/pull/198)
+* Dec 10, 2019: s2: Fix repeat length output when just above at 16MB limit.
+* Dec 10, 2019: zstd: Add function to get decoder as io.ReadCloser. [#191](https://github.com/klauspost/compress/pull/191)
+* Dec 3, 2019: (v1.9.4) S2: limit max repeat length. [#188](https://github.com/klauspost/compress/pull/188)
+* Dec 3, 2019: Add [WithNoEntropyCompression](https://godoc.org/github.com/klauspost/compress/zstd#WithNoEntropyCompression) to zstd [#187](https://github.com/klauspost/compress/pull/187)
+* Dec 3, 2019: Reduce memory use for tests. Check for leaked goroutines.
+* Nov 28, 2019 (v1.9.3) Less allocations in stateless deflate.
+* Nov 28, 2019: 5-20% Faster huff0 decode. Impacts zstd as well. [#184](https://github.com/klauspost/compress/pull/184)
+* Nov 12, 2019 (v1.9.2) Added [Stateless Compression](#stateless-compression) for gzip/deflate.
+* Nov 12, 2019: Fixed zstd decompression of large single blocks. [#180](https://github.com/klauspost/compress/pull/180)
+* Nov 11, 2019: Set default  [s2c](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) block size to 4MB.
+* Nov 11, 2019: Reduce inflate memory use by 1KB.
+* Nov 10, 2019: Less allocations in deflate bit writer.
+* Nov 10, 2019: Fix inconsistent error returned by zstd decoder.
+* Oct 28, 2019 (v1.9.1) zstd: Fix crash when compressing blocks. [#174](https://github.com/klauspost/compress/pull/174)
+* Oct 24, 2019 (v1.9.0) zstd: Fix rare data corruption [#173](https://github.com/klauspost/compress/pull/173)
+* Oct 24, 2019 zstd: Fix huff0 out of buffer write [#171](https://github.com/klauspost/compress/pull/171) and always return errors [#172](https://github.com/klauspost/compress/pull/172) 
+* Oct 10, 2019: Big deflate rewrite, 30-40% faster with better compression [#105](https://github.com/klauspost/compress/pull/105)
+
+</details>
+
+<details>
+	<summary>See changes prior to v1.9.0</summary>
+
+* Oct 10, 2019: (v1.8.6) zstd: Allow partial reads to get flushed data. [#169](https://github.com/klauspost/compress/pull/169)
+* Oct 3, 2019: Fix inconsistent results on broken zstd streams.
+* Sep 25, 2019: Added `-rm` (remove source files) and `-q` (no output except errors) to `s2c` and `s2d` [commands](https://github.com/klauspost/compress/tree/master/s2#commandline-tools)
+* Sep 16, 2019: (v1.8.4) Add `s2c` and `s2d` [commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools).
+* Sep 10, 2019: (v1.8.3) Fix s2 decoder [Skip](https://godoc.org/github.com/klauspost/compress/s2#Reader.Skip).
+* Sep 7, 2019: zstd: Added [WithWindowSize](https://godoc.org/github.com/klauspost/compress/zstd#WithWindowSize), contributed by [ianwilkes](https://github.com/ianwilkes).
+* Sep 5, 2019: (v1.8.2) Add [WithZeroFrames](https://godoc.org/github.com/klauspost/compress/zstd#WithZeroFrames) which adds full zero payload block encoding option.
+* Sep 5, 2019: Lazy initialization of zstandard predefined en/decoder tables.
+* Aug 26, 2019: (v1.8.1) S2: 1-2% compression increase in "better" compression mode.
+* Aug 26, 2019: zstd: Check maximum size of Huffman 1X compressed literals while decoding.
+* Aug 24, 2019: (v1.8.0) Added [S2 compression](https://github.com/klauspost/compress/tree/master/s2#s2-compression), a high performance replacement for Snappy. 
+* Aug 21, 2019: (v1.7.6) Fixed minor issues found by fuzzer. One could lead to zstd not decompressing.
+* Aug 18, 2019: Add [fuzzit](https://fuzzit.dev/) continuous fuzzing.
+* Aug 14, 2019: zstd: Skip incompressible data 2x faster.  [#147](https://github.com/klauspost/compress/pull/147)
+* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146)
+* Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144)
+* Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142)
+* July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder.
+* July 15, 2019 (v1.7.3): Minor speedup/compression increase in default zstd encoder.
+* July 14, 2019: zstd decoder: Fix decompression error on multiple uses with mixed content.
+* July 7, 2019 (v1.7.2): Snappy update, zstd decoder potential race fix.
+* June 17, 2019: zstd decompression bugfix.
+* June 17, 2019: fix 32 bit builds.
+* June 17, 2019: Easier use in modules (less dependencies).
+* June 9, 2019: New stronger "default" [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression mode. Matches zstd default compression ratio.
+* June 5, 2019: 20-40% throughput in [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and better compression.
+* June 5, 2019: deflate/gzip compression: Reduce memory usage of lower compression levels.
+* June 2, 2019: Added [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression!
+* May 25, 2019: deflate/gzip: 10% faster bit writer, mostly visible in lower levels.
+* Apr 22, 2019: [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) decompression added.
+* Aug 1, 2018: Added [huff0 README](https://github.com/klauspost/compress/tree/master/huff0#huff0-entropy-compression).
+* Jul 8, 2018: Added [Performance Update 2018](#performance-update-2018) below.
+* Jun 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0).
+* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change.
+* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change.
+* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function.
+* May 28, 2017: Reduce allocations when resetting decoder.
+* Apr 02, 2017: Change back to official crc32, since changes were merged in Go 1.7.
+* Jan 14, 2017: Reduce stack pressure due to array copies. See [Issue #18625](https://github.com/golang/go/issues/18625).
+* Oct 25, 2016: Level 2-4 have been rewritten and now offers significantly better performance than before.
+* Oct 20, 2016: Port zlib changes from Go 1.7 to fix zlib writer issue. Please update.
+* Oct 16, 2016: Go 1.7 changes merged. Apples to apples this package is a few percent faster, but has a significantly better balance between speed and compression per level. 
+* Mar 24, 2016: Always attempt Huffman encoding on level 4-7. This improves base 64 encoded data compression.
+* Mar 24, 2016: Small speedup for level 1-3.
+* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster.
+* Feb 19, 2016: Handle small payloads faster in level 1-3.
+* Feb 19, 2016: Added faster level 2 + 3 compression modes.
+* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5.
+* Feb 14, 2016: Snappy: Merge upstream changes. 
+* Feb 14, 2016: Snappy: Fix aggressive skipping.
+* Feb 14, 2016: Snappy: Update benchmark.
+* Feb 13, 2016: Deflate: Fixed assembler problem that could lead to sub-optimal compression.
+* Feb 12, 2016: Snappy: Added AMD64 SSE 4.2 optimizations to matching, which makes easy to compress material run faster. Typical speedup is around 25%.
+* Feb 9, 2016: Added Snappy package fork. This version is 5-7% faster, much more on hard to compress content.
+* Jan 30, 2016: Optimize level 1 to 3 by not considering static dictionary or storing uncompressed. ~4-5% speedup.
+* Jan 16, 2016: Optimization on deflate level 1,2,3 compression.
+* Jan 8 2016: Merge [CL 18317](https://go-review.googlesource.com/#/c/18317): fix reading, writing of zip64 archives.
+* Dec 8 2015: Make level 1 and -2 deterministic even if write size differs.
+* Dec 8 2015: Split encoding functions, so hashing and matching can potentially be inlined. 1-3% faster on AMD64. 5% faster on other platforms.
+* Dec 8 2015: Fixed rare [one byte out-of bounds read](https://github.com/klauspost/compress/issues/20). Please update!
+* Nov 23 2015: Optimization on token writer. ~2-4% faster. Contributed by [@dsnet](https://github.com/dsnet).
+* Nov 20 2015: Small optimization to bit writer on 64 bit systems.
+* Nov 17 2015: Fixed out-of-bound errors if the underlying Writer returned an error. See [#15](https://github.com/klauspost/compress/issues/15).
+* Nov 12 2015: Added [io.WriterTo](https://golang.org/pkg/io/#WriterTo) support to gzip/inflate.
+* Nov 11 2015: Merged [CL 16669](https://go-review.googlesource.com/#/c/16669/4): archive/zip: enable overriding (de)compressors per file
+* Oct 15 2015: Added skipping on uncompressible data. Random data speed up >5x.
+
+</details>
+
+# deflate usage
+
+* [High Throughput Benchmark](http://blog.klauspost.com/go-gzipdeflate-benchmarks/).
+* [Small Payload/Webserver Benchmarks](http://blog.klauspost.com/gzip-performance-for-go-webservers/).
+* [Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
+* [Re-balancing Deflate Compression Levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/)
+
+The packages are drop-in replacements for standard libraries. Simply replace the import path to use them:
+
+| old import         | new import                              | Documentation
+|--------------------|-----------------------------------------|--------------------|
+| `compress/gzip`    | `github.com/klauspost/compress/gzip`    | [gzip](https://pkg.go.dev/github.com/klauspost/compress/gzip?tab=doc)
+| `compress/zlib`    | `github.com/klauspost/compress/zlib`    | [zlib](https://pkg.go.dev/github.com/klauspost/compress/zlib?tab=doc)
+| `archive/zip`      | `github.com/klauspost/compress/zip`     | [zip](https://pkg.go.dev/github.com/klauspost/compress/zip?tab=doc)
+| `compress/flate`   | `github.com/klauspost/compress/flate`   | [flate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc)
+
+* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib).
+
+You may also be interested in [pgzip](https://github.com/klauspost/pgzip), which is a drop-in replacement for gzip that supports multithreaded compression on big files, and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages.
+
+The packages have the same API as the standard library, so you can use the godoc for that: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/),  [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/).
+
+Currently there is only minor speedup on decompression (mostly CRC32 calculation).
+
+Memory usage is typically 1MB for a Writer. stdlib is in the same range. 
+If you expect to have a lot of concurrently allocated Writers consider using 
+the stateless compress described below.
+
+# Stateless compression
+
+This package offers stateless compression as a special option for gzip/deflate. 
+It will do compression but without maintaining any state between Write calls.
+
+This means there will be no memory kept between Write calls, but compression and speed will be suboptimal.
+
+This is only relevant in cases where you expect to run many thousands of compressors concurrently, 
+but with very little activity. This is *not* intended for regular web servers serving individual requests.  
+
+Because of this, the size of actual Write calls will affect output size.
+
+In gzip, specify level `-3` / `gzip.StatelessCompression` to enable.
+
+For direct deflate use, NewStatelessWriter and StatelessDeflate are available. See [documentation](https://godoc.org/github.com/klauspost/compress/flate#NewStatelessWriter)
+
+A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:
+
+```
+	// replace 'ioutil.Discard' with your output.
+	gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
+	if err != nil {
+		return err
+	}
+	defer gzw.Close()
+
+	w := bufio.NewWriterSize(gzw, 4096)
+	defer w.Flush()
+	
+	// Write to 'w' 
+```
+
+This will only use up to 4KB in memory when the writer is idle. 
+
+Compression is almost always worse than the fastest compression level 
+and each write will allocate (a little) memory. 
+
+# Performance Update 2018
+
+It has been a while since we have been looking at the speed of this package compared to the standard library, so I thought I would re-do my tests and give some overall recommendations based on the current state. All benchmarks have been performed with Go 1.10 on my Desktop Intel(R) Core(TM) i7-2600 CPU @3.40GHz. Since I last ran the tests, I have gotten more RAM, which means tests with big files are no longer limited by my SSD.
+
+The raw results are in my [updated spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). Due to cgo changes and upstream updates I could not get the cgo version of gzip to compile. Instead I included the [zstd](https://github.com/datadog/zstd) cgo implementation. If I get cgo gzip to work again, I might replace the results in the sheet.
+
+The columns to take note of are: *MB/s* - the throughput. *Reduction* - the data size reduction in percent of the original. *Rel Speed* relative speed compared to the standard library at the same level. *Smaller* - how many percent smaller is the compressed output compared to stdlib. Negative means the output was bigger. *Loss* means the loss (or gain) in compression as a percentage difference of the input.
+
+The `gzstd` (standard library gzip) and `gzkp` (this package gzip) only use one CPU core. [`pgzip`](https://github.com/klauspost/pgzip), [`bgzf`](https://github.com/biogo/hts/tree/master/bgzf) use all 4 cores. [`zstd`](https://github.com/DataDog/zstd) uses one core, and is a beast (but not Go, yet).
+
+
+## Overall differences.
+
+There appears to be a roughly 5-10% speed advantage over the standard library when comparing at similar compression levels.
+
+The biggest difference you will see is the result of [re-balancing](https://blog.klauspost.com/rebalancing-deflate-compression-levels/) the compression levels. I wanted my library to give a smoother transition between the compression levels than the standard library.
+
+This package attempts to provide a more smooth transition, where "1" is taking a lot of shortcuts, "5" is the reasonable trade-off and "9" is the "give me the best compression", and the values in between give something reasonable in between. The standard library has big differences in levels 1-4, but levels 5-9 have no significant gains - often spending a lot more time than can be justified by the achieved compression.
+
+There are links to all the test data in the [spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) in the top left field on each tab.
+
+## Web Content
+
+This test set aims to emulate typical use in a web server. The test-set is 4GB data in 53k files, and is a mixture of (mostly) HTML, JS, CSS.
+
+Since level 1 and 9 are close to being the same code, they are quite close. But looking at the levels in-between the differences are quite big.
+
+Looking at level 6, this package is 88% faster, but will output about 6% more data. For a web server, this means you can serve 88% more data, but have to pay for 6% more bandwidth. You can draw your own conclusions on what would be the most expensive for your case.
+
+## Object files
+
+This test is for typical data files stored on a server. In this case it is a collection of Go precompiled objects. They are very compressible.
+
+The picture is similar to the web content, but with small differences since this is very compressible. Levels 2-3 offer good speed, but sacrifice quite a bit of compression. 
+
+The standard library seems suboptimal on level 3 and 4 - offering both worse compression and speed than level 6 & 7 of this package respectively.
+
+## Highly Compressible File
+
+This is a JSON file with very high redundancy. The reduction starts at 95% on level 1, so in real life terms we are dealing with something like a highly redundant stream of data, etc.
+
+It is definitely visible that we are dealing with specialized content here, so the results are very scattered. This package does not do very well at levels 1-4, but picks up significantly at level 5 and levels 7 and 8 offering great speed for the achieved compression.
+
+So if you know your content is extremely compressible you might want to go slightly higher than the defaults. The standard library has a huge gap between levels 3 and 4 in terms of speed (2.75x slowdown), so it offers little "middle ground".
+
+## Medium-High Compressible
+
+This is a pretty common test corpus: [enwik9](http://mattmahoney.net/dc/textdata.html). It contains the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. This is a very good test of typical text based compression and more data heavy streams.
+
+We see a similar picture here as in "Web Content". On equal levels some compression is sacrificed for more speed. Level 5 seems to be the best trade-off between speed and size, beating stdlib level 3 in both.
+
+## Medium Compressible
+
+I will combine two test sets, one [10GB file set](http://mattmahoney.net/dc/10gb.html) and a VM disk image (~8GB). Both contain different data types and represent a typical backup scenario.
+
+The most notable thing is how quickly the standard library drops to very low compression speeds around level 5-6 without any big gains in compression. Since this type of data is fairly common, this does not seem like good behavior.
+
+
+## Un-compressible Content
+
+This is mainly a test of how good the algorithms are at detecting un-compressible input. The standard library only offers this feature with very conservative settings at level 1. Obviously there is no reason for the algorithms to try to compress input that cannot be compressed.  The only downside is that it might skip some compressible data on false detections.
+
+
+## Huffman only compression
+
+This compression library adds a special compression level, named `HuffmanOnly`, which allows near linear time compression. This is done by completely disabling matching of previous data, and only reducing the number of bits to represent each character. 
+
+This means that often used characters, like 'e' and ' ' (space) in text use the fewest bits to represent, and rare characters like '¤' take more bits to represent. For more information see [wikipedia](https://en.wikipedia.org/wiki/Huffman_coding) or this nice [video](https://youtu.be/ZdooBTdW5bM).
+
+Since this type of compression has much less variance, the compression speed is mostly unaffected by the input data, and is usually more than *180MB/s* for a single core.
+
+The downside is that the compression ratio is usually considerably worse than even the fastest conventional compression. The compression ratio can never be better than 8:1 (12.5%). 
+
+The linear time compression can be used as a "better than nothing" mode, where you cannot risk the encoder to slow down on some content. For comparison, the size of the "Twain" text is *233460 bytes* (+29% vs. level 1) and encode speed is 144MB/s (4.5x level 1). So in this case you trade a 30% size increase for a 4 times speedup.
+
+For more information see my blog post on [Fast Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
+
+This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip.
+
+
+# license
+
+This code is licensed under the same conditions as the original Go code. See LICENSE file.
--- a/vendor/github.com/klauspost/compress/compressible.go
+++ b/vendor/github.com/klauspost/compress/compressible.go
@ -0,0 +1,85 @@
+package compress
+
+import "math"
+
+// Estimate returns a normalized compressibility estimate of block b.
+// Values close to zero are likely uncompressible.
+// Values above 0.1 are likely to be compressible.
+// Values above 0.5 are very compressible.
+// Inputs shorter than 16 bytes always return 0.
+func Estimate(b []byte) float64 {
+	if len(b) < 16 {
+		return 0
+	}
+
+	// Count bytes that an order-1 predictor (previous byte -> its last seen successor) gets right.
+	hits := 0
+	lastMatch := false
+	var o1 [256]byte  // o1[p] is the byte that most recently followed byte p
+	var hist [256]int // number of occurrences of each byte value
+	c1 := byte(0)     // previous byte
+	for _, c := range b {
+		if c == o1[c1] {
+			// We only count a hit if there were two correct predictions in a row.
+			if lastMatch {
+				hits++
+			}
+			lastMatch = true
+		} else {
+			lastMatch = false
+		}
+		o1[c1] = c
+		c1 = c
+		hist[c]++
+	}
+
+	// Use x^0.6 to give better spread
+	prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)
+
+	// Measure how far the byte histogram deviates from a uniform distribution.
+	variance := float64(0)
+	avg := float64(len(b)) / 256 // count each byte value would have if evenly distributed
+
+	for _, v := range hist {
+		Δ := float64(v) - avg
+		variance += Δ * Δ
+	}
+
+	stddev := math.Sqrt(float64(variance)) / float64(len(b))
+	exp := math.Sqrt(1 / float64(len(b)))
+
+	// Subtract expected stddev, clamping at zero.
+	stddev -= exp
+	if stddev < 0 {
+		stddev = 0
+	}
+	stddev *= 1 + exp
+
+	// Use x^0.4 to give better spread
+	entropy := math.Pow(stddev, 0.4)
+
+	// 50/50 weight between prediction and histogram distribution
+	return math.Pow((prediction+entropy)/2, 0.9)
+}
+
+// ShannonEntropyBits returns the minimum number of bits required to represent
+// an entropy encoding of the input bytes. Each byte value's share is rounded up. Empty input returns 0.
+// https://en.wiktionary.org/wiki/Shannon_entropy
+func ShannonEntropyBits(b []byte) int {
+	if len(b) == 0 {
+		return 0
+	}
+	var hist [256]int // number of occurrences of each byte value
+	for _, c := range b {
+		hist[c]++
+	}
+	shannon := float64(0)
+	invTotal := 1.0 / float64(len(b))
+	for _, v := range hist[:] {
+		if v > 0 {
+			n := float64(v)
+			shannon += math.Ceil(-math.Log2(n*invTotal) * n) // -log2(frequency) bits for each of the n occurrences
+		}
+	}
+	return int(math.Ceil(shannon))
+}
--- a/vendor/github.com/klauspost/compress/gen.sh
+++ b/vendor/github.com/klauspost/compress/gen.sh
@ -0,0 +1,4 @@
+#!/bin/sh
+
+cd s2/cmd/_s2sx/ || exit 1
+go generate .
--- a/vendor/github.com/klauspost/compress/go.mod
+++ b/vendor/github.com/klauspost/compress/go.mod
@ -0,0 +1,3 @@
+module github.com/klauspost/compress
+
+go 1.15
--- a/vendor/github.com/klauspost/compress/go.sum
+++ b/vendor/github.com/klauspost/compress/go.sum
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@ -161,6 +161,70 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 	return s.Out, false, nil
 }

+// EstimateSizes estimates the table size (tableSz), the data size with a newly built table (dataSz) and the data size when reusing s.prevTable (reuseSz, -1 if it cannot be reused); all sizes are 0 when err is non-nil.
+func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err error) {
+	s, err = s.prepare(in)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	// Create histogram, if none was provided.
+	tableSz, dataSz, reuseSz = -1, -1, -1
+	maxCount := s.maxCount
+	var canReuse = false
+	if maxCount == 0 {
+		maxCount, canReuse = s.countSimple(in)
+	} else {
+		canReuse = s.canUseTable(s.prevTable)
+	}
+
+	// We want the output size to be less than this (NOTE(review): wantSize is not consulted later in this function):
+	wantSize := len(in)
+	if s.WantLogLess > 0 {
+		wantSize -= wantSize >> s.WantLogLess
+	}
+
+	// Reset for next run.
+	s.clearCount = true
+	s.maxCount = 0
+	if maxCount >= len(in) {
+		if maxCount > len(in) {
+			return 0, 0, 0, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in))
+		}
+		if len(in) == 1 {
+			return 0, 0, 0, ErrIncompressible
+		}
+		// One symbol, use RLE
+		return 0, 0, 0, ErrUseRLE
+	}
+	if maxCount == 1 || maxCount < (len(in)>>7) {
+		// Each symbol present maximum once or too well distributed.
+		return 0, 0, 0, ErrIncompressible
+	}
+
+	// Calculate new table.
+	err = s.buildCTable()
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	if false && !s.canUseTable(s.cTable) { // sanity check, permanently disabled by the constant false
+		panic("invalid table generated")
+	}
+
+	tableSz, err = s.cTable.estTableSize(s)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+	if canReuse {
+		reuseSz = s.prevTable.estimateSize(s.count[:s.symbolLen])
+	}
+	dataSz = s.cTable.estimateSize(s.count[:s.symbolLen])
+
+	// All estimates computed; reuseSz is still -1 if the previous table was not usable.
+	return tableSz, dataSz, reuseSz, nil
+}
+
 func (s *Scratch) compress1X(src []byte) ([]byte, error) {
 	return s.compress1xDo(s.Out, src)
 }
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@ -245,6 +245,68 @@ func (c cTable) write(s *Scratch) error {
 	return nil
 }

+func (c cTable) estTableSize(s *Scratch) (sz int, err error) { // estTableSize estimates the size needed to store the weights of table c.
+	var (
+		// precomputed conversion table from code length (nBits) to weight
+		bitsToWeight [tableLogMax + 1]byte
+		huffLog      = s.actualTableLog
+		// last weight is not saved.
+		maxSymbolValue = uint8(s.symbolLen - 1)
+		huffWeight     = s.huffWeight[:256]
+	)
+	const (
+		maxFSETableLog = 6
+	)
+	// Fill the conversion table: a code of n bits gets weight huffLog+1-n; 0 bits stays weight 0.
+	bitsToWeight[0] = 0
+	for n := uint8(1); n < huffLog+1; n++ {
+		bitsToWeight[n] = huffLog + 1 - n
+	}
+
+	// Acquire histogram for FSE and count how often each weight (0-15) occurs.
+	hist := s.fse.Histogram()
+	hist = hist[:256]
+	for i := range hist[:16] {
+		hist[i] = 0
+	}
+	for n := uint8(0); n < maxSymbolValue; n++ {
+		v := bitsToWeight[c[n].nBits] & 15
+		huffWeight[n] = v
+		hist[v]++
+	}
+
+	// FSE compress the weights if feasible.
+	if maxSymbolValue >= 2 {
+		huffMaxCnt := uint32(0)
+		huffMax := uint8(0)
+		for i, v := range hist[:16] {
+			if v == 0 {
+				continue
+			}
+			huffMax = byte(i)
+			if v > huffMaxCnt {
+				huffMaxCnt = v
+			}
+		}
+		s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
+		s.fse.TableLog = maxFSETableLog
+		b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
+		if err == nil && len(b) < int(s.symbolLen>>1) { // accept only if smaller than half the symbol count
+			sz += 1 + len(b)
+			return sz, nil
+		}
+		// Unable to compress (RLE/uncompressible); fall through to the raw estimate.
+	}
+	// Raw weights are stored as 4-bit values (max: 15).
+	if maxSymbolValue > (256 - 128) {
+		// should not happen: likely means source cannot be compressed
+		return 0, ErrIncompressible
+	}
+	// Special case: 1 byte plus 4 bits per weight.
+	sz += 1 + int(maxSymbolValue/2)
+	return sz, nil
+}
+
 // estimateSize returns the estimated size in bytes of the input represented in the
 // histogram supplied.
 func (c cTable) estimateSize(hist []uint32) int {
--- a/vendor/github.com/klauspost/compress/internal/snapref/LICENSE
+++ b/vendor/github.com/klauspost/compress/internal/snapref/LICENSE
--- a/vendor/github.com/klauspost/compress/internal/snapref/decode.go
+++ b/vendor/github.com/klauspost/compress/internal/snapref/decode.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package snappy
+package snapref

 import (
 	"encoding/binary"
--- a/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go
+++ b/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go
@ -2,9 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build !amd64,!arm64 appengine !gc noasm
-
-package snappy
+package snapref

 // decode writes the decoding of src to dst. It assumes that the varint-encoded
 // length of the decompressed bytes has already been read, and that len(dst)
--- a/vendor/github.com/klauspost/compress/internal/snapref/encode.go
+++ b/vendor/github.com/klauspost/compress/internal/snapref/encode.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package snappy
+package snapref

 import (
 	"encoding/binary"
--- a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
+++ b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
@ -2,9 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build !amd64,!arm64 appengine !gc noasm
-
-package snappy
+package snapref

 func load32(b []byte, i int) uint32 {
 	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
--- a/vendor/github.com/klauspost/compress/internal/snapref/snappy.go
+++ b/vendor/github.com/klauspost/compress/internal/snapref/snappy.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// Package snappy implements the Snappy compression format. It aims for very
+// Package snapref implements the Snappy compression format. It aims for very
 // high speeds and reasonable compression.
 //
 // There are actually two Snappy formats: block and stream. They are related,
@ -17,7 +17,7 @@
 //
 // The canonical, C++ implementation is at https://github.com/google/snappy and
 // it only implements the block format.
-package snappy // import "github.com/golang/snappy"
+package snapref

 import (
 	"hash/crc32"
--- a/vendor/github.com/klauspost/compress/s2sx.mod
+++ b/vendor/github.com/klauspost/compress/s2sx.mod
@ -0,0 +1,4 @@
+module github.com/klauspost/compress
+
+go 1.16
+
--- a/vendor/github.com/klauspost/compress/s2sx.sum
+++ b/vendor/github.com/klauspost/compress/s2sx.sum
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@ -152,7 +152,7 @@ file    out     level   insize      outsize     millis  mb/s
 silesia.tar zskp    1   211947520   73101992    643     313.87
 silesia.tar zskp    2   211947520   67504318    969     208.38
 silesia.tar zskp    3   211947520   64595893    2007    100.68
-silesia.tar zskp    4   211947520   60995370    7691    26.28
+silesia.tar zskp    4   211947520   60995370    8825    22.90

 cgo zstd:
 silesia.tar zstd    1   211947520   73605392    543     371.56
@ -162,7 +162,7 @@ silesia.tar zstd    9   211947520   60212393    5063    39.92

 gzip, stdlib/this package:
 silesia.tar gzstd   1   211947520   80007735    1654    122.21
-silesia.tar gzkp    1   211947520   80369488    1168    173.06
+silesia.tar gzkp    1   211947520   80136201    1152    175.45

 GOB stream of binary data. Highly compressible.
 https://files.klauspost.com/compress/gob-stream.7z
@ -171,13 +171,15 @@ file        out     level   insize  outsize     millis  mb/s
 gob-stream  zskp    1   1911399616  235022249   3088    590.30
 gob-stream  zskp    2   1911399616  205669791   3786    481.34
 gob-stream  zskp    3   1911399616  175034659   9636    189.17
-gob-stream  zskp    4   1911399616  167273881   29337   62.13
+gob-stream  zskp    4   1911399616  165609838   50369   36.19
+
 gob-stream  zstd    1   1911399616  249810424   2637    691.26
 gob-stream  zstd    3   1911399616  208192146   3490    522.31
 gob-stream  zstd    6   1911399616  193632038   6687    272.56
 gob-stream  zstd    9   1911399616  177620386   16175   112.70
+
 gob-stream  gzstd   1   1911399616  357382641   10251   177.82
-gob-stream  gzkp    1   1911399616  362156523   5695    320.08
+gob-stream  gzkp    1   1911399616  359753026   5438    335.20

 The test data for the Large Text Compression Benchmark is the first
 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
@ -187,11 +189,13 @@ file    out level   insize      outsize     millis  mb/s
 enwik9  zskp    1   1000000000  343848582   3609    264.18
 enwik9  zskp    2   1000000000  317276632   5746    165.97
 enwik9  zskp    3   1000000000  292243069   12162   78.41
-enwik9  zskp    4   1000000000  275241169   36430   26.18
+enwik9  zskp    4   1000000000  262183768   82837   11.51
+
 enwik9  zstd    1   1000000000  358072021   3110    306.65
 enwik9  zstd    3   1000000000  313734672   4784    199.35
 enwik9  zstd    6   1000000000  295138875   10290   92.68
 enwik9  zstd    9   1000000000  278348700   28549   33.40
+
 enwik9  gzstd   1   1000000000  382578136   9604    99.30
 enwik9  gzkp    1   1000000000  383825945   6544    145.73

@ -202,13 +206,15 @@ file                        out level   insize      outsize     millis  mb/s
 github-june-2days-2019.json zskp    1   6273951764  699045015   10620   563.40
 github-june-2days-2019.json zskp    2   6273951764  617881763   11687   511.96
 github-june-2days-2019.json zskp    3   6273951764  524340691   34043   175.75
-github-june-2days-2019.json zskp    4   6273951764  503314661   93811   63.78
+github-june-2days-2019.json zskp    4   6273951764  470320075   170190  35.16
+
 github-june-2days-2019.json zstd    1   6273951764  766284037   8450    708.00
 github-june-2days-2019.json zstd    3   6273951764  661889476   10927   547.57
 github-june-2days-2019.json zstd    6   6273951764  642756859   22996   260.18
 github-june-2days-2019.json zstd    9   6273951764  601974523   52413   114.16
+
 github-june-2days-2019.json gzstd   1   6273951764  1164400847  29948   199.79
-github-june-2days-2019.json gzkp    1   6273951764  1128755542  19236   311.03
+github-june-2days-2019.json gzkp    1   6273951764  1125417694  21788   274.61

 VM Image, Linux mint with a few installed applications:
 https://files.klauspost.com/compress/rawstudio-mint14.7z
@ -217,13 +223,15 @@ file                    out level   insize      outsize     millis  mb/s
 rawstudio-mint14.tar    zskp    1   8558382592  3667489370  20210   403.84
 rawstudio-mint14.tar    zskp    2   8558382592  3364592300  31873   256.07
 rawstudio-mint14.tar    zskp    3   8558382592  3158085214  77675   105.08
-rawstudio-mint14.tar    zskp    4   8558382592  3020370044  404956  20.16
+rawstudio-mint14.tar    zskp    4   8558382592  2965110639  857750  9.52
+
 rawstudio-mint14.tar    zstd    1   8558382592  3609250104  17136   476.27
 rawstudio-mint14.tar    zstd    3   8558382592  3341679997  29262   278.92
 rawstudio-mint14.tar    zstd    6   8558382592  3235846406  77904   104.77
 rawstudio-mint14.tar    zstd    9   8558382592  3160778861  140946  57.91
+
 rawstudio-mint14.tar    gzstd   1   8558382592  3926257486  57722   141.40
-rawstudio-mint14.tar    gzkp    1   8558382592  3970463184  41749   195.49
+rawstudio-mint14.tar    gzkp    1   8558382592  3962605659  45113   180.92

 CSV data:
 https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
@ -232,13 +240,15 @@ file                    out level   insize      outsize     millis  mb/s
 nyc-taxi-data-10M.csv   zskp    1   3325605752  641339945   8925    355.35
 nyc-taxi-data-10M.csv   zskp    2   3325605752  591748091   11268   281.44
 nyc-taxi-data-10M.csv   zskp    3   3325605752  530289687   25239   125.66
-nyc-taxi-data-10M.csv   zskp    4   3325605752  490907191   65939   48.10
+nyc-taxi-data-10M.csv   zskp    4   3325605752  476268884   135958  23.33
+
 nyc-taxi-data-10M.csv   zstd    1   3325605752  687399637   8233    385.18
 nyc-taxi-data-10M.csv   zstd    3   3325605752  598514411   10065   315.07
 nyc-taxi-data-10M.csv   zstd    6   3325605752  570522953   20038   158.27
 nyc-taxi-data-10M.csv   zstd    9   3325605752  517554797   64565   49.12
+
 nyc-taxi-data-10M.csv   gzstd   1   3325605752  928656485   23876   132.83
-nyc-taxi-data-10M.csv   gzkp    1   3325605752  924718719   16388   193.53
+nyc-taxi-data-10M.csv   gzkp    1   3325605752  922257165   16780   189.00
 ```

 ## Decompressor
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@ -260,9 +260,10 @@ func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
 		if len(d.current.b) > 0 {
 			n2, err2 := w.Write(d.current.b)
 			n += int64(n2)
-			if err2 != nil && d.current.err == nil {
+			if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) {
 				d.current.err = err2
-				break
+			} else if n2 != len(d.current.b) {
+				d.current.err = io.ErrShortWrite
 			}
 		}
 		if d.current.err != nil {
--- a/vendor/github.com/klauspost/compress/zstd/enc_base.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@ -38,8 +38,8 @@ func (e *fastBase) AppendCRC(dst []byte) []byte {

 // WindowSize returns the window size of the encoder,
 // or a window size small enough to contain the input size, if > 0.
-func (e *fastBase) WindowSize(size int) int32 {
-	if size > 0 && size < int(e.maxMatchOff) {
+func (e *fastBase) WindowSize(size int64) int32 {
+	if size > 0 && size < int64(e.maxMatchOff) {
 		b := int32(1) << uint(bits.Len(uint(size)))
 		// Keep minimum window.
 		if b < 1024 {
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@ -5,22 +5,61 @@
 package zstd

 import (
+	"bytes"
 	"fmt"
-	"math/bits"
+
+	"github.com/klauspost/compress"
 )

 const (
-	bestLongTableBits = 20                     // Bits used in the long match table
+	bestLongTableBits = 22                     // Bits used in the long match table
 	bestLongTableSize = 1 << bestLongTableBits // Size of the table
+	bestLongLen       = 8                      // Bytes used for table hash

 	// Note: Increasing the short table bits or making the hash shorter
 	// can actually lead to compression degradation since it will 'steal' more from the
 	// long match table and match offsets are quite big.
 	// This greatly depends on the type of input.
-	bestShortTableBits = 16                      // Bits used in the short match table
+	bestShortTableBits = 18                      // Bits used in the short match table
 	bestShortTableSize = 1 << bestShortTableBits // Size of the table
+	bestShortLen       = 4                       // Bytes used for table hash
+
 )

+type match struct { // match describes one candidate match considered by the encoder.
+	offset int32 // position in src where the earlier, matching data begins
+	s      int32 // position in src that the match would encode
+	length int32 // match length in bytes; 0 when there is no usable match
+	rep    int32 // repeat-offset code, or negative for a regular offset
+	est    int32 // estimated cost as computed by estBits; highScore when rejected
+}
+
+const highScore = 25000 // est value marking a match as not worth using
+
+// estBits will estimate output bits from predefined tables; bitsPerByte is the literal cost per byte scaled by 1024.
+func (m *match) estBits(bitsPerByte int32) {
+	mlc := mlCode(uint32(m.length - zstdMinMatch))
+	var ofc uint8
+	if m.rep < 0 {
+		ofc = ofCode(uint32(m.s-m.offset) + 3)
+	} else {
+		ofc = ofCode(uint32(m.rep))
+	}
+	// Look up the predefined encoding-table entries for the offset and match length codes.
+	ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
+
+	// Add cost of match encoding...
+	m.est = int32(ofTT.outBits + mlTT.outBits)
+	m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
+	// Subtract savings compared to literal encoding...
+	m.est -= (m.length * bitsPerByte) >> 10
+	if m.est > 0 {
+		// A positive estimate means the match is unlikely to gain anything; reject it.
+		m.length = 0
+		m.est = highScore
+	}
+}
+
 // bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
 // The long match table contains the previous entry with the same hash,
 // effectively making it a "chain" of length 2.
@ -109,6 +148,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 		return
 	}

+	// Use this to estimate literal cost.
+	// Scaled by 10 bits.
+	bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
+	// Huffman can never go < 1 bit/byte
+	if bitsPerByte < 1024 {
+		bitsPerByte = 1024
+	}
+
 	// Override src
 	src = e.hist
 	sLimit := int32(len(src)) - inputMargin
@ -145,51 +192,49 @@ encodeLoop:
 			panic("offset0 was 0")
 		}

-		type match struct {
-			offset int32
-			s      int32
-			length int32
-			rep    int32
-		}
-		matchAt := func(offset int32, s int32, first uint32, rep int32) match {
-			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
-				return match{offset: offset, s: s}
-			}
-			return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
-		}
-
 		bestOf := func(a, b match) match {
-			aScore := b.s - a.s + a.length
-			bScore := a.s - b.s + b.length
-			if a.rep < 0 {
-				aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
-			}
-			if b.rep < 0 {
-				bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
-			}
-			if aScore >= bScore {
+			if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
 				return a
 			}
 			return b
 		}
 		const goodEnough = 100

-		nextHashL := hash8(cv, bestLongTableBits)
-		nextHashS := hash4x64(cv, bestShortTableBits)
+		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
+		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]

+		matchAt := func(offset int32, s int32, first uint32, rep int32) match {
+			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
+				return match{s: s, est: highScore}
+			}
+			if debugAsserts {
+				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
+					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
+				}
+			}
+			m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
+			m.estBits(bitsPerByte)
+			return m
+		}
+
 		best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
 		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
 		best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+
 		if canRepeat && best.length < goodEnough {
-			best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
-			best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
-			best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
+			cv32 := uint32(cv >> 8)
+			spp := s + 1
+			best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
+			best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
+			best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
 			if best.length > 0 {
-				best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
-				best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
-				best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
+				cv32 = uint32(cv >> 24)
+				spp += 2
+				best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
+				best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
+				best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
 			}
 		}
 		// Load next and check...
@ -209,22 +254,28 @@ encodeLoop:
 			}

 			s++
-			candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
+			candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
 			cv = load6432(src, s)
 			cv2 := load6432(src, s+1)
-			candidateL = e.longTable[hash8(cv, bestLongTableBits)]
-			candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
+			candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
+			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

+			// Short at s+1
 			best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+			// Long at s+1, s+2
 			best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
 			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
 			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
 			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
-
+			if false {
+				// Short at s+3.
+				// Too often worse...
+				best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+			}
 			// See if we can find a better match by checking where the current best ends.
 			// Use that offset to see if we can find a better full match.
 			if sAt := best.s + best.length; sAt < sLimit {
-				nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
+				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
 				candidateEnd := e.longTable[nextHashL]
 				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
 					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
@ -236,6 +287,12 @@ encodeLoop:
 			}
 		}

+		if debugAsserts {
+			if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
+				panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
+			}
+		}
+
 		// We have a match, we can store the forward value
 		if best.rep > 0 {
 			s = best.s
@ -284,8 +341,8 @@ encodeLoop:
 			off := index0 + e.cur
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
-				h0 := hash8(cv0, bestLongTableBits)
-				h1 := hash4x64(cv0, bestShortTableBits)
+				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
+				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 				e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
 				off++
@ -311,7 +368,7 @@ encodeLoop:
 			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}

-		if debugAsserts && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && int(offset1) > len(src) {
 			panic("invalid offset")
 		}

@ -352,8 +409,8 @@ encodeLoop:
 		// every entry
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
-			h0 := hash8(cv0, bestLongTableBits)
-			h1 := hash4x64(cv0, bestShortTableBits)
+			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
+			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
 			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
@ -374,8 +431,8 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHashS := hash4x64(cv, bestShortTableBits)
-			nextHashL := hash8(cv, bestLongTableBits)
+			nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
+			nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)

 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@ -425,7 +482,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	e.Encode(blk, src)
 }

-// ResetDict will reset and set a dictionary if not nil
+// Reset will reset and set a dictionary if not nil
 func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
 	e.resetBase(d, singleBlock)
 	if d == nil {
@ -441,10 +498,10 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
 			const hashLog = bestShortTableBits

 			cv := load6432(d.content, i-e.maxMatchOff)
-			nextHash := hash4x64(cv, hashLog)      // 0 -> 4
-			nextHash1 := hash4x64(cv>>8, hashLog)  // 1 -> 5
-			nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
-			nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
+			nextHash := hashLen(cv, hashLog, bestShortLen)      // 0 -> 4
+			nextHash1 := hashLen(cv>>8, hashLog, bestShortLen)  // 1 -> 5
+			nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
+			nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
 			e.dictTable[nextHash] = prevEntry{
 				prev:   e.dictTable[nextHash].offset,
 				offset: i,
@ -472,7 +529,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
 		}
 		if len(d.content) >= 8 {
 			cv := load6432(d.content, 0)
-			h := hash8(cv, bestLongTableBits)
+			h := hashLen(cv, bestLongTableBits, bestLongLen)
 			e.dictLongTable[h] = prevEntry{
 				offset: e.maxMatchOff,
 				prev:   e.dictLongTable[h].offset,
@ -482,7 +539,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
 			off := 8 // First to read
 			for i := e.maxMatchOff + 1; i < end; i++ {
 				cv = cv>>8 | (uint64(d.content[off]) << 56)
-				h := hash8(cv, bestLongTableBits)
+				h := hashLen(cv, bestLongTableBits, bestLongLen)
 				e.dictLongTable[h] = prevEntry{
 					offset: i,
 					prev:   e.dictLongTable[h].offset,
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@ -9,6 +9,7 @@ import "fmt"
 const (
 	betterLongTableBits = 19                       // Bits used in the long match table
 	betterLongTableSize = 1 << betterLongTableBits // Size of the table
+	betterLongLen       = 8                        // Bytes used for table hash

 	// Note: Increasing the short table bits or making the hash shorter
 	// can actually lead to compression degradation since it will 'steal' more from the
@ -16,6 +17,7 @@ const (
 	// This greatly depends on the type of input.
 	betterShortTableBits = 13                        // Bits used in the short match table
 	betterShortTableSize = 1 << betterShortTableBits // Size of the table
+	betterShortLen       = 5                         // Bytes used for table hash

 	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
 	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
@ -154,8 +156,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHashS := hash5(cv, betterShortTableBits)
-			nextHashL := hash8(cv, betterLongTableBits)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]

@ -214,10 +216,10 @@ encodeLoop:
 					for index0 < s-1 {
 						cv0 := load6432(src, index0)
 						cv1 := cv0 >> 8
-						h0 := hash8(cv0, betterLongTableBits)
+						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 						off := index0 + e.cur
 						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
-						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 						index0 += 2
 					}
 					cv = load6432(src, s)
@ -275,10 +277,10 @@ encodeLoop:
 					for index0 < s-1 {
 						cv0 := load6432(src, index0)
 						cv1 := cv0 >> 8
-						h0 := hash8(cv0, betterLongTableBits)
+						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 						off := index0 + e.cur
 						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
-						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 						index0 += 2
 					}
 					cv = load6432(src, s)
@ -353,7 +355,7 @@ encodeLoop:
 				// See if we can find a long match at s+1
 				const checkAt = 1
 				cv := load6432(src, s+checkAt)
-				nextHashL = hash8(cv, betterLongTableBits)
+				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
 				candidateL = e.longTable[nextHashL]
 				coffsetL = candidateL.offset - e.cur

@ -413,8 +415,8 @@ encodeLoop:
 		}

 		// Try to find a better match by searching for a long match at the end of the current best match
-		if true && s+matched < sLimit {
-			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+		if s+matched < sLimit {
+			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
 			cv := load3232(src, s)
 			candidateL := e.longTable[nextHashL]
 			coffsetL := candidateL.offset - e.cur - matched
@ -495,10 +497,10 @@ encodeLoop:
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
-			h0 := hash8(cv0, betterLongTableBits)
+			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
-			e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			index0 += 2
 		}

@ -516,8 +518,8 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHashS := hash5(cv, betterShortTableBits)
-			nextHashL := hash8(cv, betterLongTableBits)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@ -672,8 +674,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHashS := hash5(cv, betterShortTableBits)
-			nextHashL := hash8(cv, betterLongTableBits)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]

@ -734,11 +736,11 @@ encodeLoop:
 					for index0 < s-1 {
 						cv0 := load6432(src, index0)
 						cv1 := cv0 >> 8
-						h0 := hash8(cv0, betterLongTableBits)
+						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 						off := index0 + e.cur
 						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 						e.markLongShardDirty(h0)
-						h1 := hash5(cv1, betterShortTableBits)
+						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 						e.markShortShardDirty(h1)
 						index0 += 2
@ -798,11 +800,11 @@ encodeLoop:
 					for index0 < s-1 {
 						cv0 := load6432(src, index0)
 						cv1 := cv0 >> 8
-						h0 := hash8(cv0, betterLongTableBits)
+						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 						off := index0 + e.cur
 						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 						e.markLongShardDirty(h0)
-						h1 := hash5(cv1, betterShortTableBits)
+						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 						e.markShortShardDirty(h1)
 						index0 += 2
@ -879,7 +881,7 @@ encodeLoop:
 				// See if we can find a long match at s+1
 				const checkAt = 1
 				cv := load6432(src, s+checkAt)
-				nextHashL = hash8(cv, betterLongTableBits)
+				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
 				candidateL = e.longTable[nextHashL]
 				coffsetL = candidateL.offset - e.cur

@ -940,7 +942,7 @@ encodeLoop:
 		}
 		// Try to find a better match by searching for a long match at the end of the current best match
 		if s+matched < sLimit {
-			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
 			cv := load3232(src, s)
 			candidateL := e.longTable[nextHashL]
 			coffsetL := candidateL.offset - e.cur - matched
@ -1021,11 +1023,11 @@ encodeLoop:
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
-			h0 := hash8(cv0, betterLongTableBits)
+			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.markLongShardDirty(h0)
-			h1 := hash5(cv1, betterShortTableBits)
+			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			e.markShortShardDirty(h1)
 			index0 += 2
@ -1045,8 +1047,8 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHashS := hash5(cv, betterShortTableBits)
-			nextHashL := hash8(cv, betterLongTableBits)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@ -1113,10 +1115,10 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
 			const hashLog = betterShortTableBits

 			cv := load6432(d.content, i-e.maxMatchOff)
-			nextHash := hash5(cv, hashLog)      // 0 -> 4
-			nextHash1 := hash5(cv>>8, hashLog)  // 1 -> 5
-			nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
-			nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
+			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
+			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
+			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
+			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
 			e.dictTable[nextHash] = tableEntry{
 				val:    uint32(cv),
 				offset: i,
@ -1145,7 +1147,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
 		}
 		if len(d.content) >= 8 {
 			cv := load6432(d.content, 0)
-			h := hash8(cv, betterLongTableBits)
+			h := hashLen(cv, betterLongTableBits, betterLongLen)
 			e.dictLongTable[h] = prevEntry{
 				offset: e.maxMatchOff,
 				prev:   e.dictLongTable[h].offset,
@ -1155,7 +1157,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
 			off := 8 // First to read
 			for i := e.maxMatchOff + 1; i < end; i++ {
 				cv = cv>>8 | (uint64(d.content[off]) << 56)
-				h := hash8(cv, betterLongTableBits)
+				h := hashLen(cv, betterLongTableBits, betterLongLen)
 				e.dictLongTable[h] = prevEntry{
 					offset: i,
 					prev:   e.dictLongTable[h].offset,
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@ -10,6 +10,7 @@ const (
 	dFastLongTableBits = 17                      // Bits used in the long match table
 	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
 	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+	dFastLongLen       = 8                       // Bytes used for table hash

 	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
 	dLongTableShardSize = dFastLongTableSize / tableShardCnt        // Size of an individual shard
@ -17,6 +18,8 @@ const (
 	dFastShortTableBits = tableBits                // Bits used in the short match table
 	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
 	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+	dFastShortLen       = 5                        // Bytes used for table hash
+
 )

 type doubleFastEncoder struct {
@ -124,8 +127,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHashS := hash5(cv, dFastShortTableBits)
-			nextHashL := hash8(cv, dFastLongTableBits)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]

@ -208,7 +211,7 @@ encodeLoop:
 				// See if we can find a long match at s+1
 				const checkAt = 1
 				cv := load6432(src, s+checkAt)
-				nextHashL = hash8(cv, dFastLongTableBits)
+				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
 				candidateL = e.longTable[nextHashL]
 				coffsetL = s - (candidateL.offset - e.cur) + checkAt

@ -304,16 +307,16 @@ encodeLoop:
 		cv1 := load6432(src, index1)
 		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
-		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
+		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
+		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
 		cv0 >>= 8
 		cv1 >>= 8
 		te0.offset++
 		te1.offset++
 		te0.val = uint32(cv0)
 		te1.val = uint32(cv1)
-		e.table[hash5(cv0, dFastShortTableBits)] = te0
-		e.table[hash5(cv1, dFastShortTableBits)] = te1
+		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
+		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

 		cv = load6432(src, s)

@ -330,8 +333,8 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHashS := hash5(cv, dFastShortTableBits)
-			nextHashL := hash8(cv, dFastLongTableBits)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@ -436,8 +439,8 @@ encodeLoop:
 		var t int32
 		for {

-			nextHashS := hash5(cv, dFastShortTableBits)
-			nextHashL := hash8(cv, dFastLongTableBits)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]

@ -521,7 +524,7 @@ encodeLoop:
 				// See if we can find a long match at s+1
 				const checkAt = 1
 				cv := load6432(src, s+checkAt)
-				nextHashL = hash8(cv, dFastLongTableBits)
+				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
 				candidateL = e.longTable[nextHashL]
 				coffsetL = s - (candidateL.offset - e.cur) + checkAt

@ -614,16 +617,16 @@ encodeLoop:
 		cv1 := load6432(src, index1)
 		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
-		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
+		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
+		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
 		cv0 >>= 8
 		cv1 >>= 8
 		te0.offset++
 		te1.offset++
 		te0.val = uint32(cv0)
 		te1.val = uint32(cv1)
-		e.table[hash5(cv0, dFastShortTableBits)] = te0
-		e.table[hash5(cv1, dFastShortTableBits)] = te1
+		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
+		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

 		cv = load6432(src, s)

@ -640,8 +643,8 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHashS := hash5(cv1>>8, dFastShortTableBits)
-			nextHashL := hash8(cv, dFastLongTableBits)
+			nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
+			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@ -782,8 +785,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHashS := hash5(cv, dFastShortTableBits)
-			nextHashL := hash8(cv, dFastLongTableBits)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]

@ -868,7 +871,7 @@ encodeLoop:
 				// See if we can find a long match at s+1
 				const checkAt = 1
 				cv := load6432(src, s+checkAt)
-				nextHashL = hash8(cv, dFastLongTableBits)
+				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
 				candidateL = e.longTable[nextHashL]
 				coffsetL = s - (candidateL.offset - e.cur) + checkAt

@ -965,8 +968,8 @@ encodeLoop:
 		cv1 := load6432(src, index1)
 		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-		longHash1 := hash8(cv0, dFastLongTableBits)
-		longHash2 := hash8(cv0, dFastLongTableBits)
+		longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
+		longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
 		e.longTable[longHash1] = te0
 		e.longTable[longHash2] = te1
 		e.markLongShardDirty(longHash1)
@ -977,8 +980,8 @@ encodeLoop:
 		te1.offset++
 		te0.val = uint32(cv0)
 		te1.val = uint32(cv1)
-		hashVal1 := hash5(cv0, dFastShortTableBits)
-		hashVal2 := hash5(cv1, dFastShortTableBits)
+		hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
+		hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
 		e.table[hashVal1] = te0
 		e.markShardDirty(hashVal1)
 		e.table[hashVal2] = te1
@ -999,8 +1002,8 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHashS := hash5(cv, dFastShortTableBits)
-			nextHashL := hash8(cv, dFastLongTableBits)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@ -1071,14 +1074,14 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
 		}
 		if len(d.content) >= 8 {
 			cv := load6432(d.content, 0)
-			e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+			e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
 				val:    uint32(cv),
 				offset: e.maxMatchOff,
 			}
 			end := int32(len(d.content)) - 8 + e.maxMatchOff
 			for i := e.maxMatchOff + 1; i < end; i++ {
 				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
-				e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+				e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
 					val:    uint32(cv),
 					offset: i,
 				}
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@ -11,12 +11,13 @@ import (
 )

 const (
-	tableBits      = 15                               // Bits used in the table
-	tableSize      = 1 << tableBits                   // Size of the table
-	tableShardCnt  = 1 << (tableBits - dictShardBits) // Number of shards in the table
-	tableShardSize = tableSize / tableShardCnt        // Size of an individual shard
-	tableMask      = tableSize - 1                    // Mask for table indices. Redundant, but can eliminate bounds checks.
-	maxMatchLength = 131074
+	tableBits        = 15                               // Bits used in the table
+	tableSize        = 1 << tableBits                   // Size of the table
+	tableShardCnt    = 1 << (tableBits - dictShardBits) // Number of shards in the table
+	tableShardSize   = tableSize / tableShardCnt        // Size of an individual shard
+	tableFastHashLen = 6
+	tableMask        = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
+	maxMatchLength   = 131074
 )

 type tableEntry struct {
@ -122,8 +123,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHash := hash6(cv, hashLog)
-			nextHash2 := hash6(cv>>8, hashLog)
+			nextHash := hashLen(cv, hashLog, tableFastHashLen)
+			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
 			candidate := e.table[nextHash]
 			candidate2 := e.table[nextHash2]
 			repIndex := s - offset1 + 2
@ -301,7 +302,7 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHash := hash6(cv, hashLog)
+			nextHash := hashLen(cv, hashLog, tableFastHashLen)
 			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 			seq.matchLen = uint32(l) - zstdMinMatch
 			seq.litLen = 0
@ -405,8 +406,8 @@ encodeLoop:
 		// By not using them for the first 3 matches

 		for {
-			nextHash := hash6(cv, hashLog)
-			nextHash2 := hash6(cv>>8, hashLog)
+			nextHash := hashLen(cv, hashLog, tableFastHashLen)
+			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
 			candidate := e.table[nextHash]
 			candidate2 := e.table[nextHash2]
 			repIndex := s - offset1 + 2
@ -589,7 +590,7 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHash := hash6(cv, hashLog)
+			nextHash := hashLen(cv, hashLog, tableFastHashLen)
 			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 			seq.matchLen = uint32(l) - zstdMinMatch
 			seq.litLen = 0
@ -715,8 +716,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHash := hash6(cv, hashLog)
-			nextHash2 := hash6(cv>>8, hashLog)
+			nextHash := hashLen(cv, hashLog, tableFastHashLen)
+			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
 			candidate := e.table[nextHash]
 			candidate2 := e.table[nextHash2]
 			repIndex := s - offset1 + 2
@ -896,7 +897,7 @@ encodeLoop:
 			}

 			// Store this, since we have it.
-			nextHash := hash6(cv, hashLog)
+			nextHash := hashLen(cv, hashLog, tableFastHashLen)
 			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 			e.markShardDirty(nextHash)
 			seq.matchLen = uint32(l) - zstdMinMatch
@ -957,9 +958,9 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 				const hashLog = tableBits

 				cv := load6432(d.content, i-e.maxMatchOff)
-				nextHash := hash6(cv, hashLog)      // 0 -> 5
-				nextHash1 := hash6(cv>>8, hashLog)  // 1 -> 6
-				nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
+				nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
+				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
+				nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
 				e.dictTable[nextHash] = tableEntry{
 					val:    uint32(cv),
 					offset: i,
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@ -33,7 +33,7 @@ type encoder interface {
 	Block() *blockEnc
 	CRC() *xxhash.Digest
 	AppendCRC([]byte) []byte
-	WindowSize(size int) int32
+	WindowSize(size int64) int32
 	UseBlock(*blockEnc)
 	Reset(d *dict, singleBlock bool)
 }
@ -48,6 +48,8 @@ type encoderState struct {
 	err              error
 	writeErr         error
 	nWritten         int64
+	nInput           int64
+	frameContentSize int64
 	headerWritten    bool
 	eofWritten       bool
 	fullFrameWritten bool
@ -120,7 +122,21 @@ func (e *Encoder) Reset(w io.Writer) {
 	s.w = w
 	s.err = nil
 	s.nWritten = 0
+	s.nInput = 0
 	s.writeErr = nil
+	s.frameContentSize = 0
+}
+
+// ResetContentSize will reset and set a content size for the next stream.
+// If the number of bytes written does not match the given size, an error will be returned
+// when calling Close().
+// The content size is cleared when Reset is called.
+// Sizes <= 0 result in no content size being set.
+func (e *Encoder) ResetContentSize(w io.Writer, size int64) {
+	e.Reset(w)
+	if size >= 0 {
+		e.state.frameContentSize = size
+	}
 }

 // Write data to the encoder.
@ -190,6 +206,7 @@ func (e *Encoder) nextBlock(final bool) error {
 				return s.err
 			}
 			s.nWritten += int64(n2)
+			s.nInput += int64(len(s.filling))
 			s.current = s.current[:0]
 			s.filling = s.filling[:0]
 			s.headerWritten = true
@ -200,8 +217,8 @@ func (e *Encoder) nextBlock(final bool) error {

 		var tmp [maxHeaderSize]byte
 		fh := frameHeader{
-			ContentSize:   0,
-			WindowSize:    uint32(s.encoder.WindowSize(0)),
+			ContentSize:   uint64(s.frameContentSize),
+			WindowSize:    uint32(s.encoder.WindowSize(s.frameContentSize)),
 			SingleSegment: false,
 			Checksum:      e.o.crc,
 			DictID:        e.o.dict.ID(),
@ -243,6 +260,7 @@ func (e *Encoder) nextBlock(final bool) error {

 	// Move blocks forward.
 	s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
+	s.nInput += int64(len(s.current))
 	s.wg.Add(1)
 	go func(src []byte) {
 		if debugEncoder {
@ -394,6 +412,11 @@ func (e *Encoder) Close() error {
 	if err != nil {
 		return err
 	}
+	if s.frameContentSize > 0 {
+		if s.nInput != s.frameContentSize {
+			return fmt.Errorf("frame content size %d given, but %d bytes was written", s.frameContentSize, s.nInput)
+		}
+	}
 	if e.state.fullFrameWritten {
 		return s.err
 	}
@ -470,7 +493,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	}
 	fh := frameHeader{
 		ContentSize:   uint64(len(src)),
-		WindowSize:    uint32(enc.WindowSize(len(src))),
+		WindowSize:    uint32(enc.WindowSize(int64(len(src)))),
 		SingleSegment: single,
 		Checksum:      e.o.crc,
 		DictID:        e.o.dict.ID(),
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@ -189,7 +189,7 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
 	case level >= 6 && level < 10:
 		return SpeedBetterCompression
 	case level >= 10:
-		return SpeedBetterCompression
+		return SpeedBestCompression
 	}
 	return SpeedDefault
 }
--- a/vendor/github.com/klauspost/compress/zstd/hash.go
+++ b/vendor/github.com/klauspost/compress/zstd/hash.go
@ -13,24 +13,24 @@ const (
 	prime8bytes = 0xcf1bbcdcb7a56463
 )

-// hashLen returns a hash of the lowest l bytes of u for a size size of h bytes.
-// l must be >=4 and <=8. Any other value will return hash for 4 bytes.
-// h should always be <32.
-// Preferably h and l should be a constant.
-// FIXME: This does NOT get resolved, if 'mls' is constant,
-//  so this cannot be used.
-func hashLen(u uint64, hashLog, mls uint8) uint32 {
+// hashLen returns a hash of the lowest mls bytes of u with length output bits.
+// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
+// length should always be < 32.
+// Preferably length and mls should be a constant for inlining.
+func hashLen(u uint64, length, mls uint8) uint32 {
 	switch mls {
+	case 3:
+		return (uint32(u<<8) * prime3bytes) >> (32 - length)
 	case 5:
-		return hash5(u, hashLog)
+		return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
 	case 6:
-		return hash6(u, hashLog)
+		return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
 	case 7:
-		return hash7(u, hashLog)
+		return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
 	case 8:
-		return hash8(u, hashLog)
+		return uint32((u * prime8bytes) >> (64 - length))
 	default:
-		return hash4x64(u, hashLog)
+		return (uint32(u) * prime4bytes) >> (32 - length)
 	}
 }

@ -39,39 +39,3 @@ func hashLen(u uint64, hashLog, mls uint8) uint32 {
 func hash3(u uint32, h uint8) uint32 {
 	return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
 }
-
-// hash4 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4(u uint32, h uint8) uint32 {
-	return (u * prime4bytes) >> ((32 - h) & 31)
-}
-
-// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4x64(u uint64, h uint8) uint32 {
-	return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
-}
-
-// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash5(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
-}
-
-// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash6(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
-}
-
-// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash7(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
-}
-
-// hash8 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash8(u uint64, h uint8) uint32 {
-	return uint32((u * prime8bytes) >> ((64 - h) & 63))
-}
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go
@ -1,6 +1,5 @@
-// +build !appengine
-// +build gc
-// +build !purego
+//go:build !appengine && gc && !purego
+// +build !appengine,gc,!purego

 package xxhash

--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
@ -1,3 +1,4 @@
+//go:build !amd64 || appengine || !gc || purego
 // +build !amd64 appengine !gc purego

 package xxhash
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@ -10,8 +10,8 @@ import (
 	"hash/crc32"
 	"io"

-	"github.com/golang/snappy"
 	"github.com/klauspost/compress/huff0"
+	snappy "github.com/klauspost/compress/internal/snapref"
 )

 const (
--- a/vendor/github.com/klauspost/compress/zstd/zip.go
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@ -64,8 +64,9 @@ func (r *pooledZipReader) Close() error {
 }

 type pooledZipWriter struct {
-	mu  sync.Mutex // guards Close and Read
-	enc *Encoder
+	mu   sync.Mutex // guards Close and Write
+	enc  *Encoder
+	pool *sync.Pool
 }

 func (w *pooledZipWriter) Write(p []byte) (n int, err error) {
@ -83,7 +84,7 @@ func (w *pooledZipWriter) Close() error {
 	var err error
 	if w.enc != nil {
 		err = w.enc.Close()
-		zipReaderPool.Put(w.enc)
+		w.pool.Put(w.enc)
 		w.enc = nil
 	}
 	return err
@ -104,7 +105,7 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
 				return nil, err
 			}
 		}
-		return &pooledZipWriter{enc: enc}, nil
+		return &pooledZipWriter{enc: enc, pool: &pool}, nil
 	}
 }

--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -9,7 +9,7 @@ github.com/alessio/shellescape
 github.com/containerd/containerd/errdefs
 github.com/containerd/containerd/log
 github.com/containerd/containerd/platforms
-# github.com/containerd/stargz-snapshotter/estargz v0.7.0
+# github.com/containerd/stargz-snapshotter/estargz v0.8.0
 ## explicit
 github.com/containerd/stargz-snapshotter/estargz
 github.com/containerd/stargz-snapshotter/estargz/errorutil
@ -78,9 +78,6 @@ github.com/golang/protobuf/ptypes
 github.com/golang/protobuf/ptypes/any
 github.com/golang/protobuf/ptypes/duration
 github.com/golang/protobuf/ptypes/timestamp
-# github.com/golang/snappy v0.0.4
-## explicit
-github.com/golang/snappy
 # github.com/google/go-cmp v0.5.6
 ## explicit
 github.com/google/go-cmp/cmp
@ -136,10 +133,11 @@ github.com/hashicorp/hcl/json/scanner
 github.com/hashicorp/hcl/json/token
 # github.com/inconshreveable/mousetrap v1.0.0
 github.com/inconshreveable/mousetrap
-# github.com/klauspost/compress v1.13.1
-## explicit
+# github.com/klauspost/compress v1.13.5
+github.com/klauspost/compress
 github.com/klauspost/compress/fse
 github.com/klauspost/compress/huff0
+github.com/klauspost/compress/internal/snapref
 github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
 # github.com/magiconair/properties v1.8.5