-
Notifications
You must be signed in to change notification settings - Fork 337
Closed
Description
Following a bug report at restic/restic#4523, I've traced the data corruption back to the zstd library.
The following two minimized data chunks come back corrupted after being compressed at the SpeedBestCompression level and then decompressed. No corruption occurs at the default compression level. I've tested versions v1.16.7 (via restic using Go 1.20.7) and v1.17.1 (using Go 1.21.3).
correct-3d0e366bad4a5e9b443f407b114756a0f5a8153dbc242e0b2601c707136815eb.bin-minimized.txt
correct-dc82d97be7683ecd41097ab02d7b15de81e8bbcd1c476c50b254b1f458090929.bin-minimized.txt
To produce the minimized examples, I've used the following code snippet, which also serves as a reproducer:
package main
import (
"bytes"
"os"
"sort"
"github.com/klauspost/compress/zstd"
)
// buildEncoder constructs the zstd encoder used by the reproducer. It is
// configured for SpeedBestCompression, with CRC output disabled and a
// 512 KiB window. It panics if the encoder cannot be created.
func buildEncoder() *zstd.Encoder {
	w, err := zstd.NewWriter(
		nil,
		// Compress at the level that triggers the reported corruption.
		zstd.WithEncoderLevel(zstd.SpeedBestCompression),
		// No CRC: enough checks exist elsewhere, and omitting it makes the
		// compressed data four bytes shorter.
		zstd.WithEncoderCRC(false),
		// 512 KiB window, giving good lookbehind for usual blob sizes.
		zstd.WithWindowSize(512*1024),
	)
	if err != nil {
		panic(err)
	}
	return w
}
// buildDecoder constructs the zstd decoder used by the reproducer. It panics
// if the decoder cannot be created.
func buildDecoder() *zstd.Decoder {
	r, err := zstd.NewReader(
		nil,
		// A concurrency of 0 asks the decoder to use all available cores.
		zstd.WithDecoderConcurrency(0),
		// Cap decompressed memory at a very high, conservative 16 GiB.
		zstd.WithDecoderMaxMemory(16<<30),
	)
	if err != nil {
		panic(err)
	}
	return r
}
// Shared encoder and decoder, built once at package initialization and
// reused for every verifyCompression call.
var (
	enc = buildEncoder()
	dec = buildDecoder()
)
// verifyCompression round-trips data through the package-level enc and dec
// and reports whether the decoded bytes are equal to the input. It panics if
// decoding returns an error.
func verifyCompression(data []byte) bool {
	encoded := enc.EncodeAll(data, nil)

	// Destination buffer with capacity for the expected decoded length.
	dst := make([]byte, 0, len(data))
	roundTripped, err := dec.DecodeAll(encoded, dst)
	if err != nil {
		panic(err)
	}

	return bytes.Equal(data, roundTripped)
}
// main shrinks an input file that fails the compress/decompress round trip
// down to a smaller failing slice and writes it next to the input.
//
// It reads the file named by the first command-line argument, then uses two
// binary searches (which assume the failure is monotonic in the slice
// bounds): first for the end offset idx of the shortest failing prefix
// data[:idx], then for the latest start offset startIdx such that
// data[startIdx:idx] still fails. The result is written to
// "<input>-minimized"; the file is opened with O_EXCL, so an existing file
// is not overwritten. Every failure panics, except a missing argument, which
// prints a usage line and exits with status 2.
func main() {
	// Without this check a missing argument surfaces as an opaque
	// index-out-of-range panic on os.Args[1].
	if len(os.Args) < 2 {
		println("usage: go run main.go <input-file>")
		os.Exit(2)
	}
	path := os.Args[1]

	data, err := os.ReadFile(path)
	if err != nil {
		panic(err)
	}

	// Smallest end offset in [0, len(data)) whose prefix fails; len(data) if
	// no probed prefix failed.
	idx := sort.Search(len(data), func(i int) bool {
		return !verifyCompression(data[:i])
	})
	if verifyCompression(data[:idx]) {
		panic("missing compression error")
	}

	// sort.Search yields the first start offset that round-trips correctly;
	// the offset just before it is the last one that still fails.
	startIdx := sort.Search(idx, func(i int) bool {
		return verifyCompression(data[i:idx])
	}) - 1
	// Guard against a negative start (e.g. when idx == 0), which would make
	// the slice expressions below panic with a bounds error.
	if startIdx < 0 {
		startIdx = 0
	}
	if verifyCompression(data[startIdx:idx]) {
		panic("missing compression error")
	}

	println("minimal example from", startIdx, "to", idx)

	f, err := os.OpenFile(path+"-minimized", os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o666)
	if err != nil {
		panic(err)
	}
	if _, err = f.Write(data[startIdx:idx]); err != nil {
		panic(err)
	}
	// Check Close explicitly: a failed close can mean the data was not
	// fully written.
	if err = f.Close(); err != nil {
		panic(err)
	}
}
Run it using
go mod init test
go mod tidy
go run main.go correct-3d0e366bad4a5e9b443f407b114756a0f5a8153dbc242e0b2601c707136815eb.bin
rawtaz
Metadata
Metadata
Assignees
Labels
No labels