Commit 8f74e7d

backend/compress: add zstd compression
Added support for reading and writing zstd-compressed archives in the seekable format using "github.com/klauspost/compress/zstd" and "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg". Bumped the Go version from 1.24.0 to 1.24.4 to meet the requirements of "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg".
1 parent ee92673 commit 8f74e7d
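
The seekable format splits the stream into independently compressed zstd frames and appends a seek table, so a byte range can be decompressed without reading the whole object. Below is a minimal, self-contained sketch of that write/read round trip with the two libraries named above — an illustration only, not code from this commit; the exact seekable constructor and reader methods are assumed from the libraries' documented usage.

package main

import (
	"bytes"
	"fmt"
	"io"

	seekable "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg"
	"github.com/klauspost/compress/zstd"
)

func main() {
	// Write: wrap a plain zstd encoder in a seekable writer; Close flushes
	// the final frame and appends the seek table.
	var buf bytes.Buffer
	enc, err := zstd.NewWriter(nil)
	if err != nil {
		panic(err)
	}
	w, err := seekable.NewWriter(&buf, enc)
	if err != nil {
		panic(err)
	}
	if _, err := w.Write([]byte("hello seekable zstd")); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}

	// Read: the seekable reader consults the seek table and decodes only
	// the frames covering the requested range.
	dec, err := zstd.NewReader(nil)
	if err != nil {
		panic(err)
	}
	r, err := seekable.NewReader(bytes.NewReader(buf.Bytes()), dec)
	if err != nil {
		panic(err)
	}
	defer r.Close()

	if _, err := r.Seek(6, io.SeekStart); err != nil {
		panic(err)
	}
	out := make([]byte, 8)
	if _, err := io.ReadFull(r, out); err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // prints "seekable"
}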

File tree

11 files changed: +1093 / -228 lines changed

backend/compress/compress.go

Lines changed: 130 additions & 188 deletions
Large diffs are not rendered by default.
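Since this diff is not rendered, the following is a rough reconstruction of the compressionModeHandler interface and the small helper types that gzip_handler.go (shown further below) relies on, inferred from how that file uses them; the actual definitions in compress.go may differ in names and details.

// Reconstructed for illustration; these identifiers are defined in compress.go,
// which is not rendered in this view.

// compressionModeHandler abstracts a single compression algorithm (gzip, zstd, ...).
type compressionModeHandler interface {
	isCompressible(r io.Reader, compressionMode int) (bool, error)
	newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error)
	openGetReadCloser(ctx context.Context, o *Object, offset, limit int64,
		cr chunkedreader.ChunkedReader, closer io.Closer, options ...fs.OpenOption) (io.ReadCloser, error)
	processFileNameGetFileExtension(compressionMode int) string
	putCompress(ctx context.Context, f *Fs, in io.Reader, src fs.ObjectInfo,
		options []fs.OpenOption, mimeType string) (fs.Object, *ObjectMetadata, error)
	putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error)
	newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata
}

// compressionResult carries the algorithm-specific stream metadata (and any
// error) out of the background compression goroutine used in putCompress.
type compressionResult[M any] struct {
	err  error
	meta M
}

// ReadCloserWrapper pairs the decompressing reader with the closer of the
// underlying chunked reader so both are released together.
type ReadCloserWrapper struct {
	io.Reader
	io.Closer
}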

backend/compress/compress_test.go

Lines changed: 21 additions & 1 deletion
@@ -48,7 +48,27 @@ func TestRemoteGzip(t *testing.T) {
 	opt.ExtraConfig = []fstests.ExtraConfigItem{
 		{Name: name, Key: "type", Value: "compress"},
 		{Name: name, Key: "remote", Value: tempdir},
-		{Name: name, Key: "compression_mode", Value: "gzip"},
+		{Name: name, Key: "mode", Value: "gzip"},
+		{Name: name, Key: "level", Value: "-1"},
+	}
+	opt.QuickTestOK = true
+	fstests.Run(t, &opt)
+}
+
+// TestRemoteZstd tests ZSTD compression
+func TestRemoteZstd(t *testing.T) {
+	if *fstest.RemoteName != "" {
+		t.Skip("Skipping as -remote set")
+	}
+	tempdir := filepath.Join(os.TempDir(), "rclone-compress-test-zstd")
+	name := "TestCompressZstd"
+	opt := defaultOpt
+	opt.RemoteName = name + ":"
+	opt.ExtraConfig = []fstests.ExtraConfigItem{
+		{Name: name, Key: "type", Value: "compress"},
+		{Name: name, Key: "remote", Value: tempdir},
+		{Name: name, Key: "mode", Value: "zstd"},
+		{Name: name, Key: "level", Value: "2"},
 	}
 	opt.QuickTestOK = true
 	fstests.Run(t, &opt)
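
Assuming the standard rclone test layout, the new zstd test can be exercised on its own from the repository root with something like:

	go test ./backend/compress -run TestRemoteZstd -v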

backend/compress/gzip_handler.go

Lines changed: 207 additions & 0 deletions
@@ -0,0 +1,207 @@
package compress

import (
	"bufio"
	"bytes"
	"context"
	"crypto/md5"
	"encoding/hex"
	"errors"
	"io"

	"github.com/buengese/sgzip"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/chunkedreader"
	"github.com/rclone/rclone/fs/hash"
)

// gzipModeHandler implements compressionModeHandler for gzip
type gzipModeHandler struct{}

// isCompressible checks the compression ratio of the provided data and returns true if the ratio exceeds
// the configured threshold
func (g *gzipModeHandler) isCompressible(r io.Reader, compressionMode int) (bool, error) {
	var b bytes.Buffer
	var n int64
	w, err := sgzip.NewWriterLevel(&b, sgzip.DefaultCompression)
	if err != nil {
		return false, err
	}
	n, err = io.Copy(w, r)
	if err != nil {
		return false, err
	}
	err = w.Close()
	if err != nil {
		return false, err
	}
	ratio := float64(n) / float64(b.Len())
	return ratio > minCompressionRatio, nil
}

// newObjectGetOriginalSize returns the original file size from the metadata
func (g *gzipModeHandler) newObjectGetOriginalSize(meta *ObjectMetadata) (int64, error) {
	if meta.CompressionMetadataGzip == nil {
		return 0, errors.New("missing gzip metadata")
	}
	return meta.CompressionMetadataGzip.Size, nil
}

// openGetReadCloser opens a compressed object and returns a ReadCloser in the Open method
func (g *gzipModeHandler) openGetReadCloser(
	ctx context.Context,
	o *Object,
	offset int64,
	limit int64,
	cr chunkedreader.ChunkedReader,
	closer io.Closer,
	options ...fs.OpenOption,
) (rc io.ReadCloser, err error) {
	var file io.Reader

	if offset != 0 {
		file, err = sgzip.NewReaderAt(cr, o.meta.CompressionMetadataGzip, offset)
	} else {
		file, err = sgzip.NewReader(cr)
	}
	if err != nil {
		return nil, err
	}

	var fileReader io.Reader
	if limit != -1 {
		fileReader = io.LimitReader(file, limit)
	} else {
		fileReader = file
	}
	// Return a ReadCloser
	return ReadCloserWrapper{Reader: fileReader, Closer: closer}, nil
}

// processFileNameGetFileExtension returns the file extension for the given compression mode
func (g *gzipModeHandler) processFileNameGetFileExtension(compressionMode int) string {
	if compressionMode == Gzip {
		return gzFileExt
	}

	return ""
}

// putCompress compresses the input data and uploads it to the remote, returning the new object and its metadata
func (g *gzipModeHandler) putCompress(
	ctx context.Context,
	f *Fs,
	in io.Reader,
	src fs.ObjectInfo,
	options []fs.OpenOption,
	mimeType string,
) (fs.Object, *ObjectMetadata, error) {
	// Unwrap reader accounting
	in, wrap := accounting.UnWrap(in)

	// Add the metadata hasher
	metaHasher := md5.New()
	in = io.TeeReader(in, metaHasher)

	// Compress the file
	pipeReader, pipeWriter := io.Pipe()

	resultsGzip := make(chan compressionResult[sgzip.GzipMetadata])
	go func() {
		gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
		if err != nil {
			resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: sgzip.GzipMetadata{}}
			close(resultsGzip)
			return
		}
		_, err = io.Copy(gz, in)
		gzErr := gz.Close()
		if gzErr != nil && err == nil {
			err = gzErr
		}
		closeErr := pipeWriter.Close()
		if closeErr != nil && err == nil {
			err = closeErr
		}
		resultsGzip <- compressionResult[sgzip.GzipMetadata]{err: err, meta: gz.MetaData()}
		close(resultsGzip)
	}()

	wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has its own buffering

	// Find a hash the destination supports to compute a hash of
	// the compressed data.
	ht := f.Fs.Hashes().GetOne()
	var hasher *hash.MultiHasher
	var err error
	if ht != hash.None {
		// unwrap the accounting again
		wrappedIn, wrap = accounting.UnWrap(wrappedIn)
		hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
		if err != nil {
			return nil, nil, err
		}
		// add the hasher and re-wrap the accounting
		wrappedIn = io.TeeReader(wrappedIn, hasher)
		wrappedIn = wrap(wrappedIn)
	}

	// Transfer the data
	o, err := f.rcat(ctx, makeDataName(src.Remote(), src.Size(), f.mode), io.NopCloser(wrappedIn), src.ModTime(ctx), options)
	if err != nil {
		if o != nil {
			if removeErr := o.Remove(ctx); removeErr != nil {
				fs.Errorf(o, "Failed to remove partially transferred object: %v", removeErr)
			}
		}
		return nil, nil, err
	}
	// Check whether we got an error during compression
	result := <-resultsGzip
	if result.err != nil {
		if o != nil {
			if removeErr := o.Remove(ctx); removeErr != nil {
				fs.Errorf(o, "Failed to remove partially compressed object: %v", removeErr)
			}
		}
		return nil, nil, result.err
	}

	// Generate metadata
	meta := g.newMetadata(result.meta.Size, f.mode, result.meta, hex.EncodeToString(metaHasher.Sum(nil)), mimeType)

	// Check the hashes of the compressed data if we were comparing them
	if ht != hash.None && hasher != nil {
		err = f.verifyObjectHash(ctx, o, hasher, ht)
		if err != nil {
			return nil, nil, err
		}
	}
	return o, meta, nil
}

// putUncompressGetNewMetadata returns metadata in the putUncompress method for a specific compression algorithm
func (g *gzipModeHandler) putUncompressGetNewMetadata(o fs.Object, mode int, md5 string, mimeType string, sum []byte) (fs.Object, *ObjectMetadata, error) {
	return o, g.newMetadata(o.Size(), mode, sgzip.GzipMetadata{}, hex.EncodeToString(sum), mimeType), nil
}

// This function generates a metadata object for sgzip.GzipMetadata or SzstdMetadata.
// Warning: This function panics if cmeta is not of the expected type.
func (g *gzipModeHandler) newMetadata(size int64, mode int, cmeta any, md5 string, mimeType string) *ObjectMetadata {
	meta, ok := cmeta.(sgzip.GzipMetadata)
	if !ok {
		panic("invalid cmeta type: expected sgzip.GzipMetadata")
	}

	objMeta := new(ObjectMetadata)
	objMeta.Size = size
	objMeta.Mode = mode
	objMeta.CompressionMetadataGzip = &meta
	objMeta.CompressionMetadataZstd = nil
	objMeta.MD5 = md5
	objMeta.MimeType = mimeType

	return objMeta
}
