1
0
mirror of https://github.com/rclone/rclone.git synced 2025-01-13 20:38:12 +02:00

rcat: configurable small files cutoff and implement proper upload verification

This commit is contained in:
Stefan Breunig 2017-09-11 08:26:53 +02:00
parent 57817397a0
commit 80b1f2a494
4 changed files with 170 additions and 125 deletions

View File

@ -22,13 +22,24 @@ rclone rcat reads from standard input (stdin) and copies it to a
single remote file. single remote file.
echo "hello world" | rclone rcat remote:path/to/file echo "hello world" | rclone rcat remote:path/to/file
ffmpeg - | rclone rcat --checksum remote:path/to/file
Note that since the size is not known in advance, chunking options If the remote file already exists, it will be overwritten.
will likely be ignored. The upload can also not be retried because
the data is not kept around until the upload succeeds. If you need rcat will try to upload small files in a single request, which is
to transfer a lot of data, you're better off caching locally and usually more efficient than the streaming/chunked upload endpoints,
then ` + "`rclone move`" + ` it to the destination. which use multiple requests. Exact behaviour depends on the remote.
`, What is considered a small file may be set through
` + "`--streaming-upload-cutoff`" + `. Uploading only starts after
the cutoff is reached or if the file ends before that. The data
must fit into RAM. The cutoff needs to be small enough to adhere
the limits of your remote, please see there. Generally speaking,
setting this cutoff too high will decrease your performance.
Note that the upload can also not be retried because the data is
not kept around until the upload succeeds. If you need to transfer
a lot of data, you're better off caching locally and then
` + "`rclone move`" + ` it to the destination.`,
Run: func(command *cobra.Command, args []string) { Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(1, 1, command, args) cmd.CheckArgs(1, 1, command, args)

View File

@ -102,6 +102,7 @@ var (
bindAddr = StringP("bind", "", "", "Local address to bind to for outgoing connections, IPv4, IPv6 or name.") bindAddr = StringP("bind", "", "", "Local address to bind to for outgoing connections, IPv4, IPv6 or name.")
disableFeatures = StringP("disable", "", "", "Disable a comma separated list of features. Use help to see a list.") disableFeatures = StringP("disable", "", "", "Disable a comma separated list of features. Use help to see a list.")
userAgent = StringP("user-agent", "", "rclone/"+Version, "Set the user-agent to a specified string. The default is rclone/ version") userAgent = StringP("user-agent", "", "rclone/"+Version, "Set the user-agent to a specified string. The default is rclone/ version")
streamingUploadCutoff = SizeSuffix(100 * 1024)
logLevel = LogLevelNotice logLevel = LogLevelNotice
statsLogLevel = LogLevelInfo statsLogLevel = LogLevelInfo
bwLimit BwTimetable bwLimit BwTimetable
@ -117,6 +118,7 @@ func init() {
VarP(&statsLogLevel, "stats-log-level", "", "Log level to show --stats output DEBUG|INFO|NOTICE|ERROR") VarP(&statsLogLevel, "stats-log-level", "", "Log level to show --stats output DEBUG|INFO|NOTICE|ERROR")
VarP(&bwLimit, "bwlimit", "", "Bandwidth limit in kBytes/s, or use suffix b|k|M|G or a full timetable.") VarP(&bwLimit, "bwlimit", "", "Bandwidth limit in kBytes/s, or use suffix b|k|M|G or a full timetable.")
VarP(&bufferSize, "buffer-size", "", "Buffer size when copying files.") VarP(&bufferSize, "buffer-size", "", "Buffer size when copying files.")
VarP(&streamingUploadCutoff, "streaming-upload-cutoff", "", "Cutoff for switching to chunked upload if file size is unknown. Upload starts after reaching cutoff or when file ends.")
} }
// crypt internals // crypt internals
@ -238,6 +240,7 @@ type ConfigInfo struct {
TPSLimitBurst int TPSLimitBurst int
BindAddr net.IP BindAddr net.IP
DisableFeatures []string DisableFeatures []string
StreamingUploadCutoff SizeSuffix
} }
// Return the path to the configuration file // Return the path to the configuration file
@ -377,6 +380,7 @@ func LoadConfig() {
Config.TPSLimit = *tpsLimit Config.TPSLimit = *tpsLimit
Config.TPSLimitBurst = *tpsLimitBurst Config.TPSLimitBurst = *tpsLimitBurst
Config.BufferSize = bufferSize Config.BufferSize = bufferSize
Config.StreamingUploadCutoff = streamingUploadCutoff
Config.TrackRenames = *trackRenames Config.TrackRenames = *trackRenames

View File

@ -65,7 +65,7 @@ func HashEquals(src, dst string) bool {
// err - may return an error which will already have been logged // err - may return an error which will already have been logged
// //
// If an error is returned it will return equal as false // If an error is returned it will return equal as false
func CheckHashes(src, dst Object) (equal bool, hash HashType, err error) { func CheckHashes(src ObjectInfo, dst Object) (equal bool, hash HashType, err error) {
common := src.Fs().Hashes().Overlap(dst.Fs().Hashes()) common := src.Fs().Hashes().Overlap(dst.Fs().Hashes())
// Debugf(nil, "Shared hashes: %v", common) // Debugf(nil, "Shared hashes: %v", common)
if common.Count() == 0 { if common.Count() == 0 {
@ -115,11 +115,11 @@ func CheckHashes(src, dst Object) (equal bool, hash HashType, err error) {
// //
// Otherwise the file is considered to be not equal including if there // Otherwise the file is considered to be not equal including if there
// were errors reading info. // were errors reading info.
func Equal(src, dst Object) bool { func Equal(src ObjectInfo, dst Object) bool {
return equal(src, dst, Config.SizeOnly, Config.CheckSum) return equal(src, dst, Config.SizeOnly, Config.CheckSum)
} }
func equal(src, dst Object, sizeOnly, checkSum bool) bool { func equal(src ObjectInfo, dst Object, sizeOnly, checkSum bool) bool {
if !Config.IgnoreSize { if !Config.IgnoreSize {
if src.Size() != dst.Size() { if src.Size() != dst.Size() {
Debugf(src, "Sizes differ") Debugf(src, "Sizes differ")
@ -1587,27 +1587,44 @@ func Rcat(fdst Fs, dstFileName string, in0 io.ReadCloser, modTime time.Time) (er
Stats.Transferring(dstFileName) Stats.Transferring(dstFileName)
defer func() { defer func() {
Stats.DoneTransferring(dstFileName, err == nil) Stats.DoneTransferring(dstFileName, err == nil)
if err := in0.Close(); err != nil { if err = in0.Close(); err != nil {
Debugf(fdst, "Rcat: failed to close source: %v", err) Debugf(fdst, "Rcat: failed to close source: %v", err)
} }
}() }()
hashOption := &HashesOption{Hashes: NewHashSet()} hashOption := &HashesOption{Hashes: fdst.Hashes()}
hash, err := NewMultiHasherTypes(fdst.Hashes())
in := in0 if err != nil {
buf := make([]byte, 100*1024)
if n, err := io.ReadFull(in0, buf); err != nil {
Debugf(fdst, "File to upload is small, uploading instead of streaming")
in = ioutil.NopCloser(bytes.NewReader(buf[:n]))
in = NewAccountSizeName(in, int64(n), dstFileName).WithBuffer()
if !Config.SizeOnly {
hashOption = &HashesOption{Hashes: HashSet(fdst.Hashes().GetOne())}
}
objInfo := NewStaticObjectInfo(dstFileName, modTime, int64(n), false, nil, nil)
_, err := fdst.Put(in, objInfo, hashOption)
return err return err
} }
in = ioutil.NopCloser(io.MultiReader(bytes.NewReader(buf), in0)) readCounter := NewCountingReader(in0)
trackingIn := io.TeeReader(readCounter, hash)
compare := func(dst Object) error {
src := NewStaticObjectInfo(dstFileName, modTime, int64(readCounter.BytesRead()), false, hash.Sums(), fdst)
if !Equal(src, dst) {
Stats.Error()
err = errors.Errorf("corrupted on transfer")
Errorf(dst, "%v", err)
return err
}
return nil
}
// check if file small enough for direct upload
buf := make([]byte, Config.StreamingUploadCutoff)
if n, err := io.ReadFull(trackingIn, buf); err == io.EOF || err == io.ErrUnexpectedEOF {
Debugf(fdst, "File to upload is small (%d bytes), uploading instead of streaming", n)
in := ioutil.NopCloser(bytes.NewReader(buf[:n]))
in = NewAccountSizeName(in, int64(n), dstFileName).WithBuffer()
objInfo := NewStaticObjectInfo(dstFileName, modTime, int64(n), false, nil, nil)
dst, err := fdst.Put(in, objInfo, hashOption)
if err != nil {
return err
}
return compare(dst)
}
in := ioutil.NopCloser(io.MultiReader(bytes.NewReader(buf), trackingIn))
fStreamTo := fdst fStreamTo := fdst
canStream := fdst.Features().PutStream != nil canStream := fdst.Features().PutStream != nil
@ -1626,10 +1643,6 @@ func Rcat(fdst Fs, dstFileName string, in0 io.ReadCloser, modTime time.Time) (er
fStreamTo = tmpLocalFs fStreamTo = tmpLocalFs
} }
if !Config.SizeOnly {
hashOption = &HashesOption{Hashes: HashSet(fStreamTo.Hashes().GetOne())}
}
in = NewAccountSizeName(in, -1, dstFileName).WithBuffer() in = NewAccountSizeName(in, -1, dstFileName).WithBuffer()
if Config.DryRun { if Config.DryRun {
@ -1641,10 +1654,16 @@ func Rcat(fdst Fs, dstFileName string, in0 io.ReadCloser, modTime time.Time) (er
objInfo := NewStaticObjectInfo(dstFileName, modTime, -1, false, nil, nil) objInfo := NewStaticObjectInfo(dstFileName, modTime, -1, false, nil, nil)
tmpObj, err := fStreamTo.Features().PutStream(in, objInfo, hashOption) tmpObj, err := fStreamTo.Features().PutStream(in, objInfo, hashOption)
if err == nil && !canStream { if err != nil {
err = Copy(fdst, nil, dstFileName, tmpObj)
}
return err return err
}
if err = compare(tmpObj); err != nil {
return err
}
if !canStream {
return Copy(fdst, nil, dstFileName, tmpObj)
}
return nil
} }
// Rmdirs removes any empty directories (or directories only // Rmdirs removes any empty directories (or directories only

View File

@ -732,6 +732,10 @@ func TestCat(t *testing.T) {
} }
func TestRcat(t *testing.T) { func TestRcat(t *testing.T) {
checkSumBefore := fs.Config.CheckSum
defer func() { fs.Config.CheckSum = checkSumBefore }()
check := func() {
r := NewRun(t) r := NewRun(t)
defer r.Finalise() defer r.Finalise()
@ -740,7 +744,7 @@ func TestRcat(t *testing.T) {
data1 := "this is some really nice test data" data1 := "this is some really nice test data"
path1 := "small_file_from_pipe" path1 := "small_file_from_pipe"
data2 := string(make([]byte, 100*1024+1)) data2 := string(make([]byte, fs.Config.StreamingUploadCutoff+1))
path2 := "big_file_from_pipe" path2 := "big_file_from_pipe"
in := ioutil.NopCloser(strings.NewReader(data1)) in := ioutil.NopCloser(strings.NewReader(data1))
@ -754,6 +758,13 @@ func TestRcat(t *testing.T) {
file1 := fstest.NewItem(path1, data1, t1) file1 := fstest.NewItem(path1, data1, t1)
file2 := fstest.NewItem(path2, data2, t2) file2 := fstest.NewItem(path2, data2, t2)
fstest.CheckItems(t, r.fremote, file1, file2) fstest.CheckItems(t, r.fremote, file1, file2)
}
fs.Config.CheckSum = true
check()
fs.Config.CheckSum = false
check()
} }
func TestRmdirs(t *testing.T) { func TestRmdirs(t *testing.T) {