mirror of
https://github.com/rclone/rclone.git
synced 2025-10-06 05:47:10 +02:00
march: Implement --assume-listings-sorted
to speed up sync starting
This can be used when the source and destination backends are guaranteed to return the items in the same sorted order.
This commit is contained in:
@@ -659,6 +659,30 @@ However, a suffix of `B` for Byte, `K` for KiB, `M` for MiB,
|
|||||||
`G` for GiB, `T` for TiB and `P` for PiB may be used. These are
|
`G` for GiB, `T` for TiB and `P` for PiB may be used. These are
|
||||||
the binary units, e.g. 1, 2\*\*10, 2\*\*20, 2\*\*30 respectively.
|
the binary units, e.g. 1, 2\*\*10, 2\*\*20, 2\*\*30 respectively.
|
||||||
|
|
||||||
|
### --assume-listings-sorted
|
||||||
|
|
||||||
|
This flag can be used when the source and destination backends are
|
||||||
|
guaranteed to return the items in the same sorted order and in that
|
||||||
|
case it will speed up the sync.
|
||||||
|
|
||||||
|
Not all backends are guaranteed to return sorted entries (e.g. local)
|
||||||
|
but s3 should, so an s3 to s3 sync could benefit from this flag.
|
||||||
|
|
||||||
|
If rclone finds an out-of-order directory entry then it will cancel
|
||||||
|
the sync with the error:
|
||||||
|
|
||||||
|
out of order listing in source (remote:dir)
|
||||||
|
|
||||||
|
In this case you should remove the `--assume-listings-sorted` flag.
|
||||||
|
|
||||||
|
If you are using `--assume-listings-sorted` then rclone will assume
|
||||||
|
`--no-unicode-normalization` and it will compare file names in a
|
||||||
|
case-sensitive way.
|
||||||
|
|
||||||
|
Normally sorting directory entries is not a bottleneck, but it can
|
||||||
|
become so with syncs of millions of items in a single directory as the
|
||||||
|
sync will not start until the directory listing is complete.
|
||||||
|
|
||||||
### --backup-dir=DIR ###
|
### --backup-dir=DIR ###
|
||||||
|
|
||||||
When using `sync`, `copy` or `move` any files which would have been
|
When using `sync`, `copy` or `move` any files which would have been
|
||||||
|
@@ -555,6 +555,12 @@ var ConfigOptionsInfo = Options{{
|
|||||||
Default: []string{},
|
Default: []string{},
|
||||||
Help: "Transform paths during the copy process.",
|
Help: "Transform paths during the copy process.",
|
||||||
Groups: "Copy",
|
Groups: "Copy",
|
||||||
|
}, {
|
||||||
|
Name: "assume_listings_sorted",
|
||||||
|
Default: false,
|
||||||
|
Advanced: true,
|
||||||
|
Help: "If set will not sort listings. If listings aren't sorted the sync may go wrong.",
|
||||||
|
Groups: "Copy",
|
||||||
}}
|
}}
|
||||||
|
|
||||||
// ConfigInfo is filesystem config options
|
// ConfigInfo is filesystem config options
|
||||||
@@ -667,6 +673,7 @@ type ConfigInfo struct {
|
|||||||
MetadataMapper SpaceSepList `config:"metadata_mapper"`
|
MetadataMapper SpaceSepList `config:"metadata_mapper"`
|
||||||
MaxConnections int `config:"max_connections"`
|
MaxConnections int `config:"max_connections"`
|
||||||
NameTransform []string `config:"name_transform"`
|
NameTransform []string `config:"name_transform"`
|
||||||
|
AssumeListingsSorted bool `config:"assume_listings_sorted"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
@@ -45,6 +45,7 @@ type Sorter struct {
|
|||||||
keyFn KeyFn // transform an entry into a sort key
|
keyFn KeyFn // transform an entry into a sort key
|
||||||
cutoff int // number of entries above which we start extsort
|
cutoff int // number of entries above which we start extsort
|
||||||
extSort bool // true if we are ext sorting
|
extSort bool // true if we are ext sorting
|
||||||
|
noSort bool // true if we aren't sorting
|
||||||
inputChan chan string // for sending data to the ext sort
|
inputChan chan string // for sending data to the ext sort
|
||||||
outputChan chan string // for receiving data from the ext sort
|
outputChan chan string // for receiving data from the ext sort
|
||||||
errChan chan error // for getting errors from the ext sort
|
errChan chan error // for getting errors from the ext sort
|
||||||
@@ -78,6 +79,7 @@ func NewSorter(ctx context.Context, f NewObjecter, callback fs.ListRCallback, ke
|
|||||||
keyFn: keyFn,
|
keyFn: keyFn,
|
||||||
cutoff: ci.ListCutoff,
|
cutoff: ci.ListCutoff,
|
||||||
errs: errcount.New(),
|
errs: errcount.New(),
|
||||||
|
noSort: ci.AssumeListingsSorted,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -172,6 +174,9 @@ func (ls *Sorter) startExtSort() (err error) {
|
|||||||
//
|
//
|
||||||
// Safe to call from concurrent go routines
|
// Safe to call from concurrent go routines
|
||||||
func (ls *Sorter) Add(entries fs.DirEntries) error {
|
func (ls *Sorter) Add(entries fs.DirEntries) error {
|
||||||
|
if ls.noSort {
|
||||||
|
return ls.callback(entries)
|
||||||
|
}
|
||||||
ls.mu.Lock()
|
ls.mu.Lock()
|
||||||
defer ls.mu.Unlock()
|
defer ls.mu.Unlock()
|
||||||
if ls.extSort {
|
if ls.extSort {
|
||||||
@@ -268,6 +273,9 @@ func (lh *listHelper) Flush() error {
|
|||||||
|
|
||||||
// Send the sorted entries to the callback.
|
// Send the sorted entries to the callback.
|
||||||
func (ls *Sorter) Send() (err error) {
|
func (ls *Sorter) Send() (err error) {
|
||||||
|
if ls.noSort {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
ls.mu.Lock()
|
ls.mu.Lock()
|
||||||
defer ls.mu.Unlock()
|
defer ls.mu.Unlock()
|
||||||
|
|
||||||
|
@@ -46,6 +46,46 @@ func TestSorter(t *testing.T) {
|
|||||||
assert.Equal(t, fs.DirEntries(nil), ls.entries)
|
assert.Equal(t, fs.DirEntries(nil), ls.entries)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSorterAssumeSorted(t *testing.T) {
|
||||||
|
ctx, ci := fs.AddConfig(context.Background())
|
||||||
|
ci.AssumeListingsSorted = true
|
||||||
|
|
||||||
|
gotEntry := 0
|
||||||
|
wantEntries := fs.DirEntries{
|
||||||
|
mockdir.New("c"),
|
||||||
|
mockobject.Object("C"),
|
||||||
|
mockdir.New("b"),
|
||||||
|
mockobject.Object("B"),
|
||||||
|
mockdir.New("a"),
|
||||||
|
mockobject.Object("A"),
|
||||||
|
}
|
||||||
|
callback := func(entries fs.DirEntries) error {
|
||||||
|
for _, entry := range entries {
|
||||||
|
require.Equal(t, wantEntries[gotEntry], entry)
|
||||||
|
gotEntry++
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
ls, err := NewSorter(ctx, nil, callback, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Test Add
|
||||||
|
require.NoError(t, ls.Add(wantEntries[0:2]))
|
||||||
|
require.NoError(t, ls.Add(wantEntries[2:6]))
|
||||||
|
assert.Equal(t, 6, gotEntry)
|
||||||
|
assert.Equal(t, fs.DirEntries(nil), ls.entries)
|
||||||
|
|
||||||
|
// Test Send
|
||||||
|
err = ls.Send()
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 6, gotEntry)
|
||||||
|
|
||||||
|
// Test Cleanup
|
||||||
|
ls.CleanUp()
|
||||||
|
assert.Equal(t, 6, gotEntry)
|
||||||
|
assert.Equal(t, fs.DirEntries(nil), ls.entries)
|
||||||
|
}
|
||||||
|
|
||||||
func TestSorterIdentity(t *testing.T) {
|
func TestSorterIdentity(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
cmpFn := func(a, b fs.DirEntry) int {
|
cmpFn := func(a, b fs.DirEntry) int {
|
||||||
|
@@ -13,6 +13,7 @@ import (
|
|||||||
"github.com/rclone/rclone/fs"
|
"github.com/rclone/rclone/fs"
|
||||||
"github.com/rclone/rclone/fs/dirtree"
|
"github.com/rclone/rclone/fs/dirtree"
|
||||||
"github.com/rclone/rclone/fs/filter"
|
"github.com/rclone/rclone/fs/filter"
|
||||||
|
"github.com/rclone/rclone/fs/fserrors"
|
||||||
"github.com/rclone/rclone/fs/list"
|
"github.com/rclone/rclone/fs/list"
|
||||||
"github.com/rclone/rclone/fs/walk"
|
"github.com/rclone/rclone/fs/walk"
|
||||||
"github.com/rclone/rclone/lib/transform"
|
"github.com/rclone/rclone/lib/transform"
|
||||||
@@ -328,7 +329,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
|
|||||||
continue
|
continue
|
||||||
} else if srcName < srcPrevName {
|
} else if srcName < srcPrevName {
|
||||||
// this should never happen since we sort the listings
|
// this should never happen since we sort the listings
|
||||||
panic("Out of order listing in source")
|
// however the user may be using the --assume-listings-sorted flag
|
||||||
|
return fserrors.FatalError(fmt.Errorf("out of order listing in source (%v)", src.Fs()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if dst != nil && dstPrev != nil {
|
if dst != nil && dstPrev != nil {
|
||||||
@@ -338,7 +340,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
|
|||||||
continue
|
continue
|
||||||
} else if dstName < dstPrevName {
|
} else if dstName < dstPrevName {
|
||||||
// this should never happen since we sort the listings
|
// this should never happen since we sort the listings
|
||||||
panic("Out of order listing in destination")
|
// however the user may be using the --assume-listings-sorted flag
|
||||||
|
return fserrors.FatalError(fmt.Errorf("out of order listing in destination (%v)", dst.Fs()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch {
|
switch {
|
||||||
|
Reference in New Issue
Block a user