1
0
mirror of https://github.com/rclone/rclone.git synced 2025-10-06 05:47:10 +02:00

march: Implement --assume-listings-sorted to speed up sync starting

This can be used when the source and destination backends are
guaranteed to return the items in the same sorted order.
This commit is contained in:
Nick Craig-Wood
2025-06-30 16:19:25 +01:00
parent c0a2d730a6
commit 0e1ca6f993
5 changed files with 84 additions and 2 deletions

View File

@@ -659,6 +659,30 @@ However, a suffix of `B` for Byte, `K` for KiB, `M` for MiB,
`G` for GiB, `T` for TiB and `P` for PiB may be used. These are
the binary units, e.g. 1, 2\*\*10, 2\*\*20, 2\*\*30 respectively.
### --assume-listings-sorted
This flag can be used when the source and destination backends are
guaranteed to return the items in the same sorted order and in that
case it will speed up the sync.
Not all backends are guaranteed to return sorted entries (e.g. local)
but s3 should, so an s3 to s3 sync could benefit from this flag.
If rclone finds an out of order directory entry in the source or the
destination then it will cancel the sync with an error like:
out of order listing in source (remote:dir)
In this case you should remove the `--assume-listings-sorted` flag.
If you are using `--assume-listings-sorted` then rclone will assume
`--no-unicode-normalization` and it will compare file names in a case
sensitive way.
Normally sorting directory entries is not a bottleneck, but it can
become so with syncs of millions of items in a single directory as the
sync will not start until the directory listing is complete.
### --backup-dir=DIR ###
When using `sync`, `copy` or `move` any files which would have been

View File

@@ -555,6 +555,12 @@ var ConfigOptionsInfo = Options{{
Default: []string{},
Help: "Transform paths during the copy process.",
Groups: "Copy",
}, {
Name: "assume_listings_sorted",
Default: false,
Advanced: true,
Help: "If set will not sort listings. If listings aren't sorted the sync may go wrong.",
Groups: "Copy",
}}
// ConfigInfo is filesystem config options
@@ -667,6 +673,7 @@ type ConfigInfo struct {
MetadataMapper SpaceSepList `config:"metadata_mapper"`
MaxConnections int `config:"max_connections"`
NameTransform []string `config:"name_transform"`
AssumeListingsSorted bool `config:"assume_listings_sorted"`
}
func init() {

View File

@@ -45,6 +45,7 @@ type Sorter struct {
keyFn KeyFn // transform an entry into a sort key
cutoff int // number of entries above which we start extsort
extSort bool // true if we are ext sorting
noSort bool // true if we aren't sorting
inputChan chan string // for sending data to the ext sort
outputChan chan string // for receiving data from the ext sort
errChan chan error // for getting errors from the ext sort
@@ -78,6 +79,7 @@ func NewSorter(ctx context.Context, f NewObjecter, callback fs.ListRCallback, ke
keyFn: keyFn,
cutoff: ci.ListCutoff,
errs: errcount.New(),
noSort: ci.AssumeListingsSorted,
}, nil
}
@@ -172,6 +174,9 @@ func (ls *Sorter) startExtSort() (err error) {
//
// Safe to call from concurrent go routines
func (ls *Sorter) Add(entries fs.DirEntries) error {
if ls.noSort {
return ls.callback(entries)
}
ls.mu.Lock()
defer ls.mu.Unlock()
if ls.extSort {
@@ -268,6 +273,9 @@ func (lh *listHelper) Flush() error {
// Send the sorted entries to the callback.
func (ls *Sorter) Send() (err error) {
if ls.noSort {
return nil
}
ls.mu.Lock()
defer ls.mu.Unlock()

View File

@@ -46,6 +46,46 @@ func TestSorter(t *testing.T) {
assert.Equal(t, fs.DirEntries(nil), ls.entries)
}
// TestSorterAssumeSorted checks that with AssumeListingsSorted set the
// Sorter hands entries to the callback in exactly the order they were
// added, and that it does not hold on to any of them.
func TestSorterAssumeSorted(t *testing.T) {
	ctx, ci := fs.AddConfig(context.Background())
	ci.AssumeListingsSorted = true

	// These are deliberately not in ascending order so that any
	// re-ordering done by the Sorter would show up in the callback.
	unsorted := fs.DirEntries{
		mockdir.New("c"),
		mockobject.Object("C"),
		mockdir.New("b"),
		mockobject.Object("B"),
		mockdir.New("a"),
		mockobject.Object("A"),
	}
	total := len(unsorted)

	// delivered counts the entries the callback has received so far
	// and doubles as the index of the next expected entry.
	delivered := 0
	onEntries := func(batch fs.DirEntries) error {
		for i := range batch {
			require.Equal(t, unsorted[delivered], batch[i])
			delivered++
		}
		return nil
	}

	ls, err := NewSorter(ctx, nil, onEntries, nil)
	require.NoError(t, err)

	// Add should pass each batch on to the callback as it arrives
	err = ls.Add(unsorted[0:2])
	require.NoError(t, err)
	err = ls.Add(unsorted[2:6])
	require.NoError(t, err)
	assert.Equal(t, total, delivered)
	assert.Equal(t, fs.DirEntries(nil), ls.entries)

	// Send should succeed without delivering anything further
	require.NoError(t, ls.Send())
	assert.Equal(t, total, delivered)

	// CleanUp should leave the counters and stored entries untouched
	ls.CleanUp()
	assert.Equal(t, total, delivered)
	assert.Equal(t, fs.DirEntries(nil), ls.entries)
}
func TestSorterIdentity(t *testing.T) {
ctx := context.Background()
cmpFn := func(a, b fs.DirEntry) int {

View File

@@ -13,6 +13,7 @@ import (
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/dirtree"
"github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/fserrors"
"github.com/rclone/rclone/fs/list"
"github.com/rclone/rclone/fs/walk"
"github.com/rclone/rclone/lib/transform"
@@ -328,7 +329,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
continue
} else if srcName < srcPrevName {
// this should never happen since we sort the listings
panic("Out of order listing in source")
// however the user may be using the --assume-listings-sorted flag
return fserrors.FatalError(fmt.Errorf("out of order listing in source (%v)", src.Fs()))
}
}
if dst != nil && dstPrev != nil {
@@ -338,7 +340,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
continue
} else if dstName < dstPrevName {
// this should never happen since we sort the listings
panic("Out of order listing in destination")
// however the user may be using the --assume-listings-sorted flag
return fserrors.FatalError(fmt.Errorf("out of order listing in destination (%v)", dst.Fs()))
}
}
switch {