From 98f539de8f91192e68f8252ad1641747950570e1 Mon Sep 17 00:00:00 2001 From: nielash Date: Thu, 9 Nov 2023 05:04:33 -0500 Subject: [PATCH] bisync: refactor normalization code, fix deltas - fixes #7270 Refactored the case / unicode normalization logic to be much more efficient, and fix the last outstanding issue from #7270. Before this change, we were doing lots of for loops and re-normalizing strings we had already normalized earlier. Now, we leave the normalizing entirely to March and avoid re-transforming later, which seems to make a large difference in terms of performance. --- cmd/bisync/bilib/names.go | 24 +++ cmd/bisync/deltas.go | 145 +++++++++++++++--- cmd/bisync/listing.go | 80 ++++------ cmd/bisync/march.go | 4 + cmd/bisync/operations.go | 26 +--- cmd/bisync/queue.go | 45 +++--- ...testdir_path1.._testdir_path2.copy2to1.que | 2 - ...estdir_path1.._testdir_path2.path2.lst-new | 4 +- .../test_normalization/golden/test.log | 42 ++--- .../testdata/test_normalization/scenario.txt | 4 +- docs/content/bisync.md | 19 +-- 11 files changed, 242 insertions(+), 153 deletions(-) diff --git a/cmd/bisync/bilib/names.go b/cmd/bisync/bilib/names.go index 41c4692b5..3093dff55 100644 --- a/cmd/bisync/bilib/names.go +++ b/cmd/bisync/bilib/names.go @@ -59,3 +59,27 @@ func SaveList(list []string, path string) error { } return os.WriteFile(path, buf.Bytes(), PermSecure) } + +// AliasMap comprises a pair of names that are not equal but treated as equal for comparison purposes +// For example, when normalizing unicode and casing +// This helps reduce repeated normalization functions, which really slow things down +type AliasMap map[string]string + +// Add adds new pair to the set, in both directions +func (am AliasMap) Add(name1, name2 string) { + if name1 != name2 { + am[name1] = name2 + am[name2] = name1 + } +} + +// Alias returns the alternate version, if any, else the original. +func (am AliasMap) Alias(name1 string) string { + // note: we don't need to check normalization settings, because we already did it in March. + // the AliasMap will only exist if March paired up two unequal filenames. + name2, ok := am[name1] + if ok { + return name2 + } + return name1 +} diff --git a/cmd/bisync/deltas.go b/cmd/bisync/deltas.go index d0c0cd8aa..68fb538c3 100644 --- a/cmd/bisync/deltas.go +++ b/cmd/bisync/deltas.go @@ -16,6 +16,7 @@ import ( "github.com/rclone/rclone/fs/accounting" "github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/operations" + "golang.org/x/text/unicode/norm" ) // delta @@ -240,6 +241,9 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change ctxMove := b.opt.setDryRun(ctx) + // update AliasMap for deleted files, as march does not know about them + b.updateAliases(ctx, ds1, ds2) + // efficient isDir check // we load the listing just once and store only the dirs dirs1, dirs1Err := b.listDirsOnly(1) @@ -270,14 +274,24 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change ctxCheck, filterCheck := filter.AddConfig(ctxNew) for _, file := range ds1.sort() { + alias := b.aliases.Alias(file) d1 := ds1.deltas[file] if d1.is(deltaOther) { - d2 := ds2.deltas[file] + d2, in2 := ds2.deltas[file] + if !in2 && file != alias { + d2 = ds2.deltas[alias] + } if d2.is(deltaOther) { - if err := filterCheck.AddFile(file); err != nil { - fs.Debugf(nil, "Non-critical error adding file to list of potential conflicts to check: %s", err) - } else { - fs.Debugf(nil, "Added file to list of potential conflicts to check: %s", file) + checkit := func(filename string) { + if err := filterCheck.AddFile(filename); err != nil { + fs.Debugf(nil, "Non-critical error adding file to list of potential conflicts to check: %s", err) + } else { + fs.Debugf(nil, "Added file to list of potential conflicts to check: %s", filename) + } + } + checkit(file) + if file != alias { + checkit(alias) } } } @@ -287,12 +301,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change matches, err := b.checkconflicts(ctxCheck, filterCheck, b.fs1, b.fs2) for _, file := range ds1.sort() { + alias := b.aliases.Alias(file) p1 := path1 + file - p2 := path2 + file + p2 := path2 + alias d1 := ds1.deltas[file] if d1.is(deltaOther) { d2, in2 := ds2.deltas[file] + // try looking under alternate name + if !in2 && file != alias { + d2, in2 = ds2.deltas[alias] + } if !in2 { b.indent("Path1", p2, "Queue copy to Path2") copy1to2.Add(file) @@ -304,15 +323,26 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change b.indent("!WARNING", file, "New or changed in both paths") //if files are identical, leave them alone instead of renaming - if dirs1.has(file) && dirs2.has(file) { + if (dirs1.has(file) || dirs1.has(alias)) && (dirs2.has(file) || dirs2.has(alias)) { fs.Debugf(nil, "This is a directory, not a file. Skipping equality check and will not rename: %s", file) ls1.getPut(file, skippedDirs1) ls2.getPut(file, skippedDirs2) } else { equal := matches.Has(file) + if !equal { + equal = matches.Has(alias) + } if equal { - fs.Infof(nil, "Files are equal! Skipping: %s", file) - renameSkipped.Add(file) + if ciCheck.FixCase && file != alias { + // the content is equal but filename still needs to be FixCase'd, so copy1to2 + // the Path1 version is deemed "correct" in this scenario + fs.Infof(alias, "Files are equal but will copy anyway to fix case to %s", file) + copy1to2.Add(file) + } else { + fs.Infof(nil, "Files are equal! Skipping: %s", file) + renameSkipped.Add(file) + renameSkipped.Add(alias) + } } else { fs.Debugf(nil, "Files are NOT equal: %s", file) b.indent("!Path1", p1+"..path1", "Renaming Path1 copy") @@ -330,17 +360,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change copy1to2.Add(file + "..path1") b.indent("!Path2", p2+"..path2", "Renaming Path2 copy") - if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, file+"..path2", file); err != nil { - err = fmt.Errorf("path2 rename failed for %s: %w", file, err) + if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, alias+"..path2", alias); err != nil { + err = fmt.Errorf("path2 rename failed for %s: %w", alias, err) return } if b.opt.DryRun { - renameSkipped.Add(file) + renameSkipped.Add(alias) } else { - renamed2.Add(file) + renamed2.Add(alias) } b.indent("!Path2", p1+"..path2", "Queue copy to Path1") - copy2to1.Add(file + "..path2") + copy2to1.Add(alias + "..path2") } } handled.Add(file) @@ -348,6 +378,15 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change } else { // Path1 deleted d2, in2 := ds2.deltas[file] + // try looking under alternate name + fs.Debugf(file, "alias: %s, in2: %v", alias, in2) + if !in2 && file != alias { + fs.Debugf(file, "looking for alias: %s", alias) + d2, in2 = ds2.deltas[alias] + if in2 { + fs.Debugf(file, "detected alias: %s", alias) + } + } if !in2 { b.indent("Path2", p2, "Queue delete") delete2.Add(file) @@ -359,15 +398,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change } else if d2.is(deltaDeleted) { handled.Add(file) deletedonboth.Add(file) + deletedonboth.Add(alias) } } } for _, file := range ds2.sort() { - p1 := path1 + file + alias := b.aliases.Alias(file) + p1 := path1 + alias d2 := ds2.deltas[file] - if handled.Has(file) { + if handled.Has(file) || handled.Has(alias) { continue } if d2.is(deltaOther) { @@ -381,17 +422,11 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change } } - // find alternate names according to normalization settings - altNames1to2 := bilib.Names{} - altNames2to1 := bilib.Names{} - b.findAltNames(ctx, b.fs1, copy2to1, b.newListing1, altNames2to1) - b.findAltNames(ctx, b.fs2, copy1to2, b.newListing2, altNames1to2) - // Do the batch operation if copy2to1.NotEmpty() { changes1 = true b.indent("Path2", "Path1", "Do queued copies to") - results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1", altNames2to1) + results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1") if err != nil { return } @@ -403,7 +438,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change if copy1to2.NotEmpty() { changes2 = true b.indent("Path1", "Path2", "Do queued copies to") - results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2", altNames1to2) + results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2") if err != nil { return } @@ -458,3 +493,65 @@ func (ds *deltaSet) excessDeletes() bool { maxDelete, ds.deleted, ds.oldCount, ds.msg, quotePath(bilib.FsPath(ds.fs))) return true } + +// normally we build the AliasMap from march results, +// however, march does not know about deleted files, so need to manually check them for aliases +func (b *bisyncRun) updateAliases(ctx context.Context, ds1, ds2 *deltaSet) { + ci := fs.GetConfig(ctx) + // skip if not needed + if ci.NoUnicodeNormalization && !ci.IgnoreCaseSync && !b.fs1.Features().CaseInsensitive && !b.fs2.Features().CaseInsensitive { + return + } + if ds1.deleted < 1 && ds2.deleted < 1 { + return + } + + fs.Debugf(nil, "Updating AliasMap") + + transform := func(s string) string { + if !ci.NoUnicodeNormalization { + s = norm.NFC.String(s) + } + // note: march only checks the dest, but we check both here + if ci.IgnoreCaseSync || b.fs1.Features().CaseInsensitive || b.fs2.Features().CaseInsensitive { + s = strings.ToLower(s) + } + return s + } + + delMap1 := map[string]string{} // [transformedname]originalname + delMap2 := map[string]string{} // [transformedname]originalname + fullMap1 := map[string]string{} // [transformedname]originalname + fullMap2 := map[string]string{} // [transformedname]originalname + + for _, name := range ls1.list { + fullMap1[transform(name)] = name + } + for _, name := range ls2.list { + fullMap2[transform(name)] = name + } + + addDeletes := func(ds *deltaSet, delMap, fullMap map[string]string) { + for _, file := range ds.sort() { + d := ds.deltas[file] + if d.is(deltaDeleted) { + delMap[transform(file)] = file + fullMap[transform(file)] = file + } + } + } + addDeletes(ds1, delMap1, fullMap1) + addDeletes(ds2, delMap2, fullMap2) + + addAliases := func(delMap, fullMap map[string]string) { + for transformedname, name := range delMap { + matchedName, found := fullMap[transformedname] + if found && name != matchedName { + fs.Debugf(name, "adding alias %s", matchedName) + b.aliases.Add(name, matchedName) + } + } + } + addAliases(delMap1, fullMap2) + addAliases(delMap2, fullMap1) +} diff --git a/cmd/bisync/listing.go b/cmd/bisync/listing.go index 70a2aaf5f..5c44101f0 100644 --- a/cmd/bisync/listing.go +++ b/cmd/bisync/listing.go @@ -5,6 +5,7 @@ package bisync import ( "bufio" "context" + "errors" "fmt" "io" "os" @@ -20,7 +21,6 @@ import ( "github.com/rclone/rclone/fs/hash" "github.com/rclone/rclone/fs/operations" "golang.org/x/exp/slices" - "golang.org/x/text/unicode/norm" ) // ListingHeader defines first line of a listing @@ -407,18 +407,6 @@ func ConvertPrecision(Modtime time.Time, dst fs.Fs) time.Time { return Modtime } -// ApplyTransforms handles unicode and case normalization -func ApplyTransforms(ctx context.Context, dst fs.Fs, s string) string { - ci := fs.GetConfig(ctx) - if !ci.NoUnicodeNormalization { - s = norm.NFC.String(s) - } - if ci.IgnoreCaseSync || dst.Features().CaseInsensitive { - s = strings.ToLower(s) - } - return s -} - // modifyListing will modify the listing based on the results of the sync func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, results []Results, queues queues, is1to2 bool) (err error) { queue := queues.copy2to1 @@ -448,18 +436,6 @@ func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, res srcList.hash = src.Hashes().GetOne() dstList.hash = dst.Hashes().GetOne() } - dstListNew, err := b.loadListing(dstListing + "-new") - if err != nil { - return fmt.Errorf("cannot read new listing: %w", err) - } - // for resync only, dstListNew will be empty, so need to use results instead - if b.opt.Resync { - for _, result := range results { - if result.Name != "" && result.IsDst { - dstListNew.put(result.Name, result.Size, result.Modtime, result.Hash, "-", result.Flags) - } - } - } srcWinners := newFileList() dstWinners := newFileList() @@ -471,6 +447,10 @@ func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, res continue } + if result.AltName != "" { + b.aliases.Add(result.Name, result.AltName) + } + if result.Flags == "d" && !b.opt.CreateEmptySrcDirs { continue } @@ -496,6 +476,7 @@ func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, res } } + ci := fs.GetConfig(ctx) updateLists := func(side string, winners, list *fileList) { for _, queueFile := range queue.ToList() { if !winners.has(queueFile) && list.has(queueFile) && !errors.has(queueFile) { @@ -506,28 +487,17 @@ func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, res // copies to side new := winners.get(queueFile) - // handle normalization according to settings - ci := fs.GetConfig(ctx) - if side == "dst" && (!ci.NoUnicodeNormalization || ci.IgnoreCaseSync || dst.Features().CaseInsensitive) { - // search list for existing file that matches queueFile when normalized - normalizedName := ApplyTransforms(ctx, dst, queueFile) - matchFound := false - matchedName := "" - for _, filename := range dstListNew.list { - if ApplyTransforms(ctx, dst, filename) == normalizedName { - matchFound = true - matchedName = filename // original, not normalized - break - } - } - if matchFound && matchedName != queueFile { + // handle normalization + if side == "dst" { + alias := b.aliases.Alias(queueFile) + if alias != queueFile { // use the (non-identical) existing name, unless --fix-case if ci.FixCase { - fs.Debugf(direction, "removing %s and adding %s as --fix-case was specified", matchedName, queueFile) - list.remove(matchedName) + fs.Debugf(direction, "removing %s and adding %s as --fix-case was specified", alias, queueFile) + list.remove(alias) } else { - fs.Debugf(direction, "casing/unicode difference detected. using %s instead of %s", matchedName, queueFile) - queueFile = matchedName + fs.Debugf(direction, "casing/unicode difference detected. using %s instead of %s", alias, queueFile) + queueFile = alias } } } @@ -613,6 +583,16 @@ func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, res } if filterRecheck.HaveFilesFrom() { + // also include any aliases + recheckFiles := filterRecheck.Files() + for recheckFile := range recheckFiles { + alias := b.aliases.Alias(recheckFile) + if recheckFile != alias { + if err := filterRecheck.AddFile(alias); err != nil { + fs.Debugf(alias, "error adding file to recheck filter: %v", err) + } + } + } b.recheck(ctxRecheck, src, dst, srcList, dstList, is1to2) } @@ -661,7 +641,7 @@ func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList, for _, srcObj := range srcObjs { fs.Debugf(srcObj, "rechecking") for _, dstObj := range dstObjs { - if srcObj.Remote() == dstObj.Remote() { + if srcObj.Remote() == dstObj.Remote() || srcObj.Remote() == b.aliases.Alias(dstObj.Remote()) { if operations.Equal(ctxRecheck, srcObj, dstObj) || b.opt.DryRun { putObj(srcObj, src, srcList) putObj(dstObj, dst, dstList) @@ -673,8 +653,13 @@ func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList, } // if srcObj not resolved by now (either because no dstObj match or files not equal), // roll it back to old version, so it gets retried next time. + // skip and error during --resync, as rollback is not possible if !slices.Contains(resolved, srcObj.Remote()) && !b.opt.DryRun { - toRollback = append(toRollback, srcObj.Remote()) + if b.opt.Resync { + b.handleErr(srcObj, "Unable to rollback during --resync", errors.New("no dstObj match or files not equal"), true, false) + } else { + toRollback = append(toRollback, srcObj.Remote()) + } } } if len(toRollback) > 0 { @@ -683,6 +668,9 @@ func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList, b.handleErr(oldSrc, "error loading old src listing", err, true, true) oldDst, err := b.loadListing(dstListing + "-old") b.handleErr(oldDst, "error loading old dst listing", err, true, true) + if b.critical { + return + } for _, item := range toRollback { rollback(item, oldSrc, srcList) diff --git a/cmd/bisync/march.go b/cmd/bisync/march.go index 6e8c5bf81..66d8fdb05 100644 --- a/cmd/bisync/march.go +++ b/cmd/bisync/march.go @@ -15,6 +15,7 @@ var ls1 = newFileList() var ls2 = newFileList() var err error var firstErr error +var marchAliasLock sync.Mutex var marchLsLock sync.Mutex var marchErrLock sync.Mutex var marchCtx context.Context @@ -77,6 +78,9 @@ func (b *bisyncRun) DstOnly(o fs.DirEntry) (recurse bool) { // Match is called when object exists on both path1 and path2 (whether equal or not) func (b *bisyncRun) Match(ctx context.Context, o2, o1 fs.DirEntry) (recurse bool) { fs.Debugf(o1, "both path1 and path2") + marchAliasLock.Lock() + b.aliases.Add(o1.Remote(), o2.Remote()) + marchAliasLock.Unlock() b.parse(o1, true) b.parse(o2, false) return isDir(o1) diff --git a/cmd/bisync/operations.go b/cmd/bisync/operations.go index 70f759343..00d879dbc 100644 --- a/cmd/bisync/operations.go +++ b/cmd/bisync/operations.go @@ -36,6 +36,7 @@ type bisyncRun struct { listing2 string newListing1 string newListing2 string + aliases bilib.AliasMap opt *Options octx context.Context fctx context.Context @@ -90,6 +91,7 @@ func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) { b.listing2 = b.basePath + ".path2.lst" b.newListing1 = b.listing1 + "-new" b.newListing2 = b.listing2 + "-new" + b.aliases = bilib.AliasMap{} // Handle lock file lockFile := "" @@ -448,7 +450,7 @@ func (b *bisyncRun) resync(octx, fctx context.Context) error { } fs.Infof(nil, "Resync updating listings") - b.saveOldListings() + b.saveOldListings() // may not exist, as this is --resync b.replaceCurrentListings() resultsToQueue := func(results []Results) bilib.Names { @@ -496,31 +498,15 @@ func (b *bisyncRun) checkSync(listing1, listing2 string) error { return fmt.Errorf("cannot read prior listing of Path2: %w", err) } - transformList := func(files *fileList, fs fs.Fs) *fileList { - transformed := newFileList() - for _, file := range files.list { - f := files.get(file) - transformed.put(ApplyTransforms(b.fctx, fs, file), f.size, f.time, f.hash, f.id, f.flags) - } - return transformed - } - - files1Transformed := transformList(files1, b.fs1) - files2Transformed := transformList(files2, b.fs2) - - // DEBUG - fs.Debugf(nil, "files1Transformed: %v", files1Transformed) - fs.Debugf(nil, "files2Transformed: %v", files2Transformed) - ok := true for _, file := range files1.list { - if !files2.has(file) && !files2Transformed.has(ApplyTransforms(b.fctx, b.fs1, file)) { + if !files2.has(file) && !files2.has(b.aliases.Alias(file)) { b.indent("ERROR", file, "Path1 file not found in Path2") ok = false } } for _, file := range files2.list { - if !files1.has(file) && !files1Transformed.has(ApplyTransforms(b.fctx, b.fs2, file)) { + if !files1.has(file) && !files1.has(b.aliases.Alias(file)) { b.indent("ERROR", file, "Path2 file not found in Path1") ok = false } @@ -580,7 +566,7 @@ func (b *bisyncRun) handleErr(o interface{}, msg string, err error, critical, re b.critical = true fs.Errorf(o, "%s: %v", msg, err) } else { - fs.Debugf(o, "%s: %v", msg, err) + fs.Infof(o, "%s: %v", msg, err) } } } diff --git a/cmd/bisync/queue.go b/cmd/bisync/queue.go index 12bcb9b69..f08a82f89 100644 --- a/cmd/bisync/queue.go +++ b/cmd/bisync/queue.go @@ -22,6 +22,7 @@ type Results struct { Src string Dst string Name string + AltName string Size int64 Modtime time.Time Hash string @@ -58,6 +59,21 @@ func resultName(result Results, side, src, dst fs.DirEntry) string { return "" } +// returns the opposite side's name, only if different +func altName(name string, src, dst fs.DirEntry) string { + if src != nil && dst != nil { + if src.Remote() != dst.Remote() { + switch name { + case src.Remote(): + return dst.Remote() + case dst.Remote(): + return src.Remote() + } + } + } + return "" +} + // WriteResults is Bisync's LoggerFn func WriteResults(ctx context.Context, sigil operations.Sigil, src, dst fs.DirEntry, err error) { lock.Lock() @@ -77,6 +93,7 @@ func WriteResults(ctx context.Context, sigil operations.Sigil, src, dst fs.DirEn for i, side := range fss { result.Name = resultName(result, side, src, dst) + result.AltName = altName(result.Name, src, dst) result.IsSrc = i == 0 result.IsDst = i == 1 result.Flags = "-" @@ -129,7 +146,7 @@ func ReadResults(results io.Reader) []Results { return slice } -func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.Names, queueName string, altNames bilib.Names) ([]Results, error) { +func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.Names, queueName string) ([]Results, error) { if err := b.saveQueue(files, queueName); err != nil { return nil, err } @@ -139,10 +156,9 @@ func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib. if err := filterCopy.AddFile(file); err != nil { return nil, err } - } - if altNames.NotEmpty() { - for _, file := range altNames.ToList() { - if err := filterCopy.AddFile(file); err != nil { + alias := b.aliases.Alias(file) + if alias != file { + if err := filterCopy.AddFile(alias); err != nil { return nil, err } } @@ -249,22 +265,3 @@ func (b *bisyncRun) saveQueue(files bilib.Names, jobName string) error { queueFile := fmt.Sprintf("%s.%s.que", b.basePath, jobName) return files.Save(queueFile) } - -func (b *bisyncRun) findAltNames(ctx context.Context, dst fs.Fs, queue bilib.Names, newListing string, altNames bilib.Names) { - ci := fs.GetConfig(ctx) - if queue.NotEmpty() && (!ci.NoUnicodeNormalization || ci.IgnoreCaseSync || b.fs1.Features().CaseInsensitive || b.fs2.Features().CaseInsensitive) { - // search list for existing file that matches queueFile when normalized - for _, queueFile := range queue.ToList() { - normalizedName := ApplyTransforms(ctx, dst, queueFile) - candidates, err := b.loadListing(newListing) - if err != nil { - fs.Errorf(candidates, "cannot read new listing: %v", err) - } - for _, filename := range candidates.list { - if ApplyTransforms(ctx, dst, filename) == normalizedName && filename != queueFile { - altNames.Add(filename) // original, not normalized - } - } - } - } -} diff --git a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.copy2to1.que b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.copy2to1.que index 85deec9f9..029f3e190 100644 --- a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.copy2to1.que +++ b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.copy2to1.que @@ -1,3 +1 @@ "file1.txt" -"folder/hello,WORLD!.txt" -"folder/éééö.txt" diff --git a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new index 225fe66c1..9afa4b347 100644 --- a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new +++ b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new @@ -1,5 +1,5 @@ # bisync listing v1 from test - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-05T00:00:00.000000000+0000 "file1.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-05T00:00:00.000000000+0000 "folder/hello,WORLD!.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-05T00:00:00.000000000+0000 "folder/éééö.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/hello,WORLD!.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/éééö.txt" - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt" diff --git a/cmd/bisync/testdata/test_normalization/golden/test.log b/cmd/bisync/testdata/test_normalization/golden/test.log index 54045e130..db66b5e75 100644 --- a/cmd/bisync/testdata/test_normalization/golden/test.log +++ b/cmd/bisync/testdata/test_normalization/golden/test.log @@ -36,16 +36,20 @@ INFO : - Path2 File is new - f INFO : - Path2 File is new - folder/hello,WORLD!.txt INFO : Path2: 3 changes: 2 new, 1 newer, 0 older, 0 deleted INFO : Applying changes -INFO : - Path1 Queue copy to Path2 - {path2/}folder/HeLlO,wOrLd!.txt -INFO : - Path1 Queue copy to Path2 - {path2/}folder/éééö.txt +INFO : Checking potential conflicts... +NOTICE: Local file system at {path2}: 0 differences found +NOTICE: Local file system at {path2}: 2 matching files +INFO : Finished checking the potential conflicts. %!s() +NOTICE: - WARNING New or changed in both paths - folder/HeLlO,wOrLd!.txt +INFO : folder/hello,WORLD!.txt: Files are equal but will copy anyway to fix case to folder/HeLlO,wOrLd!.txt +NOTICE: - WARNING New or changed in both paths - folder/éééö.txt +INFO : folder/éééö.txt: Files are equal but will copy anyway to fix case to folder/éééö.txt INFO : - Path1 Queue copy to Path2 - "{path2/}測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt" INFO : - Path2 Queue copy to Path1 - {path1/}file1.txt -INFO : - Path2 Queue copy to Path1 - {path1/}folder/éééö.txt -INFO : - Path2 Queue copy to Path1 - {path1/}folder/hello,WORLD!.txt INFO : - Path2 Do queued copies to - Path1 -INFO : folder/HeLlO,wOrLd!.txt: Fixed case by renaming to: folder/hello,WORLD!.txt -INFO : folder/éééö.txt: Fixed case by renaming to: folder/éééö.txt INFO : - Path1 Do queued copies to - Path2 +INFO : folder/hello,WORLD!.txt: Fixed case by renaming to: folder/HeLlO,wOrLd!.txt +INFO : folder/éééö.txt: Fixed case by renaming to: folder/éééö.txt INFO : Updating listings INFO : Validating listings for Path1 "{path1/}" vs Path2 "{path2/}" INFO : Bisync successful @@ -87,12 +91,16 @@ INFO : - Path2 File is new - f INFO : - Path2 File is new - folder/hello,WORLD!.txt INFO : Path2: 3 changes: 2 new, 1 newer, 0 older, 0 deleted INFO : Applying changes -INFO : - Path1 Queue copy to Path2 - {path2/}folder/HeLlO,wOrLd!.txt -INFO : - Path1 Queue copy to Path2 - {path2/}folder/éééö.txt +INFO : Checking potential conflicts... +NOTICE: Local file system at {path2}: 0 differences found +NOTICE: Local file system at {path2}: 2 matching files +INFO : Finished checking the potential conflicts. %!s() +NOTICE: - WARNING New or changed in both paths - folder/HeLlO,wOrLd!.txt +INFO : Files are equal! Skipping: folder/HeLlO,wOrLd!.txt +NOTICE: - WARNING New or changed in both paths - folder/éééö.txt +INFO : Files are equal! Skipping: folder/éééö.txt INFO : - Path1 Queue copy to Path2 - "{path2/}測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt" INFO : - Path2 Queue copy to Path1 - {path1/}file1.txt -INFO : - Path2 Queue copy to Path1 - {path1/}folder/éééö.txt -INFO : - Path2 Queue copy to Path1 - {path1/}folder/hello,WORLD!.txt INFO : - Path2 Do queued copies to - Path1 INFO : - Path1 Do queued copies to - Path2 INFO : Updating listings @@ -108,14 +116,12 @@ INFO : - Path1 Resync is copying UNIQUE OR DIFFERING files to INFO : Resync updating listings INFO : Bisync successful -(27) : test changed on both paths +(27) : test changed on one path (28) : touch-copy 2001-01-05 {datadir/}file1.txt {path2/} (29) : copy-as-NFC {datadir/}file1.txt {path1/}測試_Русский___ě_áñ👸🏼🧝🏾‍♀️💆🏿‍♂️🐨🤙🏼🤮🧑🏻‍🔧🧑‍🔬éééö 測試_Русский___ě_áñ👸🏼🧝🏾‍♀️💆🏿‍♂️🐨🤙🏼🤮🧑🏻‍🔧🧑‍🔬éééö.txt (30) : copy-as-NFC {datadir/}file1.txt {path1/}folder éééö.txt (31) : copy-as-NFC {datadir/}file1.txt {path1/}folder HeLlO,wOrLd!.txt -(32) : copy-as-NFD {datadir/}file1.txt {path2/}folder éééö.txt -(33) : copy-as-NFD {datadir/}file1.txt {path2/}folder hello,WORLD!.txt -(34) : bisync norm +(32) : bisync norm INFO : Synching Path1 "{path1/}" with Path2 "{path2/}" INFO : Building Path1 and Path2 listings INFO : Path1 checking for diffs @@ -125,16 +131,12 @@ INFO : - Path1 File is newer - " INFO : Path1: 3 changes: 0 new, 3 newer, 0 older, 0 deleted INFO : Path2 checking for diffs INFO : - Path2 File is newer - file1.txt -INFO : - Path2 File is newer - folder/éééö.txt -INFO : - Path2 File is newer - folder/hello,WORLD!.txt -INFO : Path2: 3 changes: 0 new, 3 newer, 0 older, 0 deleted +INFO : Path2: 1 changes: 0 new, 1 newer, 0 older, 0 deleted INFO : Applying changes -INFO : - Path1 Queue copy to Path2 - {path2/}folder/HeLlO,wOrLd!.txt +INFO : - Path1 Queue copy to Path2 - {path2/}folder/hello,WORLD!.txt INFO : - Path1 Queue copy to Path2 - {path2/}folder/éééö.txt INFO : - Path1 Queue copy to Path2 - "{path2/}測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt" INFO : - Path2 Queue copy to Path1 - {path1/}file1.txt -INFO : - Path2 Queue copy to Path1 - {path1/}folder/éééö.txt -INFO : - Path2 Queue copy to Path1 - {path1/}folder/hello,WORLD!.txt INFO : - Path2 Do queued copies to - Path1 INFO : - Path1 Do queued copies to - Path2 INFO : Updating listings diff --git a/cmd/bisync/testdata/test_normalization/scenario.txt b/cmd/bisync/testdata/test_normalization/scenario.txt index e44265fd2..bc42bbdb5 100644 --- a/cmd/bisync/testdata/test_normalization/scenario.txt +++ b/cmd/bisync/testdata/test_normalization/scenario.txt @@ -43,11 +43,9 @@ bisync norm force test resync bisync resync norm -test changed on both paths +test changed on one path touch-copy 2001-01-05 {datadir/}file1.txt {path2/} copy-as-NFC {datadir/}file1.txt {path1/}測試_Русский___ě_áñ👸🏼🧝🏾‍♀️💆🏿‍♂️🐨🤙🏼🤮🧑🏻‍🔧🧑‍🔬éééö 測試_Русский___ě_áñ👸🏼🧝🏾‍♀️💆🏿‍♂️🐨🤙🏼🤮🧑🏻‍🔧🧑‍🔬éééö.txt copy-as-NFC {datadir/}file1.txt {path1/}folder éééö.txt copy-as-NFC {datadir/}file1.txt {path1/}folder HeLlO,wOrLd!.txt -copy-as-NFD {datadir/}file1.txt {path2/}folder éééö.txt -copy-as-NFD {datadir/}file1.txt {path2/}folder hello,WORLD!.txt bisync norm \ No newline at end of file diff --git a/docs/content/bisync.md b/docs/content/bisync.md index 165e73db2..bfde514cd 100644 --- a/docs/content/bisync.md +++ b/docs/content/bisync.md @@ -591,17 +591,8 @@ instead of specifying them with command flags. (You can still override them as n ### Case (and unicode) sensitivity {#case-sensitivity} -Synching with **case-insensitive** filesystems, such as Windows or `Box`, -can result in unusual behavior. As of `v1.65`, case and unicode form differences no longer cause critical errors, -however they may cause unexpected delta outcomes, due to the delta engine still being case-sensitive. -This will be fixed in a future release. The near-term workaround is to make sure that files on both sides -don't have spelling case differences (`Smile.jpg` vs. `smile.jpg`). - -The same limitation applies to Unicode normalization forms. -This [particularly applies to macOS](https://github.com/rclone/rclone/issues/7270), -which prefers NFD and sometimes auto-converts filenames from the NFC form used by most other platforms. -This should no longer cause bisync to fail entirely, but may cause surprising delta results, as explained above. - +As of `v1.65`, case and unicode form differences no longer cause critical errors, +and normalization (when comparing between filesystems) is handled according to the same flags and defaults as `rclone sync`. See the following options (all of which are supported by bisync) to control this behavior more granularly: - [`--fix-case`](/docs/#fix-case) - [`--ignore-case-sync`](/docs/#ignore-case-sync) @@ -609,6 +600,10 @@ See the following options (all of which are supported by bisync) to control this - [`--local-unicode-normalization`](/local/#local-unicode-normalization) and [`--local-case-sensitive`](/local/#local-case-sensitive) (caution: these are normally not what you want.) +Note that in the (probably rare) event that `--fix-case` is used AND a file is new/changed on both sides +AND the checksums match AND the filename case does not match, the Path1 filename is considered the winner, +for the purposes of `--fix-case` (Path2 will be renamed to match it). + ## Windows support {#windows} Bisync has been tested on Windows 8.1, Windows 10 Pro 64-bit and on Windows @@ -1292,7 +1287,7 @@ about _Unison_ and synchronization in general. * A few basic terminal colors are now supported, controllable with [`--color`](/docs/#color-when) (`AUTO`|`NEVER`|`ALWAYS`) * Initial listing snapshots of Path1 and Path2 are now generated concurrently, using the same "march" infrastructure as `check` and `sync`, for performance improvements and less [risk of error](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=4.%20Listings%20should%20alternate%20between%20paths%20to%20minimize%20errors). -* Better handling of unicode normalization and case insensitivity, support for [`--fix-case`](/docs/#fix-case), [`--ignore-case-sync`](/docs/#ignore-case-sync), [`--no-unicode-normalization`](/docs/#no-unicode-normalization) +* Fixed handling of unicode normalization and case insensitivity, support for [`--fix-case`](/docs/#fix-case), [`--ignore-case-sync`](/docs/#ignore-case-sync), [`--no-unicode-normalization`](/docs/#no-unicode-normalization) * `--resync` is now much more efficient (especially for users of `--create-empty-src-dirs`) ### `v1.64`