2019-09-04 23:30:48 +03:00
|
|
|
package vfs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"os"
|
|
|
|
"testing"
|
|
|
|
|
vfs: fix unicode normalization on macOS - fixes #7072
Before this change, the VFS layer did not properly handle unicode normalization,
which caused problems particularly for users of macOS. While attempts were made
to handle it with various `-o modules=iconv` combinations, this was an imperfect
solution, as no one combination allowed both NFC and NFD content to
simultaneously be both visible and editable via Finder.
After this change, the VFS supports `--no-unicode-normalization` (default `false`)
via the existing `--vfs-case-insensitive` logic, which is extended to apply to both
case insensitivity and unicode normalization form.
This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a
probably rare but potentially possible scenario where a directory contains
multiple duplicate filenames after applying case and unicode normalization
settings. In such a scenario, this flag (disabled by default) hides the
duplicates. This comes with a performance tradeoff, as rclone will have to scan
the entire directory for duplicates when listing a directory. For this reason,
it is recommended to leave this disabled if not needed. However, macOS users may
wish to consider using it, as otherwise, if a remote directory contains both NFC
and NFD versions of the same filename, an odd situation will occur: both
versions of the file will be visible in the mount, and both will appear to be
editable, however, editing either version will actually result in only the NFD
version getting edited under the hood. `--vfs-block-norm-dupes` prevents this
confusion by detecting this scenario, hiding the duplicates, and logging an
error, similar to how this is handled in `rclone sync`.
2024-02-05 02:58:11 -05:00
|
|
|
"github.com/rclone/rclone/fs"
|
2019-09-04 23:30:48 +03:00
|
|
|
"github.com/rclone/rclone/fstest"
|
2020-02-28 14:44:15 +00:00
|
|
|
"github.com/rclone/rclone/vfs/vfscommon"
|
2019-09-04 23:30:48 +03:00
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
"github.com/stretchr/testify/require"
|
vfs: fix unicode normalization on macOS - fixes #7072
Before this change, the VFS layer did not properly handle unicode normalization,
which caused problems particularly for users of macOS. While attempts were made
to handle it with various `-o modules=iconv` combinations, this was an imperfect
solution, as no one combination allowed both NFC and NFD content to
simultaneously be both visible and editable via Finder.
After this change, the VFS supports `--no-unicode-normalization` (default `false`)
via the existing `--vfs-case-insensitive` logic, which is extended to apply to both
case insensitivity and unicode normalization form.
This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a
probably rare but potentially possible scenario where a directory contains
multiple duplicate filenames after applying case and unicode normalization
settings. In such a scenario, this flag (disabled by default) hides the
duplicates. This comes with a performance tradeoff, as rclone will have to scan
the entire directory for duplicates when listing a directory. For this reason,
it is recommended to leave this disabled if not needed. However, macOS users may
wish to consider using it, as otherwise, if a remote directory contains both NFC
and NFD versions of the same filename, an odd situation will occur: both
versions of the file will be visible in the mount, and both will appear to be
editable, however, editing either version will actually result in only the NFD
version getting edited under the hood. `--vfs-block-norm-dupes` prevents this
confusion by detecting this scenario, hiding the duplicates, and logging an
error, similar to how this is handled in `rclone sync`.
2024-02-05 02:58:11 -05:00
|
|
|
"golang.org/x/text/unicode/norm"
|
2019-09-04 23:30:48 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestCaseSensitivity(t *testing.T) {
|
|
|
|
r := fstest.NewRun(t)
|
|
|
|
|
2019-09-06 12:44:59 +03:00
|
|
|
if r.Fremote.Features().CaseInsensitive {
|
|
|
|
t.Skip("Can't test case sensitivity - this remote is officially not case-sensitive")
|
|
|
|
}
|
|
|
|
|
2019-09-04 23:30:48 +03:00
|
|
|
// Create test files
|
|
|
|
ctx := context.Background()
|
|
|
|
file1 := r.WriteObject(ctx, "FiLeA", "data1", t1)
|
|
|
|
file2 := r.WriteObject(ctx, "FiLeB", "data2", t2)
|
2021-11-09 19:43:36 +08:00
|
|
|
r.CheckRemoteItems(t, file1, file2)
|
2019-09-04 23:30:48 +03:00
|
|
|
|
|
|
|
// Create file3 with name differing from file2 name only by case.
|
|
|
|
// On a case-Sensitive remote this will be a separate file.
|
|
|
|
// On a case-INsensitive remote this file will either not exist
|
|
|
|
// or overwrite file2 depending on how file system diverges.
|
2019-09-06 12:44:59 +03:00
|
|
|
// On a box.com remote this step will even fail.
|
2019-09-04 23:30:48 +03:00
|
|
|
file3 := r.WriteObject(ctx, "FilEb", "data3", t3)
|
|
|
|
|
|
|
|
// Create a case-Sensitive and case-INsensitive VFS
|
2024-07-03 11:34:29 +01:00
|
|
|
optCS := vfscommon.Opt
|
2019-09-04 23:30:48 +03:00
|
|
|
optCS.CaseInsensitive = false
|
|
|
|
vfsCS := New(r.Fremote, &optCS)
|
2020-04-17 11:18:58 +01:00
|
|
|
defer cleanupVFS(t, vfsCS)
|
2019-09-04 23:30:48 +03:00
|
|
|
|
2024-07-03 11:34:29 +01:00
|
|
|
optCI := vfscommon.Opt
|
2019-09-04 23:30:48 +03:00
|
|
|
optCI.CaseInsensitive = true
|
|
|
|
vfsCI := New(r.Fremote, &optCI)
|
2020-04-17 11:18:58 +01:00
|
|
|
defer cleanupVFS(t, vfsCI)
|
2019-09-04 23:30:48 +03:00
|
|
|
|
|
|
|
// Run basic checks that must pass on VFS of any type.
|
|
|
|
assertFileDataVFS(t, vfsCI, "FiLeA", "data1")
|
|
|
|
assertFileDataVFS(t, vfsCS, "FiLeA", "data1")
|
|
|
|
|
|
|
|
// Detect case sensitivity of the underlying remote.
|
|
|
|
remoteIsOK := true
|
|
|
|
if !checkFileDataVFS(t, vfsCS, "FiLeA", "data1") {
|
|
|
|
remoteIsOK = false
|
|
|
|
}
|
|
|
|
if !checkFileDataVFS(t, vfsCS, "FiLeB", "data2") {
|
|
|
|
remoteIsOK = false
|
|
|
|
}
|
|
|
|
if !checkFileDataVFS(t, vfsCS, "FilEb", "data3") {
|
|
|
|
remoteIsOK = false
|
|
|
|
}
|
|
|
|
|
|
|
|
// The remaining test is only meaningful on a case-Sensitive file system.
|
|
|
|
if !remoteIsOK {
|
2019-09-06 12:44:59 +03:00
|
|
|
t.Skip("Can't test case sensitivity - this remote doesn't comply as case-sensitive")
|
2019-09-04 23:30:48 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Continue with test as the underlying remote is fully case-Sensitive.
|
2021-11-09 19:43:36 +08:00
|
|
|
r.CheckRemoteItems(t, file1, file2, file3)
|
2019-09-04 23:30:48 +03:00
|
|
|
|
|
|
|
// See how VFS handles case-INsensitive flag
|
|
|
|
assertFileDataVFS(t, vfsCI, "FiLeA", "data1")
|
|
|
|
assertFileDataVFS(t, vfsCI, "fileA", "data1")
|
|
|
|
assertFileDataVFS(t, vfsCI, "filea", "data1")
|
|
|
|
assertFileDataVFS(t, vfsCI, "FILEA", "data1")
|
|
|
|
|
|
|
|
assertFileDataVFS(t, vfsCI, "FiLeB", "data2")
|
|
|
|
assertFileDataVFS(t, vfsCI, "FilEb", "data3")
|
|
|
|
|
|
|
|
fd, err := vfsCI.OpenFile("fileb", os.O_RDONLY, 0777)
|
|
|
|
assert.Nil(t, fd)
|
|
|
|
assert.Error(t, err)
|
|
|
|
assert.NotEqual(t, err, ENOENT)
|
|
|
|
|
|
|
|
fd, err = vfsCI.OpenFile("FILEB", os.O_RDONLY, 0777)
|
|
|
|
assert.Nil(t, fd)
|
|
|
|
assert.Error(t, err)
|
|
|
|
assert.NotEqual(t, err, ENOENT)
|
|
|
|
|
|
|
|
// Run the same set of checks with case-Sensitive VFS, for comparison.
|
|
|
|
assertFileDataVFS(t, vfsCS, "FiLeA", "data1")
|
|
|
|
|
|
|
|
assertFileAbsentVFS(t, vfsCS, "fileA")
|
|
|
|
assertFileAbsentVFS(t, vfsCS, "filea")
|
|
|
|
assertFileAbsentVFS(t, vfsCS, "FILEA")
|
|
|
|
|
|
|
|
assertFileDataVFS(t, vfsCS, "FiLeB", "data2")
|
|
|
|
assertFileDataVFS(t, vfsCS, "FilEb", "data3")
|
|
|
|
|
|
|
|
assertFileAbsentVFS(t, vfsCS, "fileb")
|
|
|
|
assertFileAbsentVFS(t, vfsCS, "FILEB")
|
|
|
|
}
|
|
|
|
|
|
|
|
func checkFileDataVFS(t *testing.T, vfs *VFS, name string, expect string) bool {
|
|
|
|
fd, err := vfs.OpenFile(name, os.O_RDONLY, 0777)
|
|
|
|
if fd == nil || err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
// File must be closed - otherwise Run.cleanUp() will fail on Windows.
|
|
|
|
_ = fd.Close()
|
|
|
|
}()
|
|
|
|
|
|
|
|
fh, ok := fd.(*ReadFileHandle)
|
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
size := len(expect)
|
|
|
|
buf := make([]byte, size)
|
|
|
|
num, err := fh.Read(buf)
|
|
|
|
if err != nil || num != size {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return string(buf) == expect
|
|
|
|
}
|
|
|
|
|
|
|
|
func assertFileDataVFS(t *testing.T, vfs *VFS, name string, expect string) {
|
|
|
|
fd, errOpen := vfs.OpenFile(name, os.O_RDONLY, 0777)
|
|
|
|
assert.NotNil(t, fd)
|
|
|
|
assert.NoError(t, errOpen)
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
// File must be closed - otherwise Run.cleanUp() will fail on Windows.
|
|
|
|
if errOpen == nil && fd != nil {
|
|
|
|
_ = fd.Close()
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
fh, ok := fd.(*ReadFileHandle)
|
|
|
|
require.True(t, ok)
|
|
|
|
|
|
|
|
size := len(expect)
|
|
|
|
buf := make([]byte, size)
|
|
|
|
numRead, errRead := fh.Read(buf)
|
|
|
|
assert.NoError(t, errRead)
|
|
|
|
assert.Equal(t, numRead, size)
|
|
|
|
|
|
|
|
assert.Equal(t, string(buf), expect)
|
|
|
|
}
|
|
|
|
|
|
|
|
func assertFileAbsentVFS(t *testing.T, vfs *VFS, name string) {
|
|
|
|
fd, err := vfs.OpenFile(name, os.O_RDONLY, 0777)
|
|
|
|
defer func() {
|
|
|
|
// File must be closed - otherwise Run.cleanUp() will fail on Windows.
|
|
|
|
if err == nil && fd != nil {
|
|
|
|
_ = fd.Close()
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
assert.Nil(t, fd)
|
|
|
|
assert.Error(t, err)
|
|
|
|
assert.Equal(t, err, ENOENT)
|
|
|
|
}
|
vfs: fix unicode normalization on macOS - fixes #7072
Before this change, the VFS layer did not properly handle unicode normalization,
which caused problems particularly for users of macOS. While attempts were made
to handle it with various `-o modules=iconv` combinations, this was an imperfect
solution, as no one combination allowed both NFC and NFD content to
simultaneously be both visible and editable via Finder.
After this change, the VFS supports `--no-unicode-normalization` (default `false`)
via the existing `--vfs-case-insensitive` logic, which is extended to apply to both
case insensitivity and unicode normalization form.
This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a
probably rare but potentially possible scenario where a directory contains
multiple duplicate filenames after applying case and unicode normalization
settings. In such a scenario, this flag (disabled by default) hides the
duplicates. This comes with a performance tradeoff, as rclone will have to scan
the entire directory for duplicates when listing a directory. For this reason,
it is recommended to leave this disabled if not needed. However, macOS users may
wish to consider using it, as otherwise, if a remote directory contains both NFC
and NFD versions of the same filename, an odd situation will occur: both
versions of the file will be visible in the mount, and both will appear to be
editable, however, editing either version will actually result in only the NFD
version getting edited under the hood. `--vfs-block-norm-dupes` prevents this
confusion by detecting this scenario, hiding the duplicates, and logging an
error, similar to how this is handled in `rclone sync`.
2024-02-05 02:58:11 -05:00
|
|
|
|
|
|
|
func TestUnicodeNormalization(t *testing.T) {
|
|
|
|
r := fstest.NewRun(t)
|
|
|
|
|
|
|
|
var (
|
|
|
|
nfc = norm.NFC.String(norm.NFD.String("測試_Русский___ě_áñ"))
|
|
|
|
nfd = norm.NFD.String(nfc)
|
|
|
|
both = "normal name with no special characters.txt"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Create test files
|
|
|
|
ctx := context.Background()
|
|
|
|
file1 := r.WriteObject(ctx, both, "data1", t1)
|
|
|
|
file2 := r.WriteObject(ctx, nfc, "data2", t2)
|
|
|
|
r.CheckRemoteItems(t, file1, file2)
|
|
|
|
|
|
|
|
// Create VFS
|
2024-07-03 11:34:29 +01:00
|
|
|
opt := vfscommon.Opt
|
vfs: fix unicode normalization on macOS - fixes #7072
Before this change, the VFS layer did not properly handle unicode normalization,
which caused problems particularly for users of macOS. While attempts were made
to handle it with various `-o modules=iconv` combinations, this was an imperfect
solution, as no one combination allowed both NFC and NFD content to
simultaneously be both visible and editable via Finder.
After this change, the VFS supports `--no-unicode-normalization` (default `false`)
via the existing `--vfs-case-insensitive` logic, which is extended to apply to both
case insensitivity and unicode normalization form.
This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a
probably rare but potentially possible scenario where a directory contains
multiple duplicate filenames after applying case and unicode normalization
settings. In such a scenario, this flag (disabled by default) hides the
duplicates. This comes with a performance tradeoff, as rclone will have to scan
the entire directory for duplicates when listing a directory. For this reason,
it is recommended to leave this disabled if not needed. However, macOS users may
wish to consider using it, as otherwise, if a remote directory contains both NFC
and NFD versions of the same filename, an odd situation will occur: both
versions of the file will be visible in the mount, and both will appear to be
editable, however, editing either version will actually result in only the NFD
version getting edited under the hood. `--vfs-block-norm-dupes` prevents this
confusion by detecting this scenario, hiding the duplicates, and logging an
error, similar to how this is handled in `rclone sync`.
2024-02-05 02:58:11 -05:00
|
|
|
vfs := New(r.Fremote, &opt)
|
|
|
|
defer cleanupVFS(t, vfs)
|
|
|
|
|
|
|
|
// assert that both files are found under NFD-normalized names
|
|
|
|
assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1")
|
|
|
|
assertFileDataVFS(t, vfs, nfd, "data2")
|
|
|
|
|
|
|
|
// change ci.NoUnicodeNormalization to true and verify that only file1 is found
|
|
|
|
ci := fs.GetConfig(ctx) // need to set the global config here as the *Dir methods don't take a ctx param
|
|
|
|
oldVal := ci.NoUnicodeNormalization
|
|
|
|
defer func() { fs.GetConfig(ctx).NoUnicodeNormalization = oldVal }() // restore the prior value after the test
|
|
|
|
ci.NoUnicodeNormalization = true
|
|
|
|
assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1")
|
|
|
|
assertFileAbsentVFS(t, vfs, nfd)
|
|
|
|
}
|