2014-03-28 20:56:04 +03:00
// Generic operations on filesystems and objects
package fs
import (
"fmt"
2014-08-01 19:58:39 +03:00
"io"
2016-03-05 18:10:51 +02:00
"log"
2015-03-01 14:38:31 +02:00
"mime"
"path"
2016-03-05 18:10:51 +02:00
"sort"
2016-01-23 22:16:47 +02:00
"strings"
2014-03-28 20:56:04 +03:00
"sync"
2015-10-02 20:48:48 +02:00
"sync/atomic"
2015-06-03 16:08:27 +02:00
"time"
2016-01-23 22:16:47 +02:00
2016-06-12 16:06:02 +02:00
"github.com/pkg/errors"
2016-01-23 22:16:47 +02:00
"golang.org/x/text/unicode/norm"
2014-03-28 20:56:04 +03:00
)
2015-09-22 19:47:16 +02:00
// CalculateModifyWindow works out modify window for Fses passed in -
// sets Config.ModifyWindow
2014-03-28 20:56:04 +03:00
//
// This is the largest modify window of all the fses in use, and the
// user configured value
func CalculateModifyWindow ( fs ... Fs ) {
for _ , f := range fs {
if f != nil {
precision := f . Precision ( )
if precision > Config . ModifyWindow {
Config . ModifyWindow = precision
}
2015-08-20 21:48:58 +02:00
if precision == ModTimeNotSupported {
Debug ( f , "Modify window not supported" )
return
}
2014-03-28 20:56:04 +03:00
}
}
2015-08-20 21:48:58 +02:00
Debug ( fs [ 0 ] , "Modify window is %s" , Config . ModifyWindow )
2014-03-28 20:56:04 +03:00
}
2016-01-11 14:39:33 +02:00
// HashEquals reports whether the hashes src and dst should be treated
// as matching.
//
// An empty string stands for an unknown hash, so the result is true
// whenever either argument is empty; otherwise the two strings must
// be identical.
func HashEquals(src, dst string) bool {
	return src == "" || dst == "" || src == dst
}
2016-01-11 14:39:33 +02:00
// CheckHashes checks the two files to see if they have common
// known hash types and compares them
2014-03-28 20:56:04 +03:00
//
2016-01-24 20:06:57 +02:00
// Returns
2015-08-20 21:48:58 +02:00
//
2016-01-24 20:06:57 +02:00
// equal - which is equality of the hashes
//
// hash - the HashType. This is HashNone if either of the hashes were
// unset or a compatible hash couldn't be found.
//
// err - may return an error which will already have been logged
2014-03-28 20:56:04 +03:00
//
2015-08-20 21:48:58 +02:00
// If an error is returned it will return equal as false
2016-01-24 20:06:57 +02:00
func CheckHashes ( src , dst Object ) ( equal bool , hash HashType , err error ) {
2016-01-11 14:39:33 +02:00
common := src . Fs ( ) . Hashes ( ) . Overlap ( dst . Fs ( ) . Hashes ( ) )
2016-01-24 20:06:57 +02:00
// Debug(nil, "Shared hashes: %v", common)
2016-01-11 14:39:33 +02:00
if common . Count ( ) == 0 {
2016-01-24 20:06:57 +02:00
return true , HashNone , nil
2016-01-11 14:39:33 +02:00
}
2016-01-24 20:06:57 +02:00
hash = common . GetOne ( )
srcHash , err := src . Hash ( hash )
2014-03-28 20:56:04 +03:00
if err != nil {
Stats . Error ( )
2016-06-18 10:32:14 +02:00
ErrorLog ( src , "Failed to calculate src hash: %v" , err )
2016-01-24 20:06:57 +02:00
return false , hash , err
2015-08-20 21:48:58 +02:00
}
2016-01-11 14:39:33 +02:00
if srcHash == "" {
2016-01-24 20:06:57 +02:00
return true , HashNone , nil
2014-03-28 20:56:04 +03:00
}
2016-01-24 20:06:57 +02:00
dstHash , err := dst . Hash ( hash )
2014-03-28 20:56:04 +03:00
if err != nil {
Stats . Error ( )
2016-06-18 10:32:14 +02:00
ErrorLog ( dst , "Failed to calculate dst hash: %v" , err )
2016-01-24 20:06:57 +02:00
return false , hash , err
2015-08-20 21:48:58 +02:00
}
2016-01-11 14:39:33 +02:00
if dstHash == "" {
2016-01-24 20:06:57 +02:00
return true , HashNone , nil
2014-03-28 20:56:04 +03:00
}
2016-01-24 20:06:57 +02:00
return srcHash == dstHash , hash , nil
2014-03-28 20:56:04 +03:00
}
2015-09-22 19:47:16 +02:00
// Equal checks to see if the src and dst objects are equal by looking at
2016-01-11 14:39:33 +02:00
// size, mtime and hash
2014-03-28 20:56:04 +03:00
//
// If the src and dst size are different then it is considered to be
2015-06-06 09:38:45 +02:00
// not equal. If --size-only is in effect then this is the only check
2016-06-17 18:20:08 +02:00
// that is done. If --ignore-size is in effect then this check is
// skipped and the files are considered the same size.
2014-03-28 20:56:04 +03:00
//
// If the size is the same and the mtime is the same then it is
2015-06-06 09:38:45 +02:00
// considered to be equal. This check is skipped if using --checksum.
2014-03-28 20:56:04 +03:00
//
2015-06-06 09:38:45 +02:00
// If the size is the same and mtime is different, unreadable or
2016-01-11 14:39:33 +02:00
// --checksum is set and the hash is the same then the file is
2015-06-06 09:38:45 +02:00
// considered to be equal. In this case the mtime on the dst is
// updated if --checksum is not set.
2014-03-28 20:56:04 +03:00
//
// Otherwise the file is considered to be not equal including if there
// were errors reading info.
func Equal ( src , dst Object ) bool {
2016-06-17 18:20:08 +02:00
if ! Config . IgnoreSize {
if src . Size ( ) != dst . Size ( ) {
Debug ( src , "Sizes differ" )
return false
}
2014-03-28 20:56:04 +03:00
}
2015-06-06 09:38:45 +02:00
if Config . SizeOnly {
Debug ( src , "Sizes identical" )
return true
}
2014-03-28 20:56:04 +03:00
2015-06-03 16:08:27 +02:00
var srcModTime time . Time
if ! Config . CheckSum {
2015-08-20 21:48:58 +02:00
if Config . ModifyWindow == ModTimeNotSupported {
Debug ( src , "Sizes identical" )
return true
}
2015-06-03 16:08:27 +02:00
// Size the same so check the mtime
srcModTime = src . ModTime ( )
dstModTime := dst . ModTime ( )
dt := dstModTime . Sub ( srcModTime )
ModifyWindow := Config . ModifyWindow
if dt >= ModifyWindow || dt <= - ModifyWindow {
Debug ( src , "Modification times differ by %s: %v, %v" , dt , srcModTime , dstModTime )
} else {
Debug ( src , "Size and modification time the same (differ by %s, within tolerance %s)" , dt , ModifyWindow )
return true
}
2014-03-28 20:56:04 +03:00
}
// mtime is unreadable or different but size is the same so
2016-01-11 14:39:33 +02:00
// check the hash
2016-01-24 20:06:57 +02:00
same , hash , _ := CheckHashes ( src , dst )
2014-03-28 20:56:04 +03:00
if ! same {
2016-01-11 14:39:33 +02:00
Debug ( src , "Hash differ" )
2014-03-28 20:56:04 +03:00
return false
}
2015-06-03 16:08:27 +02:00
if ! Config . CheckSum {
2016-01-11 14:39:33 +02:00
// Size and hash the same but mtime different so update the
2015-06-03 16:08:27 +02:00
// mtime of the dst object here
2016-03-22 17:07:10 +02:00
err := dst . SetModTime ( srcModTime )
2016-03-22 17:23:37 +02:00
if err == ErrorCantSetModTime {
Debug ( src , "src and dst identical but can't set mod time without re-uploading" )
return false
} else if err != nil {
2016-03-22 17:07:10 +02:00
Stats . Error ( )
2016-06-18 10:32:14 +02:00
ErrorLog ( dst , "Failed to read set modification time: %v" , err )
2016-03-22 17:07:10 +02:00
}
2015-06-03 16:08:27 +02:00
}
2014-03-28 20:56:04 +03:00
2016-01-24 20:06:57 +02:00
if hash == HashNone {
2015-08-20 21:48:58 +02:00
Debug ( src , "Size of src and dst objects identical" )
} else {
2016-01-24 20:06:57 +02:00
Debug ( src , "Size and %v of src and dst objects identical" , hash )
2015-08-20 21:48:58 +02:00
}
2014-03-28 20:56:04 +03:00
return true
}
2015-09-22 19:47:16 +02:00
// MimeType returns a guess at the mime type from the extension
2016-06-15 19:49:11 +02:00
func MimeType ( o ObjectInfo ) string {
2015-03-01 14:38:31 +02:00
mimeType := mime . TypeByExtension ( path . Ext ( o . Remote ( ) ) )
2016-04-07 15:32:01 +02:00
if ! strings . ContainsRune ( mimeType , '/' ) {
2015-03-01 14:38:31 +02:00
mimeType = "application/octet-stream"
}
return mimeType
}
2014-07-15 21:27:05 +03:00
// Used to remove a failed copy
2015-03-14 19:54:41 +02:00
//
// Returns whether the file was succesfully removed or not
func removeFailedCopy ( dst Object ) bool {
if dst == nil {
return false
}
Debug ( dst , "Removing failed copy" )
removeErr := dst . Remove ( )
if removeErr != nil {
Debug ( dst , "Failed to remove failed copy: %s" , removeErr )
return false
2014-07-15 21:27:05 +03:00
}
2015-03-14 19:54:41 +02:00
return true
2014-07-15 21:27:05 +03:00
}
2014-04-18 19:04:21 +03:00
// Copy src object to dst or f if nil
//
// If dst is nil then the object must not exist already. If you do
// call Copy() with dst nil on a pre-existing file then some filing
// systems (eg Drive) may duplicate the file.
2016-07-04 14:12:33 +02:00
func Copy ( f Fs , dst , src Object ) ( err error ) {
2016-01-12 19:38:28 +02:00
maxTries := Config . LowLevelRetries
2015-02-02 19:29:08 +02:00
tries := 0
doUpdate := dst != nil
2016-06-18 11:55:58 +02:00
var actionTaken string
for {
// Try server side copy first - if has optional interface and
// is same underlying remote
actionTaken = "Copied (server side copy)"
if fCopy , ok := f . ( Copier ) ; ok && src . Fs ( ) . Name ( ) == f . Name ( ) {
var newDst Object
newDst , err = fCopy . Copy ( src , src . Remote ( ) )
if err == nil {
dst = newDst
}
} else {
err = ErrorCantCopy
2015-10-06 16:35:22 +02:00
}
2016-06-18 11:55:58 +02:00
// If can't server side copy, do it manually
if err == ErrorCantCopy {
var in0 io . ReadCloser
in0 , err = src . Open ( )
if err != nil {
err = errors . Wrap ( err , "failed to open source object" )
} else {
// On big files add a buffer
if src . Size ( ) > 10 << 20 {
in0 , _ = newAsyncReader ( in0 , 4 , 4 << 20 )
}
2015-10-06 16:35:22 +02:00
2016-06-18 11:55:58 +02:00
in := NewAccount ( in0 , src ) // account the transfer
2014-03-28 20:56:04 +03:00
2016-06-18 11:55:58 +02:00
if doUpdate {
2016-07-09 11:11:57 +02:00
actionTaken = "Copied (replaced existing)"
2016-06-18 11:55:58 +02:00
err = dst . Update ( in , src )
} else {
actionTaken = "Copied (new)"
dst , err = f . Put ( in , src )
}
closeErr := in . Close ( )
if err == nil {
err = closeErr
}
}
2015-02-14 20:48:08 +02:00
}
2015-02-02 19:29:08 +02:00
tries ++
2016-06-18 11:55:58 +02:00
if tries >= maxTries {
break
2015-03-14 19:54:41 +02:00
}
2016-06-18 11:55:58 +02:00
// Retry if err returned a retry error
if IsRetryError ( err ) || ShouldRetry ( err ) {
2016-06-25 22:22:50 +02:00
Debug ( src , "Received error: %v - low level retry %d/%d" , err , tries , maxTries )
2016-06-18 11:55:58 +02:00
if removeFailedCopy ( dst ) {
// If we removed dst, then nil it out and note we are not updating
dst = nil
doUpdate = false
}
continue
}
// otherwise finish
break
2014-03-28 20:56:04 +03:00
}
if err != nil {
Stats . Error ( )
2016-06-18 10:32:14 +02:00
ErrorLog ( src , "Failed to copy: %v" , err )
2014-07-15 21:27:05 +03:00
removeFailedCopy ( dst )
2016-07-04 14:12:33 +02:00
return err
2014-03-28 20:56:04 +03:00
}
2014-07-15 21:27:05 +03:00
2014-07-19 14:38:58 +03:00
// Verify sizes are the same after transfer
2016-06-17 18:20:08 +02:00
if ! Config . IgnoreSize && src . Size ( ) != dst . Size ( ) {
2014-07-19 14:38:58 +03:00
Stats . Error ( )
2016-06-12 16:06:02 +02:00
err = errors . Errorf ( "corrupted on transfer: sizes differ %d vs %d" , src . Size ( ) , dst . Size ( ) )
2016-06-18 10:32:14 +02:00
ErrorLog ( dst , "%v" , err )
2014-07-19 14:38:58 +03:00
removeFailedCopy ( dst )
2016-07-04 14:12:33 +02:00
return err
2014-07-19 14:38:58 +03:00
}
2016-01-11 14:39:33 +02:00
// Verify hashes are the same after transfer - ignoring blank hashes
// TODO(klauspost): This could be extended, so we always create a hash type matching
// the destination, and calculate it while sending.
common := src . Fs ( ) . Hashes ( ) . Overlap ( dst . Fs ( ) . Hashes ( ) )
2016-01-24 20:06:57 +02:00
// Debug(src, "common hashes: %v", common)
2016-01-11 14:39:33 +02:00
if ! Config . SizeOnly && common . Count ( ) > 0 {
// Get common hash type
hashType := common . GetOne ( )
2016-07-04 14:12:33 +02:00
var srcSum string
srcSum , err = src . Hash ( hashType )
2016-01-11 14:39:33 +02:00
if err != nil {
2014-07-15 21:27:05 +03:00
Stats . Error ( )
2016-06-18 10:32:14 +02:00
ErrorLog ( src , "Failed to read src hash: %v" , err )
2016-01-11 14:39:33 +02:00
} else if srcSum != "" {
2016-07-04 14:12:33 +02:00
var dstSum string
dstSum , err = dst . Hash ( hashType )
2016-01-11 14:39:33 +02:00
if err != nil {
2015-06-09 14:18:40 +02:00
Stats . Error ( )
2016-06-18 10:32:14 +02:00
ErrorLog ( dst , "Failed to read hash: %v" , err )
2016-01-11 14:39:33 +02:00
} else if ! HashEquals ( srcSum , dstSum ) {
2015-06-09 14:18:40 +02:00
Stats . Error ( )
2016-06-12 16:06:02 +02:00
err = errors . Errorf ( "corrupted on transfer: %v hash differ %q vs %q" , hashType , srcSum , dstSum )
2016-06-18 10:32:14 +02:00
ErrorLog ( dst , "%v" , err )
2015-06-09 14:18:40 +02:00
removeFailedCopy ( dst )
2016-07-04 14:12:33 +02:00
return err
2015-06-09 14:18:40 +02:00
}
2014-07-15 21:27:05 +03:00
}
}
2014-04-18 19:46:57 +03:00
Debug ( src , actionTaken )
2016-07-04 14:12:33 +02:00
return err
2015-08-24 22:42:23 +02:00
}
2016-03-05 18:10:51 +02:00
// DeleteFile deletes a single file respecting --dry-run and accumulating stats and errors.
2016-06-25 15:27:44 +02:00
func DeleteFile ( dst Object ) ( err error ) {
2016-03-05 18:10:51 +02:00
if Config . DryRun {
Log ( dst , "Not deleting as --dry-run" )
} else {
2016-07-02 17:58:50 +02:00
Stats . Checking ( dst . Remote ( ) )
2016-06-25 15:27:44 +02:00
err = dst . Remove ( )
2016-07-02 17:58:50 +02:00
Stats . DoneChecking ( dst . Remote ( ) )
2016-03-05 18:10:51 +02:00
if err != nil {
Stats . Error ( )
2016-06-18 10:32:14 +02:00
ErrorLog ( dst , "Couldn't delete: %v" , err )
2016-03-05 18:10:51 +02:00
} else {
Debug ( dst , "Deleted" )
}
}
2016-06-25 15:27:44 +02:00
return err
2016-03-05 18:10:51 +02:00
}
2015-09-22 19:47:16 +02:00
// DeleteFiles removes all the files passed in the channel
2016-06-25 15:27:44 +02:00
func DeleteFiles ( toBeDeleted ObjectsChan ) error {
2014-03-28 20:56:04 +03:00
var wg sync . WaitGroup
wg . Add ( Config . Transfers )
2016-06-25 15:27:44 +02:00
var errorCount int32
2014-03-28 20:56:04 +03:00
for i := 0 ; i < Config . Transfers ; i ++ {
go func ( ) {
defer wg . Done ( )
2015-09-22 19:47:16 +02:00
for dst := range toBeDeleted {
2016-06-25 15:27:44 +02:00
err := DeleteFile ( dst )
if err != nil {
atomic . AddInt32 ( & errorCount , 1 )
}
2014-03-28 20:56:04 +03:00
}
} ( )
}
2014-07-23 01:03:14 +03:00
Log ( nil , "Waiting for deletions to finish" )
2014-03-28 20:56:04 +03:00
wg . Wait ( )
2016-06-25 15:27:44 +02:00
if errorCount > 0 {
return errors . Errorf ( "failed to delete %d files" , errorCount )
}
return nil
2014-03-28 20:56:04 +03:00
}
2016-06-25 15:28:26 +02:00
// Read a Objects into add() for the given Fs.
2016-04-23 22:46:52 +02:00
// dir is the start directory, "" for root
2016-01-12 15:33:03 +02:00
// If includeAll is specified all files will be added,
// otherwise only files passing the filter will be added.
2016-07-04 14:12:33 +02:00
//
// Each object is passed ito the function provided. If that returns
// an error then the listing will be aborted and that error returned.
func readFilesFn ( fs Fs , includeAll bool , dir string , add func ( Object ) error ) ( err error ) {
2016-04-21 21:06:21 +02:00
list := NewLister ( )
if ! includeAll {
list . SetFilter ( Config . Filter )
2016-06-02 22:02:44 +02:00
list . SetLevel ( Config . MaxDepth )
2016-04-21 21:06:21 +02:00
}
2016-04-23 22:46:52 +02:00
list . Start ( fs , dir )
2016-04-21 21:06:21 +02:00
for {
o , err := list . GetObject ( )
if err != nil {
2016-06-25 15:28:26 +02:00
return err
2016-04-21 21:06:21 +02:00
}
// Check if we are finished
if o == nil {
break
}
2016-06-25 15:28:26 +02:00
// Make sure we don't delete excluded files if not required
if includeAll || Config . Filter . IncludeObject ( o ) {
2016-07-04 14:12:33 +02:00
err = add ( o )
if err != nil {
list . SetError ( err )
}
2016-06-25 15:28:26 +02:00
} else {
Debug ( o , "Excluded from sync (and deletion)" )
}
}
return nil
}
// Read a map of Object.Remote to Object for the given Fs.
// dir is the start directory, "" for root
// If includeAll is specified all files will be added,
// otherwise only files passing the filter will be added.
//
// This also detects duplicates and normalised duplicates
func readFilesMap ( fs Fs , includeAll bool , dir string ) ( files map [ string ] Object , err error ) {
files = make ( map [ string ] Object )
normalised := make ( map [ string ] struct { } )
2016-07-04 14:12:33 +02:00
err = readFilesFn ( fs , includeAll , dir , func ( o Object ) error {
2015-03-14 19:11:24 +02:00
remote := o . Remote ( )
2016-01-23 22:16:47 +02:00
normalisedRemote := strings . ToLower ( norm . NFC . String ( remote ) )
2015-03-14 19:11:24 +02:00
if _ , ok := files [ remote ] ; ! ok {
2016-06-25 15:28:26 +02:00
files [ remote ] = o
if _ , ok := normalised [ normalisedRemote ] ; ok {
Log ( o , "Warning: File found with same name but different case on %v" , o . Fs ( ) )
2015-11-12 13:46:04 +02:00
}
2015-03-14 19:11:24 +02:00
} else {
Log ( o , "Duplicate file detected" )
}
2016-01-23 22:16:47 +02:00
normalised [ normalisedRemote ] = struct { } { }
2016-07-04 14:12:33 +02:00
return nil
2016-06-25 15:28:26 +02:00
} )
return files , err
2016-04-21 21:06:21 +02:00
}
// readFilesMaps runs readFilesMap on fdst and fsrc at the same time
2016-04-23 22:46:52 +02:00
// dir is the start directory, "" for root
func readFilesMaps ( fdst Fs , fdstIncludeAll bool , fsrc Fs , fsrcIncludeAll bool , dir string ) ( dstFiles , srcFiles map [ string ] Object , err error ) {
2016-04-21 21:06:21 +02:00
var wg sync . WaitGroup
var srcErr , dstErr error
list := func ( fs Fs , includeAll bool , pMap * map [ string ] Object , pErr * error ) {
defer wg . Done ( )
Log ( fs , "Building file list" )
2016-06-21 22:17:52 +02:00
files , listErr := readFilesMap ( fs , includeAll , dir )
2016-04-21 21:06:21 +02:00
if listErr != nil {
ErrorLog ( fs , "Error building file list: %v" , listErr )
* pErr = listErr
} else {
Debug ( fs , "Done building file list" )
2016-06-21 22:17:52 +02:00
* pMap = files
2016-04-21 21:06:21 +02:00
}
}
wg . Add ( 2 )
go list ( fdst , fdstIncludeAll , & dstFiles , & srcErr )
go list ( fsrc , fsrcIncludeAll , & srcFiles , & dstErr )
wg . Wait ( )
if srcErr != nil {
err = srcErr
}
if dstErr != nil {
err = dstErr
}
return dstFiles , srcFiles , err
2015-03-14 19:11:24 +02:00
}
2015-09-22 19:47:16 +02:00
// Same returns true if fdst and fsrc point to the same underlying Fs
func Same ( fdst , fsrc Fs ) bool {
2015-09-01 21:50:28 +02:00
return fdst . Name ( ) == fsrc . Name ( ) && fdst . Root ( ) == fsrc . Root ( )
}
2016-04-07 15:56:27 +02:00
// checkIdentical checks to see if dst and src are identical
//
// it returns true if differences were found
func checkIdentical ( dst , src Object ) bool {
2016-07-02 17:58:50 +02:00
Stats . Checking ( src . Remote ( ) )
defer Stats . DoneChecking ( src . Remote ( ) )
2016-04-07 15:56:27 +02:00
if src . Size ( ) != dst . Size ( ) {
Stats . Error ( )
ErrorLog ( src , "Sizes differ" )
return true
}
if ! Config . SizeOnly {
same , _ , err := CheckHashes ( src , dst )
if err != nil {
// CheckHashes will log and count errors
return true
}
if ! same {
Stats . Error ( )
ErrorLog ( src , "Md5sums differ" )
return true
}
}
Debug ( src , "OK" )
return false
}
2016-01-11 14:39:33 +02:00
// Check the files in fsrc and fdst according to Size and hash
2014-03-28 20:56:04 +03:00
func Check ( fdst , fsrc Fs ) error {
2016-04-23 22:46:52 +02:00
dstFiles , srcFiles , err := readFilesMaps ( fdst , false , fsrc , false , "" )
2016-04-21 21:06:21 +02:00
if err != nil {
return err
}
2016-01-17 12:08:28 +02:00
differences := int32 ( 0 )
2015-11-24 18:54:12 +02:00
// FIXME could do this as it goes along and make it use less
// memory.
2015-03-14 19:11:24 +02:00
// Move all the common files into commonFiles and delete then
// from srcFiles and dstFiles
2014-03-28 20:56:04 +03:00
commonFiles := make ( map [ string ] [ ] Object )
2015-03-14 19:11:24 +02:00
for remote , src := range srcFiles {
2014-03-28 20:56:04 +03:00
if dst , ok := dstFiles [ remote ] ; ok {
commonFiles [ remote ] = [ ] Object { dst , src }
2015-03-14 19:11:24 +02:00
delete ( srcFiles , remote )
2014-03-28 20:56:04 +03:00
delete ( dstFiles , remote )
}
}
Log ( fdst , "%d files not in %v" , len ( dstFiles ) , fsrc )
for _ , dst := range dstFiles {
Stats . Error ( )
2015-08-08 21:10:31 +02:00
ErrorLog ( dst , "File not in %v" , fsrc )
2016-01-17 12:08:28 +02:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 20:56:04 +03:00
}
Log ( fsrc , "%d files not in %s" , len ( srcFiles ) , fdst )
for _ , src := range srcFiles {
Stats . Error ( )
2015-08-08 21:10:31 +02:00
ErrorLog ( src , "File not in %v" , fdst )
2016-01-17 12:08:28 +02:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 20:56:04 +03:00
}
checks := make ( chan [ ] Object , Config . Transfers )
go func ( ) {
for _ , check := range commonFiles {
checks <- check
}
close ( checks )
} ( )
var checkerWg sync . WaitGroup
checkerWg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
go func ( ) {
defer checkerWg . Done ( )
for check := range checks {
2016-04-07 15:56:27 +02:00
if checkIdentical ( check [ 0 ] , check [ 1 ] ) {
2016-01-17 12:08:28 +02:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 20:56:04 +03:00
}
}
} ( )
}
Log ( fdst , "Waiting for checks to finish" )
checkerWg . Wait ( )
Log ( fdst , "%d differences found" , Stats . GetErrors ( ) )
2016-01-17 12:08:28 +02:00
if differences > 0 {
2016-06-12 16:06:02 +02:00
return errors . Errorf ( "%d differences found" , differences )
2014-03-28 20:56:04 +03:00
}
return nil
}
2015-09-22 19:47:16 +02:00
// ListFn lists the Fs to the supplied function
2014-03-28 20:56:04 +03:00
//
// Lists in parallel which may get them out of order
2014-07-12 14:09:20 +03:00
func ListFn ( f Fs , fn func ( Object ) ) error {
2016-06-02 22:02:44 +02:00
list := NewLister ( ) . SetFilter ( Config . Filter ) . SetLevel ( Config . MaxDepth ) . Start ( f , "" )
2014-03-28 20:56:04 +03:00
var wg sync . WaitGroup
wg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
go func ( ) {
defer wg . Done ( )
2016-04-21 21:06:21 +02:00
for {
o , err := list . GetObject ( )
if err != nil {
log . Fatal ( err )
}
// check if we are finished
if o == nil {
return
}
2015-11-24 18:54:12 +02:00
if Config . Filter . IncludeObject ( o ) {
fn ( o )
}
2014-03-28 20:56:04 +03:00
}
} ( )
}
wg . Wait ( )
return nil
}
2015-02-28 17:30:40 +02:00
// outMutex serialises the writes made through syncFprintf
var outMutex sync.Mutex

// syncFprintf is fmt.Fprintf performed while holding outMutex, so
// that output from concurrent callers is not interleaved.
//
// Output is best effort: the byte count and error from Fprintf are
// deliberately discarded.
func syncFprintf(w io.Writer, format string, a ...interface{}) {
	outMutex.Lock()
	defer outMutex.Unlock()
	_, _ = fmt.Fprintf(w, format, a...)
}
2015-09-15 16:46:06 +02:00
// List the Fs to the supplied writer
2014-07-12 14:09:20 +03:00
//
2015-11-24 18:54:12 +02:00
// Shows size and path - obeys includes and excludes
2014-07-12 14:09:20 +03:00
//
// Lists in parallel which may get them out of order
2014-08-01 19:58:39 +03:00
func List ( f Fs , w io . Writer ) error {
2014-07-12 14:09:20 +03:00
return ListFn ( f , func ( o Object ) {
2015-02-28 17:30:40 +02:00
syncFprintf ( w , "%9d %s\n" , o . Size ( ) , o . Remote ( ) )
2014-07-12 14:09:20 +03:00
} )
}
2015-09-22 19:47:16 +02:00
// ListLong lists the Fs to the supplied writer
2014-07-12 14:09:20 +03:00
//
2015-11-24 18:54:12 +02:00
// Shows size, mod time and path - obeys includes and excludes
2014-07-12 14:09:20 +03:00
//
// Lists in parallel which may get them out of order
2014-08-01 19:58:39 +03:00
func ListLong ( f Fs , w io . Writer ) error {
2014-07-12 14:09:20 +03:00
return ListFn ( f , func ( o Object ) {
2016-07-02 17:58:50 +02:00
Stats . Checking ( o . Remote ( ) )
2014-07-12 14:09:20 +03:00
modTime := o . ModTime ( )
2016-07-02 17:58:50 +02:00
Stats . DoneChecking ( o . Remote ( ) )
2015-09-22 20:04:12 +02:00
syncFprintf ( w , "%9d %s %s\n" , o . Size ( ) , modTime . Local ( ) . Format ( "2006-01-02 15:04:05.000000000" ) , o . Remote ( ) )
2014-07-12 14:09:20 +03:00
} )
}
2015-09-22 19:47:16 +02:00
// Md5sum list the Fs to the supplied writer
2014-07-12 14:09:20 +03:00
//
2015-11-24 18:54:12 +02:00
// Produces the same output as the md5sum command - obeys includes and
// excludes
2014-07-12 14:09:20 +03:00
//
// Lists in parallel which may get them out of order
2014-08-01 19:58:39 +03:00
func Md5sum ( f Fs , w io . Writer ) error {
2016-01-11 14:39:33 +02:00
return hashLister ( HashMD5 , f , w )
}
// Sha1sum lists the Fs to the supplied writer
//
// Writes one line per object with its SHA1 hash and path - obeys
// includes and excludes
//
// Lists in parallel which may get them out of order
func Sha1sum(f Fs, w io.Writer) error {
	err := hashLister(HashSHA1, f, w)
	return err
}
func hashLister ( ht HashType , f Fs , w io . Writer ) error {
2014-07-12 14:09:20 +03:00
return ListFn ( f , func ( o Object ) {
2016-07-02 17:58:50 +02:00
Stats . Checking ( o . Remote ( ) )
2016-01-11 14:39:33 +02:00
sum , err := o . Hash ( ht )
2016-07-02 17:58:50 +02:00
Stats . DoneChecking ( o . Remote ( ) )
2016-01-11 14:39:33 +02:00
if err == ErrHashUnsupported {
sum = "UNSUPPORTED"
} else if err != nil {
Debug ( o , "Failed to read %v: %v" , ht , err )
sum = "ERROR"
2014-07-12 14:09:20 +03:00
}
2016-01-17 15:56:00 +02:00
syncFprintf ( w , "%*s %s\n" , HashWidth [ ht ] , sum , o . Remote ( ) )
2014-07-12 14:09:20 +03:00
} )
}
2015-10-02 20:48:48 +02:00
// Count counts the objects and their sizes in the Fs
2015-11-24 18:54:12 +02:00
//
// Obeys includes and excludes
2015-10-02 20:48:48 +02:00
func Count ( f Fs ) ( objects int64 , size int64 , err error ) {
err = ListFn ( f , func ( o Object ) {
atomic . AddInt64 ( & objects , 1 )
atomic . AddInt64 ( & size , o . Size ( ) )
} )
return
}
2015-09-22 19:47:16 +02:00
// ListDir lists the directories/buckets/containers in the Fs to the supplied writer
2014-08-01 19:58:39 +03:00
func ListDir ( f Fs , w io . Writer ) error {
2016-06-02 22:02:44 +02:00
level := 1
if Config . MaxDepth > 0 {
level = Config . MaxDepth
}
list := NewLister ( ) . SetLevel ( level ) . Start ( f , "" )
2016-04-21 21:06:21 +02:00
for {
dir , err := list . GetDir ( )
if err != nil {
log . Fatal ( err )
}
if dir == nil {
break
}
2015-02-28 17:30:40 +02:00
syncFprintf ( w , "%12d %13s %9d %s\n" , dir . Bytes , dir . When . Format ( "2006-01-02 15:04:05" ) , dir . Count , dir . Name )
2014-03-28 20:56:04 +03:00
}
return nil
}
2015-09-22 19:47:16 +02:00
// Mkdir makes a destination directory or container
2014-03-28 20:56:04 +03:00
func Mkdir ( f Fs ) error {
2016-02-28 21:47:22 +02:00
if Config . DryRun {
Log ( f , "Not making directory as dry run is set" )
return nil
}
2014-03-28 20:56:04 +03:00
err := f . Mkdir ( )
if err != nil {
Stats . Error ( )
return err
}
return nil
}
2016-02-25 22:05:34 +02:00
// TryRmdir removes a container but not if not empty. It doesn't
// count errors but may return one.
func TryRmdir ( f Fs ) error {
2014-03-28 20:56:04 +03:00
if Config . DryRun {
Log ( f , "Not deleting as dry run is set" )
2016-02-25 22:05:34 +02:00
return nil
2014-03-28 20:56:04 +03:00
}
2016-02-25 22:05:34 +02:00
return f . Rmdir ( )
}
// Rmdir removes a container but not if not empty
func Rmdir ( f Fs ) error {
err := TryRmdir ( f )
if err != nil {
Stats . Error ( )
return err
}
return err
2014-03-28 20:56:04 +03:00
}
2015-09-22 19:47:16 +02:00
// Purge removes a container and all of its contents
2014-03-28 20:56:04 +03:00
//
// FIXME doesn't delete local directories
func Purge ( f Fs ) error {
2015-11-08 16:16:00 +02:00
doFallbackPurge := true
2014-07-25 20:19:49 +03:00
var err error
2014-03-28 20:56:04 +03:00
if purger , ok := f . ( Purger ) ; ok {
2015-11-08 16:16:00 +02:00
doFallbackPurge = false
2014-07-13 12:45:13 +03:00
if Config . DryRun {
2016-01-31 17:53:09 +02:00
Log ( f , "Not purging as --dry-run set" )
2014-07-13 12:45:13 +03:00
} else {
2014-07-25 20:19:49 +03:00
err = purger . Purge ( )
2015-11-08 16:16:00 +02:00
if err == ErrorCantPurge {
doFallbackPurge = true
}
2014-03-28 20:56:04 +03:00
}
2015-11-08 16:16:00 +02:00
}
if doFallbackPurge {
2014-07-25 20:19:49 +03:00
// DeleteFiles and Rmdir observe --dry-run
2016-04-23 22:46:52 +02:00
list := NewLister ( ) . Start ( f , "" )
2016-06-25 15:27:44 +02:00
err = DeleteFiles ( listToChan ( list ) )
if err != nil {
return err
}
2014-07-25 20:19:49 +03:00
err = Rmdir ( f )
}
if err != nil {
Stats . Error ( )
return err
2014-03-28 20:56:04 +03:00
}
return nil
}
2015-12-03 00:25:32 +02:00
// Delete removes all the contents of a container. Unlike Purge, it
// obeys includes and excludes.
func Delete ( f Fs ) error {
delete := make ( ObjectsChan , Config . Transfers )
2016-06-25 15:27:44 +02:00
delErr := make ( chan error , 1 )
2015-12-03 00:25:32 +02:00
go func ( ) {
2016-06-25 15:27:44 +02:00
delErr <- DeleteFiles ( delete )
2015-12-03 00:25:32 +02:00
} ( )
err := ListFn ( f , func ( o Object ) {
delete <- o
} )
close ( delete )
2016-06-25 15:27:44 +02:00
delError := <- delErr
if err == nil {
err = delError
}
2015-12-03 00:25:32 +02:00
return err
}
2016-01-31 14:58:41 +02:00
2016-03-05 18:10:51 +02:00
// dedupeRename renames the objs slice to different names
func dedupeRename ( remote string , objs [ ] Object ) {
f := objs [ 0 ] . Fs ( )
2016-01-31 14:58:41 +02:00
mover , ok := f . ( Mover )
if ! ok {
2016-03-05 18:10:51 +02:00
log . Fatalf ( "Fs %v doesn't support Move" , f )
}
ext := path . Ext ( remote )
base := remote [ : len ( remote ) - len ( ext ) ]
for i , o := range objs {
newName := fmt . Sprintf ( "%s-%d%s" , base , i + 1 , ext )
if ! Config . DryRun {
newObj , err := mover . Move ( o , newName )
if err != nil {
Stats . Error ( )
ErrorLog ( o , "Failed to rename: %v" , err )
continue
}
Log ( newObj , "renamed from: %v" , o )
} else {
Log ( remote , "Not renaming to %q as --dry-run" , newName )
}
}
}
// dedupeDeleteAllButOne deletes all but the one in keep
func dedupeDeleteAllButOne ( keep int , remote string , objs [ ] Object ) {
for i , o := range objs {
if i == keep {
continue
}
2016-06-25 15:27:44 +02:00
_ = DeleteFile ( o )
2016-03-05 18:10:51 +02:00
}
Log ( remote , "Deleted %d extra copies" , len ( objs ) - 1 )
}
// dedupeDeleteIdentical deletes all but one of identical (by hash) copies
func dedupeDeleteIdentical ( remote string , objs [ ] Object ) [ ] Object {
// See how many of these duplicates are identical
byHash := make ( map [ string ] [ ] Object , len ( objs ) )
for _ , o := range objs {
md5sum , err := o . Hash ( HashMD5 )
if err == nil {
byHash [ md5sum ] = append ( byHash [ md5sum ] , o )
}
2016-01-31 14:58:41 +02:00
}
2016-03-05 18:10:51 +02:00
// Delete identical duplicates, refilling obj with the ones remaining
objs = nil
for md5sum , hashObjs := range byHash {
if len ( hashObjs ) > 1 {
Log ( remote , "Deleting %d/%d identical duplicates (md5sum %q)" , len ( hashObjs ) - 1 , len ( hashObjs ) , md5sum )
for _ , o := range hashObjs [ 1 : ] {
2016-06-25 15:27:44 +02:00
_ = DeleteFile ( o )
2016-03-05 18:10:51 +02:00
}
}
objs = append ( objs , hashObjs [ 0 ] )
}
return objs
}
// dedupeInteractive interactively dedupes the slice of objects
//
// It prints the size, modification time and MD5 hash (or the error
// from reading it) of each object and then asks whether to skip, keep
// just one or rename them all.
func dedupeInteractive(remote string, objs []Object) {
	fmt.Printf("%s: %d duplicates remain\n", remote, len(objs))
	for i, o := range objs {
		md5sum, err := o.Hash(HashMD5)
		if err != nil {
			md5sum = err.Error()
		}
		fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", i+1, o.Size(), o.ModTime().Format("2006-01-02 15:04:05.000000000"), md5sum)
	}
	choice := Command([]string{
		"sSkip and do nothing",
		"kKeep just one (choose which in next step)",
		"rRename all to be different (by changing file.jpg to file-1.jpg)",
	})
	if choice == 'k' {
		// The user picks from 1, the slice is indexed from 0
		keep := ChooseNumber("Enter the number of the file to keep", 1, len(objs))
		dedupeDeleteAllButOne(keep-1, remote, objs)
	} else if choice == 'r' {
		dedupeRename(remote, objs)
	}
	// 's' (and anything else) leaves the objects alone
}
// objectsSortedByModTime implements sort.Interface to order Objects
// by modification time, oldest first.
type objectsSortedByModTime []Object

// Len is the number of objects
func (objs objectsSortedByModTime) Len() int {
	return len(objs)
}

// Swap exchanges the objects at i and j
func (objs objectsSortedByModTime) Swap(i, j int) {
	tmp := objs[i]
	objs[i] = objs[j]
	objs[j] = tmp
}

// Less reports whether object i was modified before object j
func (objs objectsSortedByModTime) Less(i, j int) bool {
	earlier, later := objs[i].ModTime(), objs[j].ModTime()
	return earlier.Before(later)
}
// DeduplicateMode is how the dedupe command chooses what to do
type DeduplicateMode int

// Deduplicate modes
const (
	DeduplicateInteractive DeduplicateMode = iota // interactively ask the user
	DeduplicateSkip                               // skip all conflicts
	DeduplicateFirst                              // choose the first object
	DeduplicateNewest                             // choose the newest object
	DeduplicateOldest                             // choose the oldest object
	DeduplicateRename                             // rename the objects
)

// String returns the lower case name of the mode, or "unknown" for a
// value which isn't one of the Deduplicate modes.
func (mode DeduplicateMode) String() string {
	names := [...]string{
		DeduplicateInteractive: "interactive",
		DeduplicateSkip:        "skip",
		DeduplicateFirst:       "first",
		DeduplicateNewest:      "newest",
		DeduplicateOldest:      "oldest",
		DeduplicateRename:      "rename",
	}
	if mode >= 0 && int(mode) < len(names) {
		return names[mode]
	}
	return "unknown"
}
// Deduplicate interactively finds duplicate files and offers to
// delete all but one or rename them to be different. Only useful with
// Google Drive which can have duplicate file names.
func Deduplicate ( f Fs , mode DeduplicateMode ) error {
Log ( f , "Looking for duplicates using %v mode." , mode )
2016-01-31 14:58:41 +02:00
files := map [ string ] [ ] Object { }
2016-04-23 22:46:52 +02:00
list := NewLister ( ) . Start ( f , "" )
2016-04-21 21:06:21 +02:00
for {
o , err := list . GetObject ( )
if err != nil {
return err
}
// Check if we are finished
if o == nil {
break
}
2016-01-31 14:58:41 +02:00
remote := o . Remote ( )
files [ remote ] = append ( files [ remote ] , o )
}
for remote , objs := range files {
if len ( objs ) > 1 {
2016-03-05 18:10:51 +02:00
Log ( remote , "Found %d duplicates - deleting identical copies" , len ( objs ) )
objs = dedupeDeleteIdentical ( remote , objs )
if len ( objs ) <= 1 {
Log ( remote , "All duplicates removed" )
continue
2016-01-31 14:58:41 +02:00
}
2016-03-05 18:10:51 +02:00
switch mode {
case DeduplicateInteractive :
dedupeInteractive ( remote , objs )
case DeduplicateFirst :
dedupeDeleteAllButOne ( 0 , remote , objs )
case DeduplicateNewest :
sort . Sort ( objectsSortedByModTime ( objs ) ) // sort oldest first
dedupeDeleteAllButOne ( len ( objs ) - 1 , remote , objs )
case DeduplicateOldest :
sort . Sort ( objectsSortedByModTime ( objs ) ) // sort oldest first
dedupeDeleteAllButOne ( 0 , remote , objs )
case DeduplicateRename :
dedupeRename ( remote , objs )
case DeduplicateSkip :
// skip
default :
//skip
2016-01-31 14:58:41 +02:00
}
}
}
return nil
}
2016-04-21 21:06:21 +02:00
// listToChan will transfer all incoming objects to a new channel.
//
// Only objects are sent on the channel - directory entries from the
// listing are skipped.
//
// If an error occurs, the error will be logged, and it will close the
// channel.
//
// If the error was ErrorDirNotFound then it will be ignored
func listToChan(list *Lister) ObjectsChan {
	o := make(ObjectsChan, Config.Checkers)
	go func() {
		defer close(o)
		for {
			obj, dir, err := list.Get()
			if err != nil {
				if err != ErrorDirNotFound {
					Stats.Error()
					ErrorLog(nil, "Failed to list: %v", err)
				}
				return
			}
			// Neither an object nor a directory means the listing is done
			if dir == nil && obj == nil {
				return
			}
			// A directory entry has no object - don't pass a nil
			// Object on to the receiver
			if obj == nil {
				continue
			}
			o <- obj
		}
	}()
	return o
}
2016-07-01 17:35:36 +02:00
// CleanUp removes the trash for the Fs
func CleanUp ( f Fs ) error {
fc , ok := f . ( CleanUpper )
if ! ok {
return errors . Errorf ( "%v doesn't support cleanup" , f )
}
2016-07-02 17:58:50 +02:00
if Config . DryRun {
Log ( f , "Not running cleanup as --dry-run set" )
return nil
}
2016-07-01 17:35:36 +02:00
return fc . CleanUp ( )
}