From 63819598505cf9a3346a37a553c31381afc71bb9 Mon Sep 17 00:00:00 2001
From: Nick Craig-Wood <nick@craig-wood.com>
Date: Fri, 26 May 2017 15:09:31 +0100
Subject: [PATCH] dropbox: support Dropbox content hashing scheme - fixes #1302

  * add support to hashing module
  * add dbhashsum to list the hashes
  * add support to dropbox module

This means objects uploaded to and downloaded from Dropbox will have
their hashes checked.

Note that after this change local objects calculate MD5, SHA1 and
DBHASH, which is excessive and needs to be fixed.
---
 cmd/all/all.go           |  1 +
 docs/content/dropbox.md  |  6 +++++-
 docs/content/overview.md |  8 ++++++--
 dropbox/dropbox.go       | 30 +++++++++++++++++++-----------
 fs/hash.go               | 16 +++++++++++++---
 fs/hash_test.go          | 18 ++++++++++--------
 fs/operations.go         |  9 +++++++++
 fs/operations_test.go    | 34 ++++++++++++++++++++++------------
 8 files changed, 85 insertions(+), 37 deletions(-)

diff --git a/cmd/all/all.go b/cmd/all/all.go
index 0c2d2e3b0..a355348f5 100644
--- a/cmd/all/all.go
+++ b/cmd/all/all.go
@@ -13,6 +13,7 @@ import (
 	_ "github.com/ncw/rclone/cmd/copy"
 	_ "github.com/ncw/rclone/cmd/copyto"
 	_ "github.com/ncw/rclone/cmd/cryptcheck"
+	_ "github.com/ncw/rclone/cmd/dbhashsum"
 	_ "github.com/ncw/rclone/cmd/dedupe"
 	_ "github.com/ncw/rclone/cmd/delete"
 	_ "github.com/ncw/rclone/cmd/genautocomplete"
diff --git a/docs/content/dropbox.md b/docs/content/dropbox.md
index 03b21dad2..49a3d7a75 100644
--- a/docs/content/dropbox.md
+++ b/docs/content/dropbox.md
@@ -91,7 +91,7 @@ To copy a local directory to a dropbox directory called backup
 
     rclone copy /home/source remote:backup
 
-### Modified time and MD5SUMs ###
+### Modified time and Hashes ###
 
 Dropbox supports modified times, but the only way to set a
 modification time is to re-upload the file.
@@ -102,6 +102,10 @@ decide to upload all your old data to fix the modification times.  If
 you don't want this to happen use `--size-only` or `--checksum` flag
 to stop it.
 
+Dropbox supports [its own hash
+type](https://www.dropbox.com/developers/reference/content-hash) which
+is checked for all transfers.
+
 ### Specific options ###
 
 Here are the command line options specific to this cloud storage
diff --git a/docs/content/overview.md b/docs/content/overview.md
index 02ed89cc5..f88d1415d 100644
--- a/docs/content/overview.md
+++ b/docs/content/overview.md
@@ -20,7 +20,7 @@ Here is an overview of the major features of each cloud storage system.
 | Google Drive           | MD5     | Yes     | No               | Yes             | R/W       |
 | Amazon S3              | MD5     | Yes     | No               | No              | R/W       |
 | Openstack Swift        | MD5     | Yes     | No               | No              | R/W       |
-| Dropbox                | -       | Yes     | Yes              | No              | -         |
+| Dropbox                | DBHASH †| Yes     | Yes              | No              | -         |
 | Google Cloud Storage   | MD5     | Yes     | No               | No              | R/W       |
 | Amazon Drive           | MD5     | No      | Yes              | No              | R         |
 | Microsoft OneDrive     | SHA1    | Yes     | Yes              | No              | R         |
@@ -28,7 +28,7 @@ Here is an overview of the major features of each cloud storage system.
 | Backblaze B2           | SHA1    | Yes     | No               | No              | R/W       |
 | Yandex Disk            | MD5     | Yes     | No               | No              | R/W       |
 | SFTP                   | -       | Yes     | Depends          | No              | -         |
-| FTP                    | None    | No      | Yes              | No              | -         |
+| FTP                    | -       | No      | Yes              | No              | -         |
 | The local filesystem   | All     | Yes     | Depends          | No              | -         |
 
 ### Hash ###
@@ -41,6 +41,10 @@ the `check` command.
 To use the checksum checks between filesystems they must support a 
 common hash type.
 
+† Note that Dropbox supports [its own custom
+hash](https://www.dropbox.com/developers/reference/content-hash).
+This is a SHA256 sum of the concatenated SHA256 sums of each 4MB block.
+
 ### ModTime ###
 
 The cloud storage system supports setting modification times on
diff --git a/dropbox/dropbox.go b/dropbox/dropbox.go
index 394be939b..e005cf5ba 100644
--- a/dropbox/dropbox.go
+++ b/dropbox/dropbox.go
@@ -2,7 +2,6 @@
 package dropbox
 
 // FIXME put low level retries in
-// FIXME add dropbox style hashes
 // FIXME dropbox for business would be quite easy to add
 
 /*
@@ -99,12 +98,14 @@ type Fs struct {
 }
 
 // Object describes a dropbox object
+//
+// Dropbox Objects always have full metadata
 type Object struct {
-	fs          *Fs       // what this object is part of
-	remote      string    // The remote path
-	bytes       int64     // size of the object
-	modTime     time.Time // time it was last modified
-	hasMetadata bool      // metadata is valid
+	fs      *Fs       // what this object is part of
+	remote  string    // The remote path
+	bytes   int64     // size of the object
+	modTime time.Time // time it was last modified
+	hash    string    // content_hash of the object
 }
 
 // ------------------------------------------------------------
@@ -640,7 +641,7 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error {
 
 // Hashes returns the supported hash sets.
 func (f *Fs) Hashes() fs.HashSet {
-	return fs.HashSet(fs.HashNone)
+	return fs.HashSet(fs.HashDropbox)
 }
 
 // ------------------------------------------------------------
@@ -663,9 +664,16 @@ func (o *Object) Remote() string {
 	return o.remote
 }
 
-// Hash is unsupported on Dropbox
+// Hash returns the dropbox special hash
 func (o *Object) Hash(t fs.HashType) (string, error) {
-	return "", fs.ErrHashUnsupported
+	if t != fs.HashDropbox {
+		return "", fs.ErrHashUnsupported
+	}
+	err := o.readMetaData()
+	if err != nil {
+		return "", errors.Wrap(err, "failed to read hash from metadata")
+	}
+	return o.hash, nil
 }
 
 // Size returns the size of an object in bytes
@@ -679,7 +687,7 @@ func (o *Object) Size() int64 {
 func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
 	o.bytes = int64(info.Size)
 	o.modTime = info.ClientModified
-	o.hasMetadata = true
+	o.hash = info.ContentHash
 	return nil
 }
 
@@ -722,7 +730,7 @@ func (o *Object) metadataKey() string {
 
 // readMetaData gets the info if it hasn't already been fetched
 func (o *Object) readMetaData() (err error) {
-	if o.hasMetadata {
+	if !o.modTime.IsZero() {
 		return nil
 	}
 	// Last resort
diff --git a/fs/hash.go b/fs/hash.go
index e8d9d8e1e..cd808fbd0 100644
--- a/fs/hash.go
+++ b/fs/hash.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"strings"
 
+	"github.com/ncw/rclone/dropbox/dbhash"
 	"github.com/pkg/errors"
 )
 
@@ -26,18 +27,23 @@ const (
 	// HashSHA1 indicates SHA-1 support
 	HashSHA1
 
+	// HashDropbox indicates Dropbox special hash
+	// https://www.dropbox.com/developers/reference/content-hash
+	HashDropbox
+
 	// HashNone indicates no hashes are supported
 	HashNone HashType = 0
 )
 
 // SupportedHashes returns a set of all the supported hashes by
 // HashStream and MultiHasher.
-var SupportedHashes = NewHashSet(HashMD5, HashSHA1)
+var SupportedHashes = NewHashSet(HashMD5, HashSHA1, HashDropbox)
 
 // HashWidth returns the width in characters for any HashType
 var HashWidth = map[HashType]int{
-	HashMD5:  32,
-	HashSHA1: 40,
+	HashMD5:     32,
+	HashSHA1:    40,
+	HashDropbox: 64,
 }
 
 // HashStream will calculate hashes of all supported hash types.
@@ -73,6 +79,8 @@ func (h HashType) String() string {
 		return "MD5"
 	case HashSHA1:
 		return "SHA-1"
+	case HashDropbox:
+		return "DropboxHash"
 	default:
 		err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
 		panic(err)
@@ -94,6 +102,8 @@ func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) {
 			hashers[t] = md5.New()
 		case HashSHA1:
 			hashers[t] = sha1.New()
+		case HashDropbox:
+			hashers[t] = dbhash.New()
 		default:
 			err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
 			panic(err)
diff --git a/fs/hash_test.go b/fs/hash_test.go
index cfe4791a2..b3cc8d07e 100644
--- a/fs/hash_test.go
+++ b/fs/hash_test.go
@@ -65,16 +65,18 @@ var hashTestSet = []hashTest{
 	{
 		input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
 		output: map[fs.HashType]string{
-			fs.HashMD5:  "bf13fc19e5151ac57d4252e0e0f87abe",
-			fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
+			fs.HashMD5:     "bf13fc19e5151ac57d4252e0e0f87abe",
+			fs.HashSHA1:    "3ab6543c08a75f292a5ecedac87ec41642d12166",
+			fs.HashDropbox: "214d2fcf3566e94c99ad2f59bd993daca46d8521a0c447adf4b324f53fddc0c7",
 		},
 	},
 	// Empty data set
 	{
 		input: []byte{},
 		output: map[fs.HashType]string{
-			fs.HashMD5:  "d41d8cd98f00b204e9800998ecf8427e",
-			fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+			fs.HashMD5:     "d41d8cd98f00b204e9800998ecf8427e",
+			fs.HashSHA1:    "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+			fs.HashDropbox: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
 		},
 	},
 }
@@ -88,13 +90,13 @@ func TestMultiHasher(t *testing.T) {
 		sums := mh.Sums()
 		for k, v := range sums {
 			expect, ok := test.output[k]
-			require.True(t, ok)
+			require.True(t, ok, "test output for hash not found")
 			assert.Equal(t, v, expect)
 		}
 		// Test that all are present
 		for k, v := range test.output {
 			expect, ok := sums[k]
-			require.True(t, ok)
+			require.True(t, ok, "test output for hash not found")
 			assert.Equal(t, v, expect)
 		}
 	}
@@ -145,8 +147,8 @@ func TestHashStreamTypes(t *testing.T) {
 }
 
 func TestHashSetStringer(t *testing.T) {
-	h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5)
-	assert.Equal(t, h.String(), "[MD5, SHA-1]")
+	h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5, fs.HashDropbox)
+	assert.Equal(t, h.String(), "[MD5, SHA-1, DropboxHash]")
 	h = fs.NewHashSet(fs.HashSHA1)
 	assert.Equal(t, h.String(), "[SHA-1]")
 	h = fs.NewHashSet()
diff --git a/fs/operations.go b/fs/operations.go
index 752203106..923f53fc6 100644
--- a/fs/operations.go
+++ b/fs/operations.go
@@ -978,6 +978,15 @@ func Sha1sum(f Fs, w io.Writer) error {
 	return hashLister(HashSHA1, f, w)
 }
 
+// DropboxHashSum lists the Fs to the supplied writer
+//
+// Obeys includes and excludes
+//
+// Lists in parallel which may get them out of order
+func DropboxHashSum(f Fs, w io.Writer) error {
+	return hashLister(HashDropbox, f, w)
+}
+
 func hashLister(ht HashType, f Fs, w io.Writer) error {
 	return ListFn(f, func(o Object) {
 		Stats.Checking(o.Remote())
diff --git a/fs/operations_test.go b/fs/operations_test.go
index 40a1af16a..0a0d84bb6 100644
--- a/fs/operations_test.go
+++ b/fs/operations_test.go
@@ -386,7 +386,7 @@ func TestLsLong(t *testing.T) {
 	}
 }
 
-func TestMd5sum(t *testing.T) {
+func TestHashSums(t *testing.T) {
 	r := NewRun(t)
 	defer r.Finalise()
 	file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
@@ -394,6 +394,8 @@ func TestMd5sum(t *testing.T) {
 
 	fstest.CheckItems(t, r.fremote, file1, file2)
 
+	// MD5 Sum
+
 	var buf bytes.Buffer
 	err := fs.Md5sum(r.fremote, &buf)
 	require.NoError(t, err)
@@ -408,20 +410,12 @@ func TestMd5sum(t *testing.T) {
 		!strings.Contains(res, "                                  potato2\n") {
 		t.Errorf("potato2 missing: %q", res)
 	}
-}
 
-func TestSha1sum(t *testing.T) {
-	r := NewRun(t)
-	defer r.Finalise()
-	file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
-	file2 := r.WriteBoth("empty space", "", t2)
+	// SHA1 Sum
 
-	fstest.CheckItems(t, r.fremote, file1, file2)
-
-	var buf bytes.Buffer
-	err := fs.Sha1sum(r.fremote, &buf)
+	err = fs.Sha1sum(r.fremote, &buf)
 	require.NoError(t, err)
-	res := buf.String()
+	res = buf.String()
 	if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709  empty space\n") &&
 		!strings.Contains(res, "                             UNSUPPORTED  empty space\n") &&
 		!strings.Contains(res, "                                          empty space\n") {
@@ -432,6 +426,22 @@ func TestSha1sum(t *testing.T) {
 		!strings.Contains(res, "                                          potato2\n") {
 		t.Errorf("potato2 missing: %q", res)
 	}
+
+	// Dropbox Hash Sum
+
+	err = fs.DropboxHashSum(r.fremote, &buf)
+	require.NoError(t, err)
+	res = buf.String()
+	if !strings.Contains(res, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855  empty space\n") &&
+		!strings.Contains(res, "                                                                     UNSUPPORTED  empty space\n") &&
+		!strings.Contains(res, "                                                                                  empty space\n") {
+		t.Errorf("empty space missing: %q", res)
+	}
+	if !strings.Contains(res, "a979481df794fed9c3990a6a422e0b1044ac802c15fab13af9c687f8bdbee01a  potato2\n") &&
+		!strings.Contains(res, "                                                                     UNSUPPORTED  potato2\n") &&
+		!strings.Contains(res, "                                                                                  potato2\n") {
+		t.Errorf("potato2 missing: %q", res)
+	}
 }
 
 func TestCount(t *testing.T) {