From 3cb811c63641671aa02e50b12f9943be3f112084 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Fri, 5 Oct 2018 15:17:22 -0400 Subject: [PATCH] Fixed hashing --- pkg/runtime/core/value.go | 2 +- pkg/runtime/expressions/for.go | 4 +- pkg/runtime/values/array.go | 34 ++++++++++------ pkg/runtime/values/array_test.go | 39 ++++++++++++++++++ pkg/runtime/values/binary.go | 16 ++++---- pkg/runtime/values/boolean.go | 16 ++++---- pkg/runtime/values/boolean_test.go | 12 ++++++ pkg/runtime/values/date_time.go | 19 ++++++--- pkg/runtime/values/date_time_test.go | 25 ++++++++++++ pkg/runtime/values/float.go | 25 +++++------- pkg/runtime/values/float_test.go | 29 +++++++++++++ pkg/runtime/values/int.go | 15 ++++++- pkg/runtime/values/int_test.go | 29 +++++++++++++ pkg/runtime/values/none.go | 2 +- pkg/runtime/values/object.go | 47 ++++++++++++++++------ pkg/runtime/values/object_test.go | 39 ++++++++++++++++++ pkg/runtime/values/string.go | 16 ++++---- pkg/runtime/values/string_test.go | 29 +++++++++++++ pkg/stdlib/html/driver/dynamic/document.go | 16 ++++---- pkg/stdlib/html/driver/dynamic/element.go | 21 ++++------ pkg/stdlib/html/driver/static/element.go | 16 ++++---- 21 files changed, 341 insertions(+), 110 deletions(-) create mode 100644 pkg/runtime/values/date_time_test.go create mode 100644 pkg/runtime/values/float_test.go create mode 100644 pkg/runtime/values/int_test.go create mode 100644 pkg/runtime/values/string_test.go diff --git a/pkg/runtime/core/value.go b/pkg/runtime/core/value.go index ca315263..1383a492 100644 --- a/pkg/runtime/core/value.go +++ b/pkg/runtime/core/value.go @@ -44,7 +44,7 @@ type Value interface { String() string Compare(other Value) int Unwrap() interface{} - Hash() int + Hash() uint64 Clone() Value } diff --git a/pkg/runtime/expressions/for.go b/pkg/runtime/expressions/for.go index 5f51685e..d01d1aff 100644 --- a/pkg/runtime/expressions/for.go +++ b/pkg/runtime/expressions/for.go @@ -70,10 +70,10 @@ func (e *ForExpression) Exec(ctx context.Context, scope *core.Scope) (core.Value } // Hash map for a check for uniqueness - var hashes map[int]bool + var hashes map[uint64]bool if e.distinct { - hashes = make(map[int]bool) + hashes = make(map[uint64]bool) } res := values.NewArray(10) diff --git a/pkg/runtime/values/array.go b/pkg/runtime/values/array.go index 1933e6d7..612aa829 100644 --- a/pkg/runtime/values/array.go +++ b/pkg/runtime/values/array.go @@ -1,10 +1,11 @@ package values import ( - "crypto/sha512" + "encoding/binary" "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/pkg/errors" + "hash/fnv" ) type ( @@ -77,22 +78,29 @@ func (t *Array) Unwrap() interface{} { return arr } -func (t *Array) Hash() int { - bytes, err := t.MarshalJSON() +func (t *Array) Hash() uint64 { + h := fnv.New64a() - if err != nil { - return 0 + h.Write([]byte(t.Type().String())) + h.Write([]byte(":")) + h.Write([]byte("[")) + + endIndex := len(t.value) - 1 + + for i, el := range t.value { + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, el.Hash()) + + h.Write(bytes) + + if i != endIndex { + h.Write([]byte(",")) + } } - h := sha512.New() + h.Write([]byte("]")) - out, err := h.Write(bytes) - - if err != nil { - return 0 - } - - return out + return h.Sum64() } func (t *Array) Clone() core.Value { diff --git a/pkg/runtime/values/array_test.go b/pkg/runtime/values/array_test.go index e1467ef7..0de60c28 100644 --- a/pkg/runtime/values/array_test.go +++ b/pkg/runtime/values/array_test.go @@ -119,6 +119,45 @@ func TestArray(t *testing.T) { }) }) + Convey(".Hash", t, func() { + Convey("It should calculate hash of non-empty array", func() { + arr := values.NewArrayWith( + values.NewInt(1), + values.NewInt(2), + values.NewInt(3), + ) + + h := arr.Hash() + + So(h, ShouldBeGreaterThan, 0) + }) + + Convey("It should calculate hash of empty array", func() { + arr := values.NewArrayWith() + + h := arr.Hash() + + So(h, ShouldBeGreaterThan, 0) + }) + + Convey("Hash sum should be consistent", func() { + arr := values.NewArrayWith( + values.True, + values.NewInt(1), + values.NewFloat(1.1), + values.NewString("foobar"), + values.NewCurrentDateTime(), + values.NewArrayWith(values.NewInt(1), values.True), + values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar"))), + ) + + h1 := arr.Hash() + h2 := arr.Hash() + + So(h1, ShouldEqual, h2) + }) + }) + Convey(".Length", t, func() { Convey("Should return 0 when empty", func() { arr := values.NewArray(1) diff --git a/pkg/runtime/values/binary.go b/pkg/runtime/values/binary.go index c51039af..41baa822 100644 --- a/pkg/runtime/values/binary.go +++ b/pkg/runtime/values/binary.go @@ -1,9 +1,9 @@ package values import ( - "crypto/sha512" "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" + "hash/fnv" "io" "io/ioutil" ) @@ -62,16 +62,14 @@ func (b *Binary) Unwrap() interface{} { return b.values } -func (b *Binary) Hash() int { - h := sha512.New() +func (b *Binary) Hash() uint64 { + h := fnv.New64a() - out, err := h.Write(b.values) + h.Write([]byte(b.Type().String())) + h.Write([]byte(":")) + h.Write(b.values) - if err != nil { - return 0 - } - - return out + return h.Sum64() } func (b *Binary) Clone() core.Value { diff --git a/pkg/runtime/values/boolean.go b/pkg/runtime/values/boolean.go index e2746fbc..62fd2fc3 100644 --- a/pkg/runtime/values/boolean.go +++ b/pkg/runtime/values/boolean.go @@ -1,10 +1,10 @@ package values import ( - "crypto/sha512" "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/pkg/errors" + "hash/fnv" "strings" ) @@ -98,16 +98,14 @@ func (t Boolean) Unwrap() interface{} { return bool(t) } -func (t Boolean) Hash() int { - h := sha512.New() +func (t Boolean) Hash() uint64 { + h := fnv.New64a() - out, err := h.Write([]byte(t.String())) + h.Write([]byte(t.Type().String())) + h.Write([]byte(":")) + h.Write([]byte(t.String())) - if err != nil { - return 0 - } - - return out + return h.Sum64() } func (t Boolean) Clone() core.Value { diff --git a/pkg/runtime/values/boolean_test.go b/pkg/runtime/values/boolean_test.go index f37e1c4b..955063ce 100644 --- a/pkg/runtime/values/boolean_test.go +++ b/pkg/runtime/values/boolean_test.go @@ -70,4 +70,16 @@ func TestBoolean(t *testing.T) { So(values.False.Compare(values.True), ShouldEqual, -1) }) }) + + Convey(".Hash", t, func() { + Convey("It should calculate hash", func() { + So(values.True.Hash(), ShouldBeGreaterThan, 0) + So(values.False.Hash(), ShouldBeGreaterThan, 0) + }) + + Convey("Hash sum should be consistent", func() { + So(values.True.Hash(), ShouldEqual, values.True.Hash()) + So(values.False.Hash(), ShouldEqual, values.False.Hash()) + }) + }) } diff --git a/pkg/runtime/values/date_time.go b/pkg/runtime/values/date_time.go index c047aaf6..c36ca138 100644 --- a/pkg/runtime/values/date_time.go +++ b/pkg/runtime/values/date_time.go @@ -1,8 +1,8 @@ package values import ( - "crypto/sha512" "github.com/MontFerret/ferret/pkg/runtime/core" + "hash/fnv" "time" ) @@ -12,6 +12,10 @@ type DateTime struct { time.Time } +func NewCurrentDateTime() DateTime { + return DateTime{time.Now()} +} + func NewDateTime(time time.Time) DateTime { return DateTime{time} } @@ -84,18 +88,21 @@ func (t DateTime) Unwrap() interface{} { return t.Time } -func (t DateTime) Hash() int { - h := sha512.New() +func (t DateTime) Hash() uint64 { + h := fnv.New64a() - t.Time.MarshalJSON() + h.Write([]byte(t.Type().String())) + h.Write([]byte(":")) - out, err := h.Write([]byte(t.Time.String())) + bytes, err := t.Time.GobEncode() if err != nil { return 0 } - return out + h.Write(bytes) + + return h.Sum64() } func (t DateTime) Clone() core.Value { diff --git a/pkg/runtime/values/date_time_test.go b/pkg/runtime/values/date_time_test.go new file mode 100644 index 00000000..24bc5cd9 --- /dev/null +++ b/pkg/runtime/values/date_time_test.go @@ -0,0 +1,25 @@ +package values_test + +import ( + "github.com/MontFerret/ferret/pkg/runtime/values" + . "github.com/smartystreets/goconvey/convey" + "testing" +) + +func TestDateTime(t *testing.T) { + Convey(".Hash", t, func() { + Convey("It should calculate hash", func() { + d := values.NewCurrentDateTime() + + h := d.Hash() + + So(h, ShouldBeGreaterThan, 0) + }) + + Convey("Hash sum should be consistent", func() { + d := values.NewCurrentDateTime() + + So(d.Hash(), ShouldEqual, d.Hash()) + }) + }) +} diff --git a/pkg/runtime/values/float.go b/pkg/runtime/values/float.go index 3c7c3026..99cfa010 100644 --- a/pkg/runtime/values/float.go +++ b/pkg/runtime/values/float.go @@ -1,11 +1,13 @@ package values import ( - "crypto/sha512" + "encoding/binary" "encoding/json" "fmt" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/pkg/errors" + "hash/fnv" + "math" "strconv" ) @@ -112,22 +114,17 @@ func (t Float) Unwrap() interface{} { return float64(t) } -func (t Float) Hash() int { - bytes, err := t.MarshalJSON() +func (t Float) Hash() uint64 { + h := fnv.New64a() - if err != nil { - return 0 - } + h.Write([]byte(t.Type().String())) + h.Write([]byte(":")) - h := sha512.New() + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, math.Float64bits(float64(t))) + h.Write(bytes) - out, err := h.Write(bytes) - - if err != nil { - return 0 - } - - return out + return h.Sum64() } func (t Float) Clone() core.Value { diff --git a/pkg/runtime/values/float_test.go b/pkg/runtime/values/float_test.go new file mode 100644 index 00000000..fa038f99 --- /dev/null +++ b/pkg/runtime/values/float_test.go @@ -0,0 +1,29 @@ +package values_test + +import ( + "github.com/MontFerret/ferret/pkg/runtime/values" + . "github.com/smartystreets/goconvey/convey" + "testing" +) + +func TestFloat(t *testing.T) { + Convey(".Hash", t, func() { + Convey("It should calculate hash", func() { + v := values.NewFloat(1.1) + + h := v.Hash() + + So(h, ShouldBeGreaterThan, 0) + + v2 := values.NewFloat(1.2) + + So(h, ShouldNotEqual, v2.Hash()) + }) + + Convey("Hash sum should be consistent", func() { + v := values.NewFloat(1.1) + + So(v.Hash(), ShouldEqual, v.Hash()) + }) + }) +} diff --git a/pkg/runtime/values/int.go b/pkg/runtime/values/int.go index 4dc596c2..0b4a4181 100644 --- a/pkg/runtime/values/int.go +++ b/pkg/runtime/values/int.go @@ -1,9 +1,11 @@ package values import ( + "encoding/binary" "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/pkg/errors" + "hash/fnv" "strconv" ) @@ -110,8 +112,17 @@ func (t Int) Unwrap() interface{} { return int(t) } -func (t Int) Hash() int { - return int(t) +func (t Int) Hash() uint64 { + h := fnv.New64a() + + h.Write([]byte(t.Type().String())) + h.Write([]byte(":")) + + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, uint64(t)) + h.Write(bytes) + + return h.Sum64() } func (t Int) Clone() core.Value { diff --git a/pkg/runtime/values/int_test.go b/pkg/runtime/values/int_test.go new file mode 100644 index 00000000..91fd9772 --- /dev/null +++ b/pkg/runtime/values/int_test.go @@ -0,0 +1,29 @@ +package values_test + +import ( + "github.com/MontFerret/ferret/pkg/runtime/values" + . "github.com/smartystreets/goconvey/convey" + "testing" +) + +func TestInt(t *testing.T) { + Convey(".Hash", t, func() { + Convey("It should calculate hash", func() { + v := values.NewInt(1) + + h := v.Hash() + + So(h, ShouldBeGreaterThan, 0) + + v2 := values.NewInt(2) + + So(h, ShouldNotEqual, v2.Hash()) + }) + + Convey("Hash sum should be consistent", func() { + v := values.NewInt(1) + + So(v.Hash(), ShouldEqual, v.Hash()) + }) + }) +} diff --git a/pkg/runtime/values/none.go b/pkg/runtime/values/none.go index 8f54f1d4..b82df8dc 100644 --- a/pkg/runtime/values/none.go +++ b/pkg/runtime/values/none.go @@ -33,7 +33,7 @@ func (t *none) Unwrap() interface{} { return nil } -func (t *none) Hash() int { +func (t *none) Hash() uint64 { return 0 } diff --git a/pkg/runtime/values/object.go b/pkg/runtime/values/object.go index 59ce0a98..7b5518ba 100644 --- a/pkg/runtime/values/object.go +++ b/pkg/runtime/values/object.go @@ -1,15 +1,17 @@ package values import ( - "crypto/sha512" + "encoding/binary" "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" + "hash/fnv" + "sort" ) type ( ObjectPredicate = func(value core.Value, key string) bool ObjectProperty struct { - name string + key string value core.Value } Object struct { @@ -29,7 +31,7 @@ func NewObjectWith(props ...*ObjectProperty) *Object { obj := NewObject() for _, prop := range props { - obj.value[prop.name] = prop.value + obj.value[prop.key] = prop.value } return obj @@ -88,22 +90,43 @@ func (t *Object) Unwrap() interface{} { return obj } -func (t *Object) Hash() int { - bytes, err := t.MarshalJSON() +func (t *Object) Hash() uint64 { + h := fnv.New64a() - if err != nil { - return 0 + h.Write([]byte(t.Type().String())) + h.Write([]byte(":")) + h.Write([]byte("{")) + + keys := make([]string, 0, len(t.value)) + + for key := range t.value { + keys = append(keys, key) } - h := sha512.New() + // order does not really matter + // but it will give us a consistent hash sum + sort.Strings(keys) + endIndex := len(keys) - 1 - out, err := h.Write(bytes) + for idx, key := range keys { + h.Write([]byte(key)) + h.Write([]byte(":")) - if err != nil { - return 0 + el := t.value[key] + + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, el.Hash()) + + h.Write(bytes) + + if idx != endIndex { + h.Write([]byte(",")) + } } - return out + h.Write([]byte("}")) + + return h.Sum64() } func (t *Object) Clone() core.Value { diff --git a/pkg/runtime/values/object_test.go b/pkg/runtime/values/object_test.go index b022cf74..67d600cb 100644 --- a/pkg/runtime/values/object_test.go +++ b/pkg/runtime/values/object_test.go @@ -142,6 +142,45 @@ func TestObject(t *testing.T) { }) }) + Convey(".Hash", t, func() { + Convey("It should calculate hash of non-empty object", func() { + v := values.NewObjectWith( + values.NewObjectProperty("foo", values.NewString("bar")), + values.NewObjectProperty("faz", values.NewInt(1)), + values.NewObjectProperty("qaz", values.True), + ) + + h := v.Hash() + + So(h, ShouldBeGreaterThan, 0) + }) + + Convey("It should calculate hash of empty object", func() { + v := values.NewObject() + + h := v.Hash() + + So(h, ShouldBeGreaterThan, 0) + }) + + Convey("Hash sum should be consistent", func() { + v := values.NewObjectWith( + values.NewObjectProperty("boolean", values.True), + values.NewObjectProperty("int", values.NewInt(1)), + values.NewObjectProperty("float", values.NewFloat(1.1)), + values.NewObjectProperty("string", values.NewString("foobar")), + values.NewObjectProperty("datetime", values.NewCurrentDateTime()), + values.NewObjectProperty("array", values.NewArrayWith(values.NewInt(1), values.True)), + values.NewObjectProperty("object", values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar")))), + ) + + h1 := v.Hash() + h2 := v.Hash() + + So(h1, ShouldEqual, h2) + }) + }) + Convey(".Length", t, func() { Convey("Should return 0 when empty", func() { obj := values.NewObject() diff --git a/pkg/runtime/values/string.go b/pkg/runtime/values/string.go index 64b3fbed..e0749f02 100644 --- a/pkg/runtime/values/string.go +++ b/pkg/runtime/values/string.go @@ -1,11 +1,11 @@ package values import ( - "crypto/sha512" "encoding/json" "fmt" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/pkg/errors" + "hash/fnv" "strings" ) @@ -93,16 +93,14 @@ func (t String) Unwrap() interface{} { return string(t) } -func (t String) Hash() int { - h := sha512.New() +func (t String) Hash() uint64 { + h := fnv.New64a() - out, err := h.Write([]byte(t)) + h.Write([]byte(t.Type().String())) + h.Write([]byte(":")) + h.Write([]byte(t)) - if err != nil { - return 0 - } - - return out + return h.Sum64() } func (t String) Clone() core.Value { diff --git a/pkg/runtime/values/string_test.go b/pkg/runtime/values/string_test.go new file mode 100644 index 00000000..59a81790 --- /dev/null +++ b/pkg/runtime/values/string_test.go @@ -0,0 +1,29 @@ +package values_test + +import ( + "github.com/MontFerret/ferret/pkg/runtime/values" + . "github.com/smartystreets/goconvey/convey" + "testing" +) + +func TestString(t *testing.T) { + Convey(".Hash", t, func() { + Convey("It should calculate hash", func() { + v := values.NewString("a") + + h := v.Hash() + + So(h, ShouldBeGreaterThan, 0) + + v2 := values.NewString("b") + + So(h, ShouldNotEqual, v2.Hash()) + }) + + Convey("Hash sum should be consistent", func() { + v := values.NewString("foobar") + + So(v.Hash(), ShouldEqual, v.Hash()) + }) + }) +} diff --git a/pkg/stdlib/html/driver/dynamic/document.go b/pkg/stdlib/html/driver/dynamic/document.go index a7cad913..aa7e373d 100644 --- a/pkg/stdlib/html/driver/dynamic/document.go +++ b/pkg/stdlib/html/driver/dynamic/document.go @@ -2,7 +2,6 @@ package dynamic import ( "context" - "crypto/sha512" "fmt" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/logging" @@ -17,6 +16,7 @@ import ( "github.com/mafredri/cdp/rpcc" "github.com/pkg/errors" "github.com/rs/zerolog" + "hash/fnv" "sync" "time" ) @@ -205,19 +205,17 @@ func (doc *HtmlDocument) Unwrap() interface{} { return doc.element } -func (doc *HtmlDocument) Hash() int { +func (doc *HtmlDocument) Hash() uint64 { doc.Lock() defer doc.Unlock() - h := sha512.New() + h := fnv.New64a() - out, err := h.Write([]byte(doc.url)) + h.Write([]byte(doc.Type().String())) + h.Write([]byte(":")) + h.Write([]byte(doc.url)) - if err != nil { - return 0 - } - - return out + return h.Sum64() } func (doc *HtmlDocument) Clone() core.Value { diff --git a/pkg/stdlib/html/driver/dynamic/element.go b/pkg/stdlib/html/driver/dynamic/element.go index 23895a7d..5da1758f 100644 --- a/pkg/stdlib/html/driver/dynamic/element.go +++ b/pkg/stdlib/html/driver/dynamic/element.go @@ -3,7 +3,6 @@ package dynamic import ( "bytes" "context" - "crypto/sha512" "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" @@ -14,6 +13,7 @@ import ( "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" "github.com/rs/zerolog" + "hash/fnv" "strconv" "sync" "time" @@ -188,24 +188,17 @@ func (el *HtmlElement) Unwrap() interface{} { return el } -func (el *HtmlElement) Hash() int { +func (el *HtmlElement) Hash() uint64 { el.Lock() defer el.Unlock() - h := sha512.New() + h := fnv.New64a() - out, err := h.Write([]byte(el.innerHtml)) + h.Write([]byte(el.Type().String())) + h.Write([]byte(":")) + h.Write([]byte(el.innerHtml)) - if err != nil { - el.logger.Error(). - Timestamp(). - Err(err). - Msg("failed to calculate hash value") - - return 0 - } - - return out + return h.Sum64() } func (el *HtmlElement) Value() core.Value { diff --git a/pkg/stdlib/html/driver/static/element.go b/pkg/stdlib/html/driver/static/element.go index ed696f18..3307aa30 100644 --- a/pkg/stdlib/html/driver/static/element.go +++ b/pkg/stdlib/html/driver/static/element.go @@ -1,12 +1,12 @@ package static import ( - "crypto/sha512" "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/common" "github.com/PuerkitoBio/goquery" + "hash/fnv" ) type HtmlElement struct { @@ -53,22 +53,20 @@ func (el *HtmlElement) Unwrap() interface{} { return el.selection } -func (el *HtmlElement) Hash() int { - h := sha512.New() - +func (el *HtmlElement) Hash() uint64 { str, err := el.selection.Html() if err != nil { return 0 } - out, err := h.Write([]byte(str)) + h := fnv.New64a() - if err != nil { - return 0 - } + h.Write([]byte(el.Type().String())) + h.Write([]byte(":")) + h.Write([]byte(str)) - return out + return h.Sum64() } func (el *HtmlElement) Clone() core.Value {