1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-12 11:15:14 +02:00

Fixed hashing

This commit is contained in:
Tim Voronov 2018-10-05 15:17:22 -04:00
parent 84f7f36b9e
commit 3cb811c636
21 changed files with 341 additions and 110 deletions

View File

@ -44,7 +44,7 @@ type Value interface {
String() string
Compare(other Value) int
Unwrap() interface{}
Hash() int
Hash() uint64
Clone() Value
}

View File

@ -70,10 +70,10 @@ func (e *ForExpression) Exec(ctx context.Context, scope *core.Scope) (core.Value
}
// Hash map for a check for uniqueness
var hashes map[int]bool
var hashes map[uint64]bool
if e.distinct {
hashes = make(map[int]bool)
hashes = make(map[uint64]bool)
}
res := values.NewArray(10)

View File

@ -1,10 +1,11 @@
package values
import (
"crypto/sha512"
"encoding/binary"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
)
type (
@ -77,22 +78,29 @@ func (t *Array) Unwrap() interface{} {
return arr
}
// Diff hunk for (*Array).Hash. Two versions of the method are interleaved
// below without +/- markers, so this span is not compilable as written.
// NOTE(review): the `int` signature with MarshalJSON/sha512 appears to be the
// removed version and the `uint64` signature with fnv.New64a the added one —
// confirm against the commit.
func (t *Array) Hash() int {
bytes, err := t.MarshalJSON()
func (t *Array) Hash() uint64 {
h := fnv.New64a()
if err != nil {
return 0
// The uint64 version prefixes the hash input with the type name and ":".
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte("["))
endIndex := len(t.value) - 1
for i, el := range t.value {
// Each element contributes its own 64-bit hash as 8 little-endian bytes.
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, el.Hash())
h.Write(bytes)
// Elements are separated by "," with no trailing separator after the last.
if i != endIndex {
h.Write([]byte(","))
}
}
h := sha512.New()
h.Write([]byte("]"))
// In the int version, `out` is the first result of Write (a byte count),
// not a digest of the data.
out, err := h.Write(bytes)
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t *Array) Clone() core.Value {

View File

@ -119,6 +119,45 @@ func TestArray(t *testing.T) {
})
})
Convey(".Hash", t, func() {
Convey("It should calculate hash of non-empty array", func() {
arr := values.NewArrayWith(
values.NewInt(1),
values.NewInt(2),
values.NewInt(3),
)
h := arr.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("It should calculate hash of empty array", func() {
arr := values.NewArrayWith()
h := arr.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
arr := values.NewArrayWith(
values.True,
values.NewInt(1),
values.NewFloat(1.1),
values.NewString("foobar"),
values.NewCurrentDateTime(),
values.NewArrayWith(values.NewInt(1), values.True),
values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar"))),
)
h1 := arr.Hash()
h2 := arr.Hash()
So(h1, ShouldEqual, h2)
})
})
Convey(".Length", t, func() {
Convey("Should return 0 when empty", func() {
arr := values.NewArray(1)

View File

@ -1,9 +1,9 @@
package values
import (
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"io"
"io/ioutil"
)
@ -62,16 +62,14 @@ func (b *Binary) Unwrap() interface{} {
return b.values
}
// Diff hunk for (*Binary).Hash. Old and new lines are interleaved without
// +/- markers, so this span is not compilable as written.
// NOTE(review): the `int`/sha512 lines appear to be the removed version and
// the `uint64`/fnv lines the added one — confirm against the commit.
func (b *Binary) Hash() int {
h := sha512.New()
func (b *Binary) Hash() uint64 {
h := fnv.New64a()
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write(b.values)
// The uint64 version hashes the type name, a ":" separator, then the raw bytes.
h.Write([]byte(b.Type().String()))
h.Write([]byte(":"))
h.Write(b.values)
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (b *Binary) Clone() core.Value {

View File

@ -1,10 +1,10 @@
package values
import (
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"strings"
)
@ -98,16 +98,14 @@ func (t Boolean) Unwrap() interface{} {
return bool(t)
}
// Diff hunk for Boolean.Hash. Old and new lines are interleaved without
// +/- markers, so this span is not compilable as written.
// NOTE(review): the `int`/sha512 lines appear to be the removed version and
// the `uint64`/fnv lines the added one — confirm against the commit.
func (t Boolean) Hash() int {
h := sha512.New()
func (t Boolean) Hash() uint64 {
h := fnv.New64a()
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write([]byte(t.String()))
// The uint64 version hashes the type name, a ":" separator, then t.String().
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(t.String()))
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t Boolean) Clone() core.Value {

View File

@ -70,4 +70,16 @@ func TestBoolean(t *testing.T) {
So(values.False.Compare(values.True), ShouldEqual, -1)
})
})
Convey(".Hash", t, func() {
Convey("It should calculate hash", func() {
So(values.True.Hash(), ShouldBeGreaterThan, 0)
So(values.False.Hash(), ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
So(values.True.Hash(), ShouldEqual, values.True.Hash())
So(values.False.Hash(), ShouldEqual, values.False.Hash())
})
})
}

View File

@ -1,8 +1,8 @@
package values
import (
"crypto/sha512"
"github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"time"
)
@ -12,6 +12,10 @@ type DateTime struct {
time.Time
}
// NewCurrentDateTime returns a DateTime wrapping the result of time.Now().
func NewCurrentDateTime() DateTime {
	return DateTime{Time: time.Now()}
}
// NewDateTime wraps the given time.Time in a DateTime value.
// The parameter shadows the time package inside the function body.
func NewDateTime(time time.Time) DateTime {
	return DateTime{Time: time}
}
@ -84,18 +88,21 @@ func (t DateTime) Unwrap() interface{} {
return t.Time
}
// Diff hunk for DateTime.Hash. Old and new lines are interleaved without
// +/- markers, so this span is not compilable as written.
// NOTE(review): the `int`/sha512 lines (including the discarded MarshalJSON
// call) appear to be the removed version and the `uint64`/fnv lines the added
// one — confirm against the commit.
func (t DateTime) Hash() int {
h := sha512.New()
func (t DateTime) Hash() uint64 {
h := fnv.New64a()
// The result of this call is discarded.
t.Time.MarshalJSON()
// The uint64 version prefixes the hash input with the type name and ":".
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write([]byte(t.Time.String()))
// The uint64 version feeds the gob encoding of the time into the hash.
bytes, err := t.Time.GobEncode()
// A failed encoding yields a hash of 0.
if err != nil {
return 0
}
return out
h.Write(bytes)
return h.Sum64()
}
func (t DateTime) Clone() core.Value {

View File

@ -0,0 +1,25 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestDateTime checks that DateTime.Hash produces a positive sum and that
// hashing the same value twice gives the same result.
func TestDateTime(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			now := values.NewCurrentDateTime()

			So(now.Hash(), ShouldBeGreaterThan, 0)
		})

		Convey("Hash sum should be consistent", func() {
			now := values.NewCurrentDateTime()
			first, second := now.Hash(), now.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -1,11 +1,13 @@
package values
import (
"crypto/sha512"
"encoding/binary"
"encoding/json"
"fmt"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"math"
"strconv"
)
@ -112,22 +114,17 @@ func (t Float) Unwrap() interface{} {
return float64(t)
}
// Diff hunk for Float.Hash. Old and new lines are interleaved without
// +/- markers, so this span is not compilable as written.
// NOTE(review): the `int` signature with MarshalJSON/sha512 appears to be the
// removed version and the `uint64` signature with fnv.New64a the added one —
// confirm against the commit.
func (t Float) Hash() int {
bytes, err := t.MarshalJSON()
func (t Float) Hash() uint64 {
h := fnv.New64a()
if err != nil {
return 0
}
// The uint64 version prefixes the hash input with the type name and ":".
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h := sha512.New()
// The float's IEEE 754 bit pattern is written as 8 little-endian bytes.
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, math.Float64bits(float64(t)))
h.Write(bytes)
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write(bytes)
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t Float) Clone() core.Value {

View File

@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestFloat checks that Float.Hash is positive, differs between distinct
// values, and is repeatable for the same value.
func TestFloat(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			sum := values.NewFloat(1.1).Hash()

			So(sum, ShouldBeGreaterThan, 0)

			other := values.NewFloat(1.2)

			So(sum, ShouldNotEqual, other.Hash())
		})

		Convey("Hash sum should be consistent", func() {
			f := values.NewFloat(1.1)
			first, second := f.Hash(), f.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -1,9 +1,11 @@
package values
import (
"encoding/binary"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"strconv"
)
@ -110,8 +112,17 @@ func (t Int) Unwrap() interface{} {
return int(t)
}
// Diff hunk for Int.Hash. Old and new lines are interleaved without
// +/- markers, so this span is not compilable as written.
// NOTE(review): the `int` version that returns int(t) appears to be the
// removed one and the `uint64`/fnv version the added one — confirm against
// the commit.
func (t Int) Hash() int {
return int(t)
func (t Int) Hash() uint64 {
h := fnv.New64a()
// The uint64 version prefixes the hash input with the type name and ":".
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
// The integer value is written as 8 little-endian bytes.
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, uint64(t))
h.Write(bytes)
return h.Sum64()
}
func (t Int) Clone() core.Value {

View File

@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestInt checks that Int.Hash is positive, differs between distinct values,
// and is repeatable for the same value.
func TestInt(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			sum := values.NewInt(1).Hash()

			So(sum, ShouldBeGreaterThan, 0)

			other := values.NewInt(2)

			So(sum, ShouldNotEqual, other.Hash())
		})

		Convey("Hash sum should be consistent", func() {
			n := values.NewInt(1)
			first, second := n.Hash(), n.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -33,7 +33,7 @@ func (t *none) Unwrap() interface{} {
return nil
}
// Diff hunk for (*none).Hash. Both signatures are shown back to back without
// +/- markers; only the return type differs and the body returns 0 in both.
// NOTE(review): the `int` signature appears to be the removed line — confirm
// against the commit.
func (t *none) Hash() int {
func (t *none) Hash() uint64 {
return 0
}

View File

@ -1,15 +1,17 @@
package values
import (
"crypto/sha512"
"encoding/binary"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"sort"
)
type (
ObjectPredicate = func(value core.Value, key string) bool
ObjectProperty struct {
name string
key string
value core.Value
}
Object struct {
@ -29,7 +31,7 @@ func NewObjectWith(props ...*ObjectProperty) *Object {
obj := NewObject()
for _, prop := range props {
obj.value[prop.name] = prop.value
obj.value[prop.key] = prop.value
}
return obj
@ -88,22 +90,43 @@ func (t *Object) Unwrap() interface{} {
return obj
}
// Diff hunk for (*Object).Hash. Two versions of the method are interleaved
// below without +/- markers, so this span is not compilable as written.
// NOTE(review): the `int` signature with MarshalJSON/sha512 appears to be the
// removed version and the `uint64` signature with fnv.New64a the added one —
// confirm against the commit.
func (t *Object) Hash() int {
bytes, err := t.MarshalJSON()
func (t *Object) Hash() uint64 {
h := fnv.New64a()
if err != nil {
return 0
// The uint64 version prefixes the hash input with the type name and ":".
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte("{"))
// Collect the property keys so they can be visited in a fixed order.
keys := make([]string, 0, len(t.value))
for key := range t.value {
keys = append(keys, key)
}
h := sha512.New()
// Go map iteration order is not fixed, so the keys are sorted to make the
// hash sum the same on every call for the same contents.
sort.Strings(keys)
endIndex := len(keys) - 1
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write(bytes)
for idx, key := range keys {
// Each property is hashed as its key, ":", then the value's 64-bit hash.
h.Write([]byte(key))
h.Write([]byte(":"))
if err != nil {
return 0
el := t.value[key]
// The value's hash is written as 8 little-endian bytes.
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, el.Hash())
h.Write(bytes)
// Properties are separated by "," with no trailing separator after the last.
if idx != endIndex {
h.Write([]byte(","))
}
}
return out
h.Write([]byte("}"))
return h.Sum64()
}
func (t *Object) Clone() core.Value {

View File

@ -142,6 +142,45 @@ func TestObject(t *testing.T) {
})
})
Convey(".Hash", t, func() {
Convey("It should calculate hash of non-empty object", func() {
v := values.NewObjectWith(
values.NewObjectProperty("foo", values.NewString("bar")),
values.NewObjectProperty("faz", values.NewInt(1)),
values.NewObjectProperty("qaz", values.True),
)
h := v.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("It should calculate hash of empty object", func() {
v := values.NewObject()
h := v.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
v := values.NewObjectWith(
values.NewObjectProperty("boolean", values.True),
values.NewObjectProperty("int", values.NewInt(1)),
values.NewObjectProperty("float", values.NewFloat(1.1)),
values.NewObjectProperty("string", values.NewString("foobar")),
values.NewObjectProperty("datetime", values.NewCurrentDateTime()),
values.NewObjectProperty("array", values.NewArrayWith(values.NewInt(1), values.True)),
values.NewObjectProperty("object", values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar")))),
)
h1 := v.Hash()
h2 := v.Hash()
So(h1, ShouldEqual, h2)
})
})
Convey(".Length", t, func() {
Convey("Should return 0 when empty", func() {
obj := values.NewObject()

View File

@ -1,11 +1,11 @@
package values
import (
"crypto/sha512"
"encoding/json"
"fmt"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"strings"
)
@ -93,16 +93,14 @@ func (t String) Unwrap() interface{} {
return string(t)
}
// Diff hunk for String.Hash. Old and new lines are interleaved without
// +/- markers, so this span is not compilable as written.
// NOTE(review): the `int`/sha512 lines appear to be the removed version and
// the `uint64`/fnv lines the added one — confirm against the commit.
func (t String) Hash() int {
h := sha512.New()
func (t String) Hash() uint64 {
h := fnv.New64a()
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write([]byte(t))
// The uint64 version hashes the type name, a ":" separator, then the string bytes.
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(t))
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t String) Clone() core.Value {

View File

@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestString checks that String.Hash is positive, differs between distinct
// values, and is repeatable for the same value.
func TestString(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			sum := values.NewString("a").Hash()

			So(sum, ShouldBeGreaterThan, 0)

			other := values.NewString("b")

			So(sum, ShouldNotEqual, other.Hash())
		})

		Convey("Hash sum should be consistent", func() {
			s := values.NewString("foobar")
			first, second := s.Hash(), s.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -2,7 +2,6 @@ package dynamic
import (
"context"
"crypto/sha512"
"fmt"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/logging"
@ -17,6 +16,7 @@ import (
"github.com/mafredri/cdp/rpcc"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"hash/fnv"
"sync"
"time"
)
@ -205,19 +205,17 @@ func (doc *HtmlDocument) Unwrap() interface{} {
return doc.element
}
// Diff hunk for (*HtmlDocument).Hash. Old and new lines are interleaved
// without +/- markers, so this span is not compilable as written.
// NOTE(review): the `int`/sha512 lines appear to be the removed version and
// the `uint64`/fnv lines the added one — confirm against the commit.
func (doc *HtmlDocument) Hash() int {
func (doc *HtmlDocument) Hash() uint64 {
// The document is locked for the duration of the call.
doc.Lock()
defer doc.Unlock()
h := sha512.New()
h := fnv.New64a()
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write([]byte(doc.url))
// The uint64 version hashes the type name, a ":" separator, then the document URL.
h.Write([]byte(doc.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(doc.url))
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (doc *HtmlDocument) Clone() core.Value {

View File

@ -3,7 +3,6 @@ package dynamic
import (
"bytes"
"context"
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
@ -14,6 +13,7 @@ import (
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/rs/zerolog"
"hash/fnv"
"strconv"
"sync"
"time"
@ -188,24 +188,17 @@ func (el *HtmlElement) Unwrap() interface{} {
return el
}
// Diff hunk for the dynamic driver's (*HtmlElement).Hash. Old and new lines
// are interleaved without +/- markers, so this span is not compilable as written.
// NOTE(review): the `int`/sha512 lines and the error-logging branch appear to
// be the removed version and the `uint64`/fnv lines the added one — confirm
// against the commit.
func (el *HtmlElement) Hash() int {
func (el *HtmlElement) Hash() uint64 {
// The element is locked for the duration of the call.
el.Lock()
defer el.Unlock()
h := sha512.New()
h := fnv.New64a()
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write([]byte(el.innerHtml))
// The uint64 version hashes the type name, a ":" separator, then the inner HTML.
h.Write([]byte(el.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(el.innerHtml))
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Msg("failed to calculate hash value")
return 0
}
return out
return h.Sum64()
}
func (el *HtmlElement) Value() core.Value {

View File

@ -1,12 +1,12 @@
package static
import (
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
"github.com/PuerkitoBio/goquery"
"hash/fnv"
)
type HtmlElement struct {
@ -53,22 +53,20 @@ func (el *HtmlElement) Unwrap() interface{} {
return el.selection
}
// Diff hunk for the static driver's (*HtmlElement).Hash. Old and new lines
// are interleaved without +/- markers, so this span is not compilable as written.
// NOTE(review): the `int`/sha512 lines appear to be the removed version and
// the `uint64`/fnv lines the added one — confirm against the commit.
func (el *HtmlElement) Hash() int {
h := sha512.New()
func (el *HtmlElement) Hash() uint64 {
// The selection's HTML is the hash input; a failure to get it yields 0.
str, err := el.selection.Html()
if err != nil {
return 0
}
// In the int version, `out` is the byte count returned by Write, not a digest.
out, err := h.Write([]byte(str))
h := fnv.New64a()
if err != nil {
return 0
}
// The uint64 version hashes the type name, a ":" separator, then the HTML.
h.Write([]byte(el.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(str))
return out
return h.Sum64()
}
func (el *HtmlElement) Clone() core.Value {