1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-08-15 20:02:56 +02:00

Merge pull request #51 from MontFerret/bug/invalid-hashing

Fixed hashing
This commit is contained in:
Tim Voronov
2018-10-05 15:47:18 -04:00
committed by GitHub
21 changed files with 341 additions and 110 deletions

View File

@@ -44,7 +44,7 @@ type Value interface {
String() string String() string
Compare(other Value) int Compare(other Value) int
Unwrap() interface{} Unwrap() interface{}
Hash() int Hash() uint64
Clone() Value Clone() Value
} }

View File

@@ -70,10 +70,10 @@ func (e *ForExpression) Exec(ctx context.Context, scope *core.Scope) (core.Value
} }
// Hash map for a check for uniqueness // Hash map for a check for uniqueness
var hashes map[int]bool var hashes map[uint64]bool
if e.distinct { if e.distinct {
hashes = make(map[int]bool) hashes = make(map[uint64]bool)
} }
res := values.NewArray(10) res := values.NewArray(10)

View File

@@ -1,10 +1,11 @@
package values package values
import ( import (
"crypto/sha512" "encoding/binary"
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors" "github.com/pkg/errors"
"hash/fnv"
) )
type ( type (
@@ -77,22 +78,29 @@ func (t *Array) Unwrap() interface{} {
return arr return arr
} }
func (t *Array) Hash() int { func (t *Array) Hash() uint64 {
bytes, err := t.MarshalJSON() h := fnv.New64a()
if err != nil { h.Write([]byte(t.Type().String()))
return 0 h.Write([]byte(":"))
h.Write([]byte("["))
endIndex := len(t.value) - 1
for i, el := range t.value {
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, el.Hash())
h.Write(bytes)
if i != endIndex {
h.Write([]byte(","))
}
} }
h := sha512.New() h.Write([]byte("]"))
out, err := h.Write(bytes) return h.Sum64()
if err != nil {
return 0
}
return out
} }
func (t *Array) Clone() core.Value { func (t *Array) Clone() core.Value {

View File

@@ -119,6 +119,45 @@ func TestArray(t *testing.T) {
}) })
}) })
Convey(".Hash", t, func() {
Convey("It should calculate hash of non-empty array", func() {
arr := values.NewArrayWith(
values.NewInt(1),
values.NewInt(2),
values.NewInt(3),
)
h := arr.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("It should calculate hash of empty array", func() {
arr := values.NewArrayWith()
h := arr.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
arr := values.NewArrayWith(
values.True,
values.NewInt(1),
values.NewFloat(1.1),
values.NewString("foobar"),
values.NewCurrentDateTime(),
values.NewArrayWith(values.NewInt(1), values.True),
values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar"))),
)
h1 := arr.Hash()
h2 := arr.Hash()
So(h1, ShouldEqual, h2)
})
})
Convey(".Length", t, func() { Convey(".Length", t, func() {
Convey("Should return 0 when empty", func() { Convey("Should return 0 when empty", func() {
arr := values.NewArray(1) arr := values.NewArray(1)

View File

@@ -1,9 +1,9 @@
package values package values
import ( import (
"crypto/sha512"
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"io" "io"
"io/ioutil" "io/ioutil"
) )
@@ -62,16 +62,14 @@ func (b *Binary) Unwrap() interface{} {
return b.values return b.values
} }
func (b *Binary) Hash() int { func (b *Binary) Hash() uint64 {
h := sha512.New() h := fnv.New64a()
out, err := h.Write(b.values) h.Write([]byte(b.Type().String()))
h.Write([]byte(":"))
h.Write(b.values)
if err != nil { return h.Sum64()
return 0
}
return out
} }
func (b *Binary) Clone() core.Value { func (b *Binary) Clone() core.Value {

View File

@@ -1,10 +1,10 @@
package values package values
import ( import (
"crypto/sha512"
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors" "github.com/pkg/errors"
"hash/fnv"
"strings" "strings"
) )
@@ -98,16 +98,14 @@ func (t Boolean) Unwrap() interface{} {
return bool(t) return bool(t)
} }
func (t Boolean) Hash() int { func (t Boolean) Hash() uint64 {
h := sha512.New() h := fnv.New64a()
out, err := h.Write([]byte(t.String())) h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(t.String()))
if err != nil { return h.Sum64()
return 0
}
return out
} }
func (t Boolean) Clone() core.Value { func (t Boolean) Clone() core.Value {

View File

@@ -70,4 +70,16 @@ func TestBoolean(t *testing.T) {
So(values.False.Compare(values.True), ShouldEqual, -1) So(values.False.Compare(values.True), ShouldEqual, -1)
}) })
}) })
Convey(".Hash", t, func() {
Convey("It should calculate hash", func() {
So(values.True.Hash(), ShouldBeGreaterThan, 0)
So(values.False.Hash(), ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
So(values.True.Hash(), ShouldEqual, values.True.Hash())
So(values.False.Hash(), ShouldEqual, values.False.Hash())
})
})
} }

View File

@@ -1,8 +1,8 @@
package values package values
import ( import (
"crypto/sha512"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"time" "time"
) )
@@ -12,6 +12,10 @@ type DateTime struct {
time.Time time.Time
} }
// NewCurrentDateTime returns a DateTime wrapping the instant reported by
// time.Now at the moment of the call.
func NewCurrentDateTime() DateTime {
	now := time.Now()

	return DateTime{Time: now}
}
func NewDateTime(time time.Time) DateTime { func NewDateTime(time time.Time) DateTime {
return DateTime{time} return DateTime{time}
} }
@@ -84,18 +88,21 @@ func (t DateTime) Unwrap() interface{} {
return t.Time return t.Time
} }
func (t DateTime) Hash() int { func (t DateTime) Hash() uint64 {
h := sha512.New() h := fnv.New64a()
t.Time.MarshalJSON() h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
out, err := h.Write([]byte(t.Time.String())) bytes, err := t.Time.GobEncode()
if err != nil { if err != nil {
return 0 return 0
} }
return out h.Write(bytes)
return h.Sum64()
} }
func (t DateTime) Clone() core.Value { func (t DateTime) Clone() core.Value {

View File

@@ -0,0 +1,25 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestDateTime checks that DateTime.Hash produces a value greater than zero
// and that two calls on the same DateTime value return equal results.
func TestDateTime(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			sum := values.NewCurrentDateTime().Hash()

			So(sum, ShouldBeGreaterThan, 0)
		})

		Convey("Hash sum should be consistent", func() {
			now := values.NewCurrentDateTime()

			// Hash the same value twice; both results must match.
			first, second := now.Hash(), now.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@@ -1,11 +1,13 @@
package values package values
import ( import (
"crypto/sha512" "encoding/binary"
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors" "github.com/pkg/errors"
"hash/fnv"
"math"
"strconv" "strconv"
) )
@@ -112,22 +114,17 @@ func (t Float) Unwrap() interface{} {
return float64(t) return float64(t)
} }
func (t Float) Hash() int { func (t Float) Hash() uint64 {
bytes, err := t.MarshalJSON() h := fnv.New64a()
if err != nil { h.Write([]byte(t.Type().String()))
return 0 h.Write([]byte(":"))
}
h := sha512.New() bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, math.Float64bits(float64(t)))
h.Write(bytes)
out, err := h.Write(bytes) return h.Sum64()
if err != nil {
return 0
}
return out
} }
func (t Float) Clone() core.Value { func (t Float) Clone() core.Value {

View File

@@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestFloat checks that Float.Hash produces a value greater than zero, that
// 1.1 and 1.2 hash differently, and that hashing the same value twice yields
// equal results.
func TestFloat(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			sum := values.NewFloat(1.1).Hash()

			So(sum, ShouldBeGreaterThan, 0)

			// A different float must not collide with the first one.
			other := values.NewFloat(1.2).Hash()

			So(sum, ShouldNotEqual, other)
		})

		Convey("Hash sum should be consistent", func() {
			num := values.NewFloat(1.1)
			first, second := num.Hash(), num.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@@ -1,9 +1,11 @@
package values package values
import ( import (
"encoding/binary"
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors" "github.com/pkg/errors"
"hash/fnv"
"strconv" "strconv"
) )
@@ -110,8 +112,17 @@ func (t Int) Unwrap() interface{} {
return int(t) return int(t)
} }
func (t Int) Hash() int { func (t Int) Hash() uint64 {
return int(t) h := fnv.New64a()
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, uint64(t))
h.Write(bytes)
return h.Sum64()
} }
func (t Int) Clone() core.Value { func (t Int) Clone() core.Value {

View File

@@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestInt checks that Int.Hash produces a value greater than zero, that 1 and
// 2 hash differently, and that hashing the same value twice yields equal
// results.
func TestInt(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			sum := values.NewInt(1).Hash()

			So(sum, ShouldBeGreaterThan, 0)

			// A different integer must not collide with the first one.
			other := values.NewInt(2).Hash()

			So(sum, ShouldNotEqual, other)
		})

		Convey("Hash sum should be consistent", func() {
			num := values.NewInt(1)
			first, second := num.Hash(), num.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@@ -33,7 +33,7 @@ func (t *none) Unwrap() interface{} {
return nil return nil
} }
func (t *none) Hash() int { func (t *none) Hash() uint64 {
return 0 return 0
} }

View File

@@ -1,15 +1,17 @@
package values package values
import ( import (
"crypto/sha512" "encoding/binary"
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"sort"
) )
type ( type (
ObjectPredicate = func(value core.Value, key string) bool ObjectPredicate = func(value core.Value, key string) bool
ObjectProperty struct { ObjectProperty struct {
name string key string
value core.Value value core.Value
} }
Object struct { Object struct {
@@ -29,7 +31,7 @@ func NewObjectWith(props ...*ObjectProperty) *Object {
obj := NewObject() obj := NewObject()
for _, prop := range props { for _, prop := range props {
obj.value[prop.name] = prop.value obj.value[prop.key] = prop.value
} }
return obj return obj
@@ -88,22 +90,43 @@ func (t *Object) Unwrap() interface{} {
return obj return obj
} }
func (t *Object) Hash() int { func (t *Object) Hash() uint64 {
bytes, err := t.MarshalJSON() h := fnv.New64a()
if err != nil { h.Write([]byte(t.Type().String()))
return 0 h.Write([]byte(":"))
h.Write([]byte("{"))
keys := make([]string, 0, len(t.value))
for key := range t.value {
keys = append(keys, key)
} }
h := sha512.New() // order does not really matter
// but it will give us a consistent hash sum
sort.Strings(keys)
endIndex := len(keys) - 1
out, err := h.Write(bytes) for idx, key := range keys {
h.Write([]byte(key))
h.Write([]byte(":"))
if err != nil { el := t.value[key]
return 0
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, el.Hash())
h.Write(bytes)
if idx != endIndex {
h.Write([]byte(","))
}
} }
return out h.Write([]byte("}"))
return h.Sum64()
} }
func (t *Object) Clone() core.Value { func (t *Object) Clone() core.Value {

View File

@@ -142,6 +142,45 @@ func TestObject(t *testing.T) {
}) })
}) })
Convey(".Hash", t, func() {
Convey("It should calculate hash of non-empty object", func() {
v := values.NewObjectWith(
values.NewObjectProperty("foo", values.NewString("bar")),
values.NewObjectProperty("faz", values.NewInt(1)),
values.NewObjectProperty("qaz", values.True),
)
h := v.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("It should calculate hash of empty object", func() {
v := values.NewObject()
h := v.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
v := values.NewObjectWith(
values.NewObjectProperty("boolean", values.True),
values.NewObjectProperty("int", values.NewInt(1)),
values.NewObjectProperty("float", values.NewFloat(1.1)),
values.NewObjectProperty("string", values.NewString("foobar")),
values.NewObjectProperty("datetime", values.NewCurrentDateTime()),
values.NewObjectProperty("array", values.NewArrayWith(values.NewInt(1), values.True)),
values.NewObjectProperty("object", values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar")))),
)
h1 := v.Hash()
h2 := v.Hash()
So(h1, ShouldEqual, h2)
})
})
Convey(".Length", t, func() { Convey(".Length", t, func() {
Convey("Should return 0 when empty", func() { Convey("Should return 0 when empty", func() {
obj := values.NewObject() obj := values.NewObject()

View File

@@ -1,11 +1,11 @@
package values package values
import ( import (
"crypto/sha512"
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors" "github.com/pkg/errors"
"hash/fnv"
"strings" "strings"
) )
@@ -93,16 +93,14 @@ func (t String) Unwrap() interface{} {
return string(t) return string(t)
} }
func (t String) Hash() int { func (t String) Hash() uint64 {
h := sha512.New() h := fnv.New64a()
out, err := h.Write([]byte(t)) h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(t))
if err != nil { return h.Sum64()
return 0
}
return out
} }
func (t String) Clone() core.Value { func (t String) Clone() core.Value {

View File

@@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestString checks that String.Hash produces a value greater than zero, that
// "a" and "b" hash differently, and that hashing the same value twice yields
// equal results.
func TestString(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			sum := values.NewString("a").Hash()

			So(sum, ShouldBeGreaterThan, 0)

			// A different string must not collide with the first one.
			other := values.NewString("b").Hash()

			So(sum, ShouldNotEqual, other)
		})

		Convey("Hash sum should be consistent", func() {
			str := values.NewString("foobar")
			first, second := str.Hash(), str.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@@ -2,7 +2,6 @@ package dynamic
import ( import (
"context" "context"
"crypto/sha512"
"fmt" "fmt"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/logging" "github.com/MontFerret/ferret/pkg/runtime/logging"
@@ -17,6 +16,7 @@ import (
"github.com/mafredri/cdp/rpcc" "github.com/mafredri/cdp/rpcc"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/rs/zerolog" "github.com/rs/zerolog"
"hash/fnv"
"sync" "sync"
"time" "time"
) )
@@ -205,19 +205,17 @@ func (doc *HtmlDocument) Unwrap() interface{} {
return doc.element return doc.element
} }
func (doc *HtmlDocument) Hash() int { func (doc *HtmlDocument) Hash() uint64 {
doc.Lock() doc.Lock()
defer doc.Unlock() defer doc.Unlock()
h := sha512.New() h := fnv.New64a()
out, err := h.Write([]byte(doc.url)) h.Write([]byte(doc.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(doc.url))
if err != nil { return h.Sum64()
return 0
}
return out
} }
func (doc *HtmlDocument) Clone() core.Value { func (doc *HtmlDocument) Clone() core.Value {

View File

@@ -3,7 +3,6 @@ package dynamic
import ( import (
"bytes" "bytes"
"context" "context"
"crypto/sha512"
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
@@ -14,6 +13,7 @@ import (
"github.com/mafredri/cdp" "github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/dom"
"github.com/rs/zerolog" "github.com/rs/zerolog"
"hash/fnv"
"strconv" "strconv"
"sync" "sync"
"time" "time"
@@ -188,24 +188,17 @@ func (el *HtmlElement) Unwrap() interface{} {
return el return el
} }
func (el *HtmlElement) Hash() int { func (el *HtmlElement) Hash() uint64 {
el.Lock() el.Lock()
defer el.Unlock() defer el.Unlock()
h := sha512.New() h := fnv.New64a()
out, err := h.Write([]byte(el.innerHtml)) h.Write([]byte(el.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(el.innerHtml))
if err != nil { return h.Sum64()
el.logger.Error().
Timestamp().
Err(err).
Msg("failed to calculate hash value")
return 0
}
return out
} }
func (el *HtmlElement) Value() core.Value { func (el *HtmlElement) Value() core.Value {

View File

@@ -1,12 +1,12 @@
package static package static
import ( import (
"crypto/sha512"
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"hash/fnv"
) )
type HtmlElement struct { type HtmlElement struct {
@@ -53,22 +53,20 @@ func (el *HtmlElement) Unwrap() interface{} {
return el.selection return el.selection
} }
func (el *HtmlElement) Hash() int { func (el *HtmlElement) Hash() uint64 {
h := sha512.New()
str, err := el.selection.Html() str, err := el.selection.Html()
if err != nil { if err != nil {
return 0 return 0
} }
out, err := h.Write([]byte(str)) h := fnv.New64a()
if err != nil { h.Write([]byte(el.Type().String()))
return 0 h.Write([]byte(":"))
} h.Write([]byte(str))
return out return h.Sum64()
} }
func (el *HtmlElement) Clone() core.Value { func (el *HtmlElement) Clone() core.Value {