1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-14 11:23:02 +02:00

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
David Landry 2018-10-05 18:42:33 -04:00
commit 737016dcd2
42 changed files with 569 additions and 163 deletions

9
Gopkg.lock generated
View File

@ -137,6 +137,14 @@
revision = "a96e63847dc3c67d17befa69c303767e2f84e54f"
version = "v2.1"
[[projects]]
branch = "master"
digest = "1:f7aa53146bf79462509d4ce136826ebbd64907e4679e1b04e62758da6b68e589"
name = "github.com/orcaman/concurrent-map"
packages = ["."]
pruneopts = "UT"
revision = "b28018939af9022337862b94a463abb18abb3e0e"
[[projects]]
digest = "1:40e195917a951a8bf867cd05de2a46aaf1806c50cf92eebf4c16f78cd196f747"
name = "github.com/pkg/errors"
@ -228,6 +236,7 @@
"github.com/mafredri/cdp/rpcc",
"github.com/mafredri/cdp/session",
"github.com/natefinch/lumberjack",
"github.com/orcaman/concurrent-map",
"github.com/pkg/errors",
"github.com/rs/zerolog",
"github.com/sethgrid/pester",

View File

@ -5,7 +5,7 @@ export GOPATH
VERSION ?= $(shell git describe --tags --always --dirty)
DIR_BIN = ./bin
DIR_PKG = ./pkg
DIR_CMD = ./cmd
DIR_CLI = ./cli
default: build
@ -14,7 +14,7 @@ build: install vet generate test compile
compile:
go build -v -o ${DIR_BIN}/ferret \
-ldflags "-X main.Version=${VERSION}" \
${DIR_CMD}/main.go
./main.go
install:
dep ensure
@ -30,14 +30,14 @@ doc:
# http://golang.org/cmd/go/#hdr-Run_gofmt_on_package_sources
fmt:
go fmt ${DIR_CMD}/... ${DIR_PKG}/...
go fmt ${DIR_CLI}/... ${DIR_PKG}/...
# https://github.com/golang/lint
# go get github.com/golang/lint/golint
lint:
golint ${DIR_CMD}/... ${DIR_PKG}/...
golint ${DIR_CLI}/... ${DIR_PKG}/...
# http://godoc.org/code.google.com/p/go.tools/cmd/vet
# go get code.google.com/p/go.tools/cmd/vet
vet:
go vet ${DIR_CMD}/... ${DIR_PKG}/...
go vet ${DIR_CLI}/... ${DIR_PKG}/...

View File

@ -3,15 +3,15 @@
![ferret](https://raw.githubusercontent.com/MontFerret/ferret/master/assets/intro.jpg)
## What is it?
```ferret``` is a web scraping system aiming to simplify data extraction from the web for such things like ui testing, machine learning and analytics.
Having it's own declarative language, ```ferret``` abstracts away technical details and complexity of the underlying technologies, helping to focus on the data itself.
```ferret``` is a web scraping system aiming to simplify data extraction from the web for things such as UI testing, machine learning and analytics.
Having its own declarative language, ```ferret``` abstracts away technical details and complexity of the underlying technologies, helping to focus on the data itself.
It's extremely portable, extensible and fast.
## Show me some code
The following example demonstrates the use of dynamic pages.
First of all, we load the main Google Search page, type search criteria into an input box and then click a search button.
The click action triggers a redirect, so we wait till its end.
Once the page gets loaded, we iterate over all elements in search results and assign output to a variable.
Once the page gets loaded, we iterate over all elements in search results and assign the output to a variable.
The final for loop filters out empty elements, which may appear because of inaccurate use of selectors.
```aql
@ -49,27 +49,32 @@ RETURN (
Nowadays data is everything and who owns data - owns the world.
I have worked on multiple data-driven projects where data was an essential part of a system and I realized how cumbersome writing tons of scrapers is.
After some time looking for a tool that would let me not write code, but just express what data I need, I decided to come up with my own solution.
```ferret``` project is an ambitious initiative trying to bring universal platform for writing scrapers without any hassle.
The ```ferret``` project is an ambitious initiative that aims to provide a universal platform for writing scrapers without any hassle.
## Inspiration
FQL (Ferret Query Language) is heavily inspired by [AQL](https://www.arangodb.com/) (ArangoDB Query Language).
But due to the domain specifics, there are some differences in how things work.
## WIP
Be aware, the the project is under heavy development. There is no documentation and some things may change in the final release.
Be aware that the project is under heavy development. There is no documentation and some things may change in the final release.
For query syntax, you may go to [ArangoDB web site](https://docs.arangodb.com/3.3/AQL/index.html) and use AQL docs as docs for FQL - since they are identical.
## Installation
### Prerequisites
* Go >=1.6
#### Production
* Go >=1.9
* Chrome or Docker
#### Development
* GoDep
* GNU Make
* Chrome or Docker (optional)
* ANTLR4 >=4.7.1
```sh
make install && make compile
go get github.com/MontFerret/ferret
```
You can use your local copy of Google Chrome / Chromium, but for ease of use it's recommended to run it inside a Docker container:
@ -91,7 +96,7 @@ chrome.exe --remote-debugging-port=9222
If you want to play with ```fql``` and check its syntax, you can run CLI with the following commands:
```
go run ./cmd/main.go
ferret
```
```ferret``` will run in REPL mode.
@ -107,33 +112,33 @@ Please use `Ctrl-D` to exit this program.
```
**Note:** symbol ```%``` is used to start and end multi line queries. You also can use heredoc format.
**Note:** symbol ```%``` is used to start and end multi-line queries. You can also use the heredoc format.
If you want to execute a query stored in a file, just pass a file name:
```
go run ./cmd/main.go ./docs/examples/hackernews.fql
ferret ./docs/examples/static-page.fql
```
```
cat ./docs/examples/hackernews.fql | go run ./cmd/main.go
cat ./docs/examples/static-page.fql | ferret
```
```
go run ./cmd/main.go < ./docs/examples/hackernews.fql
ferret < ./docs/examples/static-page.fql
```
### Browser mode
By default, ``ferret`` loads HTML pages via http protocol, because it's faster.
By default, ``ferret`` loads HTML pages via the HTTP protocol, because it's faster.
But nowadays, there are more and more websites rendered with JavaScript, and therefore, this 'old school' approach does not really work.
For such cases, you may fetch documents using Chrome or Chromium via Chrome DevTools protocol (aka CDP).
First, you need to make sure that you launched Chrome with ```remote-debugging-port=9222``` flag.
Second, you need to pass the address to ```ferret``` CLI.
```
go run ./cmd/main.go --cdp http://127.0.0.1:9222
ferret --cdp http://127.0.0.1:9222
```
**NOTE:** By default, ```ferret``` will try to use this local address as a default one, so it makes sense to explicitly pass the parameter only in case of either different port number or remote address.
@ -141,7 +146,7 @@ go run ./cmd/main.go --cdp http://127.0.0.1:9222
Alternatively, you can tell CLI to launch Chrome for you.
```shell
go run ./cmd/main.go --cdp-launch
ferret --cdp-launch
```
**NOTE:** Launch command is currently broken on MacOS.
@ -345,7 +350,7 @@ func getStrings() ([]string, error) {
}
```
On top of that, you can completely turn off standard library, by passing the following option:
On top of that, you can completely turn off the standard library by passing the following option:
```go
comp := compiler.New(compiler.WithoutStdlib())

View File

@ -0,0 +1,3 @@
LET doc = DOCUMENT("about:blank", true)
NAVIGATE(doc, "https://www.google.com/")
RETURN doc.url

View File

@ -1,13 +1,12 @@
LET doc = DOCUMENT("https://github.com/", true)
LET main = ELEMENT(doc, '.application-main')
LOG('innerText:start')
LET mainTxt = main.innerText
LOG('innerText:end')
NAVIGATE(doc, "https://github.com/features")
LET features = ELEMENT(doc, '.application-main')
LET featuresTxt = features.innerText
LOG("featuresTxt:", featuresTxt)
RETURN mainTxt == featuresTxt

View File

@ -5,8 +5,8 @@ import (
"encoding/json"
"flag"
"fmt"
"github.com/MontFerret/ferret/cmd/cli"
"github.com/MontFerret/ferret/pkg/browser"
"github.com/MontFerret/ferret/cli"
"github.com/MontFerret/ferret/cli/browser"
"github.com/MontFerret/ferret/pkg/runtime/core"
"io/ioutil"
"os"

View File

@ -1774,24 +1774,29 @@ func TestParam(t *testing.T) {
})
}
func TestHtml(t *testing.T) {
// Convey("Should load a document", t, func() {
// c := compiler.New()
//
// out, err := c.MustCompile(`
//LET doc = DOCUMENT("https://github.com/", true)
//LET btn = ELEMENT(doc, ".HeaderMenu a")
//
//CLICK(btn)
//WAIT_NAVIGATION(doc)
//WAIT_ELEMENT(doc, '.IconNav')
//
//RETURN INNER_HTML_ALL(doc, '.IconNav a')
//
// `).Run(context.Background())
//
// So(err, ShouldBeNil)
//
// So(string(out), ShouldEqual, `"int"`)
// })
}
//func TestHtml(t *testing.T) {
// Convey("Should load a document", t, func() {
// c := compiler.New()
//
// out, err := c.MustCompile(`
//LET doc = DOCUMENT("https://github.com/", true)
//LET main = ELEMENT(doc, '.application-main')
//LET mainTxt = main.innerText
//
//NAVIGATE(doc, "https://github.com/features")
//
//LET features = ELEMENT(doc, '.application-main')
//LET featuresTxt = features.innerText
//
//LOG("featuresTxt:", featuresTxt)
//
//RETURN mainTxt == featuresTxt
//
//
// `).Run(context.Background())
//
// So(err, ShouldBeNil)
//
// So(string(out), ShouldEqual, `"int"`)
// })
//}

View File

@ -0,0 +1,52 @@
package core_test
import (
"testing"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
. "github.com/smartystreets/goconvey/convey"
)
// TestSourceError verifies that core.SourceError returns a non-nil error whose
// message is "<cause>: test at 1:1" for a source map created with ("test", 1, 1).
func TestSourceError(t *testing.T) {
	Convey("Should match", t, func() {
		const location = "test at 1:1"

		original := errors.New("cause")
		// Build the expected message with the same "%s: %s" layout the assertion compares against.
		want := errors.Errorf("%s: %s", original.Error(), location)

		got := core.SourceError(core.NewSourceMap("test", 1, 1), original)

		So(got, ShouldNotBeNil)
		So(got.Error(), ShouldEqual, want.Error())
	})
}
// TestTypeError verifies that core.TypeError always yields an error, and that
// the message lists every expected type followed by the actual one.
func TestTypeError(t *testing.T) {
	Convey("Should match", t, func() {
		// Only the actual type, no expected types.
		withoutExpected := core.TypeError(core.BooleanType)
		So(withoutExpected, ShouldNotBeNil)

		// One expected type.
		withOneExpected := core.TypeError(core.BooleanType, core.BooleanType)
		So(withOneExpected, ShouldNotBeNil)

		// Several expected types.
		withManyExpected := core.TypeError(core.BooleanType, core.BooleanType, core.IntType, core.FloatType)
		So(withManyExpected, ShouldNotBeNil)

		// The full message is pinned for the four-expected-types case.
		want := errors.New("invalid type: expected none or boolean or int or float, but got none")
		got := core.TypeError(core.NoneType, core.NoneType, core.BooleanType, core.IntType, core.FloatType)
		So(got.Error(), ShouldEqual, want.Error())
	})
}
// TestError verifies that core.Error returns a non-nil error whose message is
// "<cause>: <message>".
func TestError(t *testing.T) {
	Convey("Should match", t, func() {
		const text = "test message"

		original := errors.New("cause")
		want := errors.Errorf("%s: %s", original.Error(), text)

		got := core.Error(original, text)

		So(got, ShouldNotBeNil)
		So(got.Error(), ShouldEqual, want.Error())
	})
}

View File

@ -0,0 +1,21 @@
package core_test
import (
"testing"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
)
// TestValidateArgs verifies that core.ValidateArgs accepts two arguments for
// the bounds (1, 2) and rejects them for the bounds (3, 4).
func TestValidateArgs(t *testing.T) {
	Convey("Should match", t, func() {
		args := []core.Value{
			values.NewInt(1),
			values.NewInt(2),
		}

		// Two arguments checked against (1, 2): no error expected.
		inRange := core.ValidateArgs(args, 1, 2)
		So(inRange, ShouldBeNil)

		// Two arguments checked against (3, 4): an error is expected.
		outOfRange := core.ValidateArgs(args, 3, 4)
		So(outOfRange, ShouldNotBeNil)
	})
}

View File

@ -0,0 +1,69 @@
package core_test
import (
"context"
"testing"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
)
// TestParamsWith verifies that core.ParamsWith returns a context that exposes
// the given parameter map under the "params" key.
func TestParamsWith(t *testing.T) {
	Convey("Should match", t, func() {
		params := map[string]core.Value{
			"val1": values.NewInt(1),
			"val2": values.NewString("test"),
		}

		ctx := core.ParamsWith(context.Background(), params)

		So(ctx, ShouldNotBeNil)
		So(ctx.Value("params"), ShouldEqual, params)
	})
}
// TestParamsFrom verifies that core.ParamsFrom fails when the context has no
// value under the "params" key and returns the stored map when it does.
func TestParamsFrom(t *testing.T) {
	Convey("Should match", t, func() {
		expected := map[string]core.Value{
			"val1": values.NewInt(1),
			"val2": values.NewString("test"),
		}

		// A background context with no values attached must produce an error.
		_, emptyErr := core.ParamsFrom(context.Background())
		So(emptyErr, ShouldNotBeNil)

		// The map stored under an unrelated key ("fail") must also produce an error.
		wrongKeyCtx := context.WithValue(context.Background(), "fail", expected)
		_, wrongKeyErr := core.ParamsFrom(wrongKeyCtx)
		So(wrongKeyErr, ShouldNotBeNil)

		// The map stored under "params" must be returned without an error.
		paramsCtx := context.WithValue(context.Background(), "params", expected)
		actual, err := core.ParamsFrom(paramsCtx)
		So(err, ShouldBeNil)
		So(actual, ShouldEqual, expected)
	})
}
// TestParamFrom verifies that core.ParamFrom fails when the context has no
// value under the "params" key and returns the named parameter when it does.
func TestParamFrom(t *testing.T) {
	Convey("Should match", t, func() {
		params := map[string]core.Value{
			"val1": values.NewInt(1),
			"val2": values.NewString("test"),
		}

		// A background context with no values attached must produce an error.
		_, emptyErr := core.ParamFrom(context.Background(), "")
		So(emptyErr, ShouldNotBeNil)

		// The map stored under an unrelated key ("fail") must also produce an error.
		wrongKeyCtx := context.WithValue(context.Background(), "fail", params)
		_, wrongKeyErr := core.ParamFrom(wrongKeyCtx, "val1")
		So(wrongKeyErr, ShouldNotBeNil)

		// With the map stored under "params", "val1" resolves to the stored Int.
		paramsCtx := context.WithValue(context.Background(), "params", params)
		value, err := core.ParamFrom(paramsCtx, "val1")
		So(err, ShouldBeNil)
		So(value, ShouldEqual, values.NewInt(1))
	})
}

View File

@ -45,7 +45,7 @@ type Value interface {
String() string
Compare(other Value) int
Unwrap() interface{}
Hash() int
Hash() uint64
Clone() Value
}

View File

@ -70,10 +70,10 @@ func (e *ForExpression) Exec(ctx context.Context, scope *core.Scope) (core.Value
}
// Hash map for a check for uniqueness
var hashes map[int]bool
var hashes map[uint64]bool
if e.distinct {
hashes = make(map[int]bool)
hashes = make(map[uint64]bool)
}
res := values.NewArray(10)

View File

@ -1,10 +1,11 @@
package values
import (
"crypto/sha512"
"encoding/binary"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
)
type (
@ -77,22 +78,29 @@ func (t *Array) Unwrap() interface{} {
return arr
}
func (t *Array) Hash() int {
bytes, err := t.MarshalJSON()
func (t *Array) Hash() uint64 {
h := fnv.New64a()
if err != nil {
return 0
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte("["))
endIndex := len(t.value) - 1
for i, el := range t.value {
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, el.Hash())
h.Write(bytes)
if i != endIndex {
h.Write([]byte(","))
}
}
h := sha512.New()
h.Write([]byte("]"))
out, err := h.Write(bytes)
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t *Array) Clone() core.Value {

View File

@ -119,6 +119,45 @@ func TestArray(t *testing.T) {
})
})
Convey(".Hash", t, func() {
Convey("It should calculate hash of non-empty array", func() {
arr := values.NewArrayWith(
values.NewInt(1),
values.NewInt(2),
values.NewInt(3),
)
h := arr.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("It should calculate hash of empty array", func() {
arr := values.NewArrayWith()
h := arr.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
arr := values.NewArrayWith(
values.True,
values.NewInt(1),
values.NewFloat(1.1),
values.NewString("foobar"),
values.NewCurrentDateTime(),
values.NewArrayWith(values.NewInt(1), values.True),
values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar"))),
)
h1 := arr.Hash()
h2 := arr.Hash()
So(h1, ShouldEqual, h2)
})
})
Convey(".Length", t, func() {
Convey("Should return 0 when empty", func() {
arr := values.NewArray(1)

View File

@ -1,9 +1,9 @@
package values
import (
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"io"
"io/ioutil"
)
@ -62,16 +62,14 @@ func (b *Binary) Unwrap() interface{} {
return b.values
}
func (b *Binary) Hash() int {
h := sha512.New()
func (b *Binary) Hash() uint64 {
h := fnv.New64a()
out, err := h.Write(b.values)
h.Write([]byte(b.Type().String()))
h.Write([]byte(":"))
h.Write(b.values)
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (b *Binary) Clone() core.Value {

View File

@ -1,10 +1,10 @@
package values
import (
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"strings"
)
@ -98,16 +98,14 @@ func (t Boolean) Unwrap() interface{} {
return bool(t)
}
func (t Boolean) Hash() int {
h := sha512.New()
func (t Boolean) Hash() uint64 {
h := fnv.New64a()
out, err := h.Write([]byte(t.String()))
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(t.String()))
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t Boolean) Clone() core.Value {

View File

@ -70,4 +70,16 @@ func TestBoolean(t *testing.T) {
So(values.False.Compare(values.True), ShouldEqual, -1)
})
})
Convey(".Hash", t, func() {
Convey("It should calculate hash", func() {
So(values.True.Hash(), ShouldBeGreaterThan, 0)
So(values.False.Hash(), ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
So(values.True.Hash(), ShouldEqual, values.True.Hash())
So(values.False.Hash(), ShouldEqual, values.False.Hash())
})
})
}

View File

@ -1,8 +1,8 @@
package values
import (
"crypto/sha512"
"github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"time"
)
@ -12,6 +12,10 @@ type DateTime struct {
time.Time
}
// NewCurrentDateTime returns a DateTime wrapping the value of time.Now()
// at the moment of the call.
func NewCurrentDateTime() DateTime {
	now := time.Now()

	return DateTime{Time: now}
}
// NewDateTime returns a DateTime wrapping the given time.Time value.
//
// The parameter is deliberately not called "time": that name would shadow the
// imported time package for the whole function body.
func NewDateTime(t time.Time) DateTime {
	return DateTime{Time: t}
}
@ -84,18 +88,21 @@ func (t DateTime) Unwrap() interface{} {
return t.Time
}
func (t DateTime) Hash() int {
h := sha512.New()
func (t DateTime) Hash() uint64 {
h := fnv.New64a()
t.Time.MarshalJSON()
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
out, err := h.Write([]byte(t.Time.String()))
bytes, err := t.Time.GobEncode()
if err != nil {
return 0
}
return out
h.Write(bytes)
return h.Sum64()
}
func (t DateTime) Clone() core.Value {

View File

@ -0,0 +1,25 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestDateTime covers DateTime.Hash: the hash of the current time is expected
// to be greater than zero and identical across repeated calls on one value.
func TestDateTime(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			now := values.NewCurrentDateTime()

			So(now.Hash(), ShouldBeGreaterThan, 0)
		})

		Convey("Hash sum should be consistent", func() {
			// Both hashes come from the same value, so they must agree.
			now := values.NewCurrentDateTime()
			first := now.Hash()
			second := now.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -1,11 +1,13 @@
package values
import (
"crypto/sha512"
"encoding/binary"
"encoding/json"
"fmt"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"math"
"strconv"
)
@ -112,22 +114,17 @@ func (t Float) Unwrap() interface{} {
return float64(t)
}
func (t Float) Hash() int {
bytes, err := t.MarshalJSON()
func (t Float) Hash() uint64 {
h := fnv.New64a()
if err != nil {
return 0
}
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h := sha512.New()
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, math.Float64bits(float64(t)))
h.Write(bytes)
out, err := h.Write(bytes)
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t Float) Clone() core.Value {

View File

@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestFloat covers Float.Hash: the hash is expected to be greater than zero,
// to differ between 1.1 and 1.2, and to be identical across repeated calls.
func TestFloat(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			left := values.NewFloat(1.1)
			leftHash := left.Hash()

			So(leftHash, ShouldBeGreaterThan, 0)

			// A different float value must not hash to the same sum.
			right := values.NewFloat(1.2)

			So(leftHash, ShouldNotEqual, right.Hash())
		})

		Convey("Hash sum should be consistent", func() {
			value := values.NewFloat(1.1)
			first := value.Hash()
			second := value.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -1,9 +1,11 @@
package values
import (
"encoding/binary"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"strconv"
)
@ -110,8 +112,17 @@ func (t Int) Unwrap() interface{} {
return int(t)
}
func (t Int) Hash() int {
return int(t)
func (t Int) Hash() uint64 {
h := fnv.New64a()
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, uint64(t))
h.Write(bytes)
return h.Sum64()
}
func (t Int) Clone() core.Value {

View File

@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestInt covers Int.Hash: the hash is expected to be greater than zero,
// to differ between 1 and 2, and to be identical across repeated calls.
func TestInt(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			left := values.NewInt(1)
			leftHash := left.Hash()

			So(leftHash, ShouldBeGreaterThan, 0)

			// A different integer value must not hash to the same sum.
			right := values.NewInt(2)

			So(leftHash, ShouldNotEqual, right.Hash())
		})

		Convey("Hash sum should be consistent", func() {
			value := values.NewInt(1)
			first := value.Hash()
			second := value.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -33,7 +33,7 @@ func (t *none) Unwrap() interface{} {
return nil
}
func (t *none) Hash() int {
func (t *none) Hash() uint64 {
return 0
}

View File

@ -1,15 +1,17 @@
package values
import (
"crypto/sha512"
"encoding/binary"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"hash/fnv"
"sort"
)
type (
ObjectPredicate = func(value core.Value, key string) bool
ObjectProperty struct {
name string
key string
value core.Value
}
Object struct {
@ -29,7 +31,7 @@ func NewObjectWith(props ...*ObjectProperty) *Object {
obj := NewObject()
for _, prop := range props {
obj.value[prop.name] = prop.value
obj.value[prop.key] = prop.value
}
return obj
@ -88,22 +90,43 @@ func (t *Object) Unwrap() interface{} {
return obj
}
func (t *Object) Hash() int {
bytes, err := t.MarshalJSON()
func (t *Object) Hash() uint64 {
h := fnv.New64a()
if err != nil {
return 0
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte("{"))
keys := make([]string, 0, len(t.value))
for key := range t.value {
keys = append(keys, key)
}
h := sha512.New()
// order does not really matter
// but it will give us a consistent hash sum
sort.Strings(keys)
endIndex := len(keys) - 1
out, err := h.Write(bytes)
for idx, key := range keys {
h.Write([]byte(key))
h.Write([]byte(":"))
if err != nil {
return 0
el := t.value[key]
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, el.Hash())
h.Write(bytes)
if idx != endIndex {
h.Write([]byte(","))
}
}
return out
h.Write([]byte("}"))
return h.Sum64()
}
func (t *Object) Clone() core.Value {

View File

@ -142,6 +142,45 @@ func TestObject(t *testing.T) {
})
})
Convey(".Hash", t, func() {
Convey("It should calculate hash of non-empty object", func() {
v := values.NewObjectWith(
values.NewObjectProperty("foo", values.NewString("bar")),
values.NewObjectProperty("faz", values.NewInt(1)),
values.NewObjectProperty("qaz", values.True),
)
h := v.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("It should calculate hash of empty object", func() {
v := values.NewObject()
h := v.Hash()
So(h, ShouldBeGreaterThan, 0)
})
Convey("Hash sum should be consistent", func() {
v := values.NewObjectWith(
values.NewObjectProperty("boolean", values.True),
values.NewObjectProperty("int", values.NewInt(1)),
values.NewObjectProperty("float", values.NewFloat(1.1)),
values.NewObjectProperty("string", values.NewString("foobar")),
values.NewObjectProperty("datetime", values.NewCurrentDateTime()),
values.NewObjectProperty("array", values.NewArrayWith(values.NewInt(1), values.True)),
values.NewObjectProperty("object", values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar")))),
)
h1 := v.Hash()
h2 := v.Hash()
So(h1, ShouldEqual, h2)
})
})
Convey(".Length", t, func() {
Convey("Should return 0 when empty", func() {
obj := values.NewObject()

View File

@ -1,11 +1,11 @@
package values
import (
"crypto/sha512"
"encoding/json"
"fmt"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/pkg/errors"
"hash/fnv"
"strings"
)
@ -93,16 +93,14 @@ func (t String) Unwrap() interface{} {
return string(t)
}
func (t String) Hash() int {
h := sha512.New()
func (t String) Hash() uint64 {
h := fnv.New64a()
out, err := h.Write([]byte(t))
h.Write([]byte(t.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(t))
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (t String) Clone() core.Value {

View File

@ -0,0 +1,29 @@
package values_test
import (
"github.com/MontFerret/ferret/pkg/runtime/values"
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestString covers String.Hash: the hash is expected to be greater than zero,
// to differ between "a" and "b", and to be identical across repeated calls.
func TestString(t *testing.T) {
	Convey(".Hash", t, func() {
		Convey("It should calculate hash", func() {
			left := values.NewString("a")
			leftHash := left.Hash()

			So(leftHash, ShouldBeGreaterThan, 0)

			// A different string value must not hash to the same sum.
			right := values.NewString("b")

			So(leftHash, ShouldNotEqual, right.Hash())
		})

		Convey("Hash sum should be consistent", func() {
			value := values.NewString("foobar")
			first := value.Hash()
			second := value.Hash()

			So(first, ShouldEqual, second)
		})
	})
}

View File

@ -2,7 +2,6 @@ package dynamic
import (
"context"
"crypto/sha512"
"fmt"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/logging"
@ -17,10 +16,13 @@ import (
"github.com/mafredri/cdp/rpcc"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"hash/fnv"
"sync"
"time"
)
const BlankPageUrl = "about:blank"
type HtmlDocument struct {
sync.Mutex
logger *zerolog.Logger
@ -78,10 +80,12 @@ func LoadHtmlDocument(
return nil, err
}
err = waitForLoadEvent(ctx, client)
if url != BlankPageUrl {
err = waitForLoadEvent(ctx, client)
if err != nil {
return nil, err
if err != nil {
return nil, err
}
}
root, innerHtml, err := getRootElement(client)
@ -201,19 +205,17 @@ func (doc *HtmlDocument) Unwrap() interface{} {
return doc.element
}
func (doc *HtmlDocument) Hash() int {
func (doc *HtmlDocument) Hash() uint64 {
doc.Lock()
defer doc.Unlock()
h := sha512.New()
h := fnv.New64a()
out, err := h.Write([]byte(doc.url))
h.Write([]byte(doc.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(doc.url))
if err != nil {
return 0
}
return out
return h.Sum64()
}
func (doc *HtmlDocument) Clone() core.Value {
@ -637,6 +639,10 @@ func (doc *HtmlDocument) WaitForNavigation(timeout values.Int) error {
}
func (doc *HtmlDocument) Navigate(url values.String) error {
if url == "" {
url = BlankPageUrl
}
ctx := context.Background()
repl, err := doc.client.Page.Navigate(ctx, page.NewNavigateArgs(url.String()))
@ -648,5 +654,5 @@ func (doc *HtmlDocument) Navigate(url values.String) error {
return errors.New(*repl.ErrorText)
}
return waitForLoadEvent(ctx, doc.client)
return doc.WaitForNavigation(5000)
}

View File

@ -38,6 +38,10 @@ func (drv *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode
ctx, cancel := context.WithTimeout(ctx, DefaultTimeout)
defer cancel()
if url == "" {
url = BlankPageUrl
}
// Create a new target belonging to the browser context, similar
// to opening a new tab in an incognito window.
createTargetArgs := target.NewCreateTargetArgs(url).SetBrowserContextID(drv.contextID)

View File

@ -3,7 +3,6 @@ package dynamic
import (
"bytes"
"context"
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
@ -14,6 +13,7 @@ import (
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/rs/zerolog"
"hash/fnv"
"strconv"
"sync"
"time"
@ -188,24 +188,17 @@ func (el *HtmlElement) Unwrap() interface{} {
return el
}
func (el *HtmlElement) Hash() int {
func (el *HtmlElement) Hash() uint64 {
el.Lock()
defer el.Unlock()
h := sha512.New()
h := fnv.New64a()
out, err := h.Write([]byte(el.innerHtml))
h.Write([]byte(el.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(el.innerHtml))
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Msg("failed to calculate hash value")
return 0
}
return out
return h.Sum64()
}
func (el *HtmlElement) Value() core.Value {
@ -297,7 +290,7 @@ func (el *HtmlElement) GetChildNode(idx values.Int) core.Value {
func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
if !el.IsConnected() {
return values.NewArray(0)
return values.None
}
ctx := context.Background()

View File

@ -1,12 +1,12 @@
package static
import (
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
"github.com/PuerkitoBio/goquery"
"hash/fnv"
)
type HtmlElement struct {
@ -53,22 +53,20 @@ func (el *HtmlElement) Unwrap() interface{} {
return el.selection
}
func (el *HtmlElement) Hash() int {
h := sha512.New()
func (el *HtmlElement) Hash() uint64 {
str, err := el.selection.Html()
if err != nil {
return 0
}
out, err := h.Write([]byte(str))
h := fnv.New64a()
if err != nil {
return 0
}
h.Write([]byte(el.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(str))
return out
return h.Sum64()
}
func (el *HtmlElement) Clone() core.Value {