mirror of
https://github.com/MontFerret/ferret.git
synced 2024-12-14 11:23:02 +02:00
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
737016dcd2
9
Gopkg.lock
generated
9
Gopkg.lock
generated
@ -137,6 +137,14 @@
|
||||
revision = "a96e63847dc3c67d17befa69c303767e2f84e54f"
|
||||
version = "v2.1"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:f7aa53146bf79462509d4ce136826ebbd64907e4679e1b04e62758da6b68e589"
|
||||
name = "github.com/orcaman/concurrent-map"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "b28018939af9022337862b94a463abb18abb3e0e"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:40e195917a951a8bf867cd05de2a46aaf1806c50cf92eebf4c16f78cd196f747"
|
||||
name = "github.com/pkg/errors"
|
||||
@ -228,6 +236,7 @@
|
||||
"github.com/mafredri/cdp/rpcc",
|
||||
"github.com/mafredri/cdp/session",
|
||||
"github.com/natefinch/lumberjack",
|
||||
"github.com/orcaman/concurrent-map",
|
||||
"github.com/pkg/errors",
|
||||
"github.com/rs/zerolog",
|
||||
"github.com/sethgrid/pester",
|
||||
|
10
Makefile
10
Makefile
@ -5,7 +5,7 @@ export GOPATH
|
||||
VERSION ?= $(shell git describe --tags --always --dirty)
|
||||
DIR_BIN = ./bin
|
||||
DIR_PKG = ./pkg
|
||||
DIR_CMD = ./cmd
|
||||
DIR_CLI = ./cli
|
||||
|
||||
default: build
|
||||
|
||||
@ -14,7 +14,7 @@ build: install vet generate test compile
|
||||
compile:
|
||||
go build -v -o ${DIR_BIN}/ferret \
|
||||
-ldflags "-X main.Version=${VERSION}" \
|
||||
${DIR_CMD}/main.go
|
||||
./main.go
|
||||
|
||||
install:
|
||||
dep ensure
|
||||
@ -30,14 +30,14 @@ doc:
|
||||
|
||||
# http://golang.org/cmd/go/#hdr-Run_gofmt_on_package_sources
|
||||
fmt:
|
||||
go fmt ${DIR_CMD}/... ${DIR_PKG}/...
|
||||
go fmt ${DIR_CLI}/... ${DIR_PKG}/...
|
||||
|
||||
# https://github.com/golang/lint
|
||||
# go get github.com/golang/lint/golint
|
||||
lint:
|
||||
golint ${DIR_CMD}/... ${DIR_PKG}/...
|
||||
golint ${DIR_CLI}/... ${DIR_PKG}/...
|
||||
|
||||
# http://godoc.org/code.google.com/p/go.tools/cmd/vet
|
||||
# go get code.google.com/p/go.tools/cmd/vet
|
||||
vet:
|
||||
go vet ${DIR_CMD}/... ${DIR_PKG}/...
|
||||
go vet ${DIR_CLI}/... ${DIR_PKG}/...
|
39
README.md
39
README.md
@ -3,15 +3,15 @@
|
||||
![ferret](https://raw.githubusercontent.com/MontFerret/ferret/master/assets/intro.jpg)
|
||||
|
||||
## What is it?
|
||||
```ferret``` is a web scraping system aiming to simplify data extraction from the web for such things like ui testing, machine learning and analytics.
|
||||
Having it's own declarative language, ```ferret``` abstracts away technical details and complexity of the underlying technologies, helping to focus on the data itself.
|
||||
```ferret``` is a web scraping system aiming to simplify data extraction from the web for things like UI testing, machine learning and analytics.
|
||||
Having its own declarative language, ```ferret``` abstracts away technical details and complexity of the underlying technologies, helping to focus on the data itself.
|
||||
It's extremely portable, extensible and fast.
|
||||
|
||||
## Show me some code
|
||||
The following example demonstrates the use of dynamic pages.
|
||||
First of all, we load the main Google Search page, type search criteria into an input box and then click a search button.
|
||||
The click action triggers a redirect, so we wait till its end.
|
||||
Once the page gets loaded, we iterate over all elements in search results and assign output to a variable.
|
||||
Once the page gets loaded, we iterate over all elements in search results and assign the output to a variable.
|
||||
The final for loop filters out empty elements that might be present because of inaccurate use of selectors.
|
||||
|
||||
```aql
|
||||
@ -49,27 +49,32 @@ RETURN (
|
||||
Nowadays data is everything, and whoever owns the data owns the world.
|
||||
I have worked on multiple data-driven projects where data was an essential part of a system and I realized how cumbersome writing tons of scrapers is.
|
||||
After some time looking for a tool that would let me not write code, but just express what data I need, I decided to come up with my own solution.
|
||||
```ferret``` project is an ambitious initiative trying to bring universal platform for writing scrapers without any hassle.
|
||||
```ferret``` project is an ambitious initiative trying to bring a universal platform for writing scrapers without any hassle.
|
||||
|
||||
## Inspiration
|
||||
FQL (Ferret Query Language) is heavily inspired by [AQL](https://www.arangodb.com/) (ArangoDB Query Language).
|
||||
But due to the domain specifics, there are some differences in how things work.
|
||||
|
||||
## WIP
|
||||
Be aware, the the project is under heavy development. There is no documentation and some things may change in the final release.
|
||||
Be aware that the project is under heavy development. There is no documentation and some things may change in the final release.
|
||||
For query syntax, you may go to [ArangoDB web site](https://docs.arangodb.com/3.3/AQL/index.html) and use AQL docs as docs for FQL - since they are identical.
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
### Prerequisites
|
||||
* Go >=1.6
|
||||
#### Production
|
||||
* Go >=1.9
|
||||
* Chrome or Docker
|
||||
|
||||
#### Development
|
||||
* GoDep
|
||||
* GNU Make
|
||||
* Chrome or Docker (optional)
|
||||
* ANTLR4 >=4.7.1
|
||||
|
||||
|
||||
```sh
|
||||
make install && make compile
|
||||
go get github.com/MontFerret/ferret
|
||||
```
|
||||
|
||||
You can use your local copy of Google Chrome / Chromium, but for ease of use it's recommended to run it inside a Docker container:
|
||||
@ -91,7 +96,7 @@ chrome.exe --remote-debugging-port=9222
|
||||
|
||||
If you want to play with ```fql``` and check its syntax, you can run CLI with the following commands:
|
||||
```
|
||||
go run ./cmd/main.go
|
||||
ferret
|
||||
```
|
||||
|
||||
```ferret``` will run in REPL mode.
|
||||
@ -107,33 +112,33 @@ Please use `Ctrl-D` to exit this program.
|
||||
|
||||
```
|
||||
|
||||
**Note:** symbol ```%``` is used to start and end multi line queries. You also can use heredoc format.
|
||||
**Note:** the symbol ```%``` is used to start and end multi-line queries. You can also use the heredoc format.
|
||||
|
||||
If you want to execute a query stored in a file, just pass a file name:
|
||||
|
||||
```
|
||||
go run ./cmd/main.go ./docs/examples/hackernews.fql
|
||||
ferret ./docs/examples/static-page.fql
|
||||
```
|
||||
|
||||
```
|
||||
cat ./docs/examples/hackernews.fql | go run ./cmd/main.go
|
||||
cat ./docs/examples/static-page.fql | ferret
|
||||
```
|
||||
|
||||
```
|
||||
go run ./cmd/main.go < ./docs/examples/hackernews.fql
|
||||
ferret < ./docs/examples/static-page.fql
|
||||
```
|
||||
|
||||
|
||||
### Browser mode
|
||||
|
||||
By default, ``ferret`` loads HTML pages via http protocol, because it's faster.
|
||||
By default, ``ferret`` loads HTML pages via the HTTP protocol, because it's faster.
|
||||
But nowadays, there are more and more websites rendered with JavaScript, and therefore, this 'old school' approach does not really work.
|
||||
For such cases, you may fetch documents using Chrome or Chromium via Chrome DevTools protocol (aka CDP).
|
||||
First, you need to make sure that you launched Chrome with the ```remote-debugging-port=9222``` flag.
|
||||
Second, you need to pass the address to ```ferret``` CLI.
|
||||
|
||||
```
|
||||
go run ./cmd/main.go --cdp http://127.0.0.1:9222
|
||||
ferret --cdp http://127.0.0.1:9222
|
||||
```
|
||||
|
||||
**NOTE:** By default, ```ferret``` will try to use this local address as a default one, so it makes sense to explicitly pass the parameter only in case of either different port number or remote address.
|
||||
@ -141,7 +146,7 @@ go run ./cmd/main.go --cdp http://127.0.0.1:9222
|
||||
Alternatively, you can tell CLI to launch Chrome for you.
|
||||
|
||||
```shell
|
||||
go run ./cmd/main.go --cdp-launch
|
||||
ferret --cdp-launch
|
||||
```
|
||||
|
||||
**NOTE:** The launch command is currently broken on macOS.
|
||||
@ -345,7 +350,7 @@ func getStrings() ([]string, error) {
|
||||
}
|
||||
```
|
||||
|
||||
On top of that, you can completely turn off standard library, by passing the following option:
|
||||
On top of that, you can completely turn off the standard library by passing the following option:
|
||||
|
||||
```go
|
||||
comp := compiler.New(compiler.WithoutStdlib())
|
||||
|
3
docs/examples/blank-page.fql
Normal file
3
docs/examples/blank-page.fql
Normal file
@ -0,0 +1,3 @@
|
||||
LET doc = DOCUMENT("about:blank", true)
|
||||
NAVIGATE(doc, "https://www.google.com/")
|
||||
RETURN doc.url
|
@ -1,13 +1,12 @@
|
||||
LET doc = DOCUMENT("https://github.com/", true)
|
||||
|
||||
LET main = ELEMENT(doc, '.application-main')
|
||||
LOG('innerText:start')
|
||||
LET mainTxt = main.innerText
|
||||
LOG('innerText:end')
|
||||
|
||||
NAVIGATE(doc, "https://github.com/features")
|
||||
|
||||
LET features = ELEMENT(doc, '.application-main')
|
||||
LET featuresTxt = features.innerText
|
||||
|
||||
LOG("featuresTxt:", featuresTxt)
|
||||
|
||||
RETURN mainTxt == featuresTxt
|
||||
|
@ -5,8 +5,8 @@ import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/cmd/cli"
|
||||
"github.com/MontFerret/ferret/pkg/browser"
|
||||
"github.com/MontFerret/ferret/cli"
|
||||
"github.com/MontFerret/ferret/cli/browser"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"io/ioutil"
|
||||
"os"
|
@ -1774,24 +1774,29 @@ func TestParam(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestHtml(t *testing.T) {
|
||||
// Convey("Should load a document", t, func() {
|
||||
// c := compiler.New()
|
||||
//
|
||||
// out, err := c.MustCompile(`
|
||||
//LET doc = DOCUMENT("https://github.com/", true)
|
||||
//LET btn = ELEMENT(doc, ".HeaderMenu a")
|
||||
//
|
||||
//CLICK(btn)
|
||||
//WAIT_NAVIGATION(doc)
|
||||
//WAIT_ELEMENT(doc, '.IconNav')
|
||||
//
|
||||
//RETURN INNER_HTML_ALL(doc, '.IconNav a')
|
||||
//
|
||||
// `).Run(context.Background())
|
||||
//
|
||||
// So(err, ShouldBeNil)
|
||||
//
|
||||
// So(string(out), ShouldEqual, `"int"`)
|
||||
// })
|
||||
}
|
||||
//func TestHtml(t *testing.T) {
|
||||
// Convey("Should load a document", t, func() {
|
||||
// c := compiler.New()
|
||||
//
|
||||
// out, err := c.MustCompile(`
|
||||
//LET doc = DOCUMENT("https://github.com/", true)
|
||||
//LET main = ELEMENT(doc, '.application-main')
|
||||
//LET mainTxt = main.innerText
|
||||
//
|
||||
//NAVIGATE(doc, "https://github.com/features")
|
||||
//
|
||||
//LET features = ELEMENT(doc, '.application-main')
|
||||
//LET featuresTxt = features.innerText
|
||||
//
|
||||
//LOG("featuresTxt:", featuresTxt)
|
||||
//
|
||||
//RETURN mainTxt == featuresTxt
|
||||
//
|
||||
//
|
||||
// `).Run(context.Background())
|
||||
//
|
||||
// So(err, ShouldBeNil)
|
||||
//
|
||||
// So(string(out), ShouldEqual, `"int"`)
|
||||
// })
|
||||
//}
|
||||
|
52
pkg/runtime/core/errors_test.go
Normal file
52
pkg/runtime/core/errors_test.go
Normal file
@ -0,0 +1,52 @@
|
||||
package core_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/pkg/errors"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
)
|
||||
|
||||
func TestSourceError(t *testing.T) {
|
||||
Convey("Should match", t, func() {
|
||||
sm := core.NewSourceMap("test", 1, 1)
|
||||
|
||||
msg := "test at 1:1"
|
||||
cause := errors.New("cause")
|
||||
e := errors.Errorf("%s: %s", cause.Error(), msg)
|
||||
|
||||
cse := core.SourceError(sm, cause)
|
||||
So(cse, ShouldNotBeNil)
|
||||
So(cse.Error(), ShouldEqual, e.Error())
|
||||
})
|
||||
}
|
||||
|
||||
func TestTypeError(t *testing.T) {
|
||||
Convey("Should match", t, func() {
|
||||
e := core.TypeError(core.BooleanType)
|
||||
So(e, ShouldNotBeNil)
|
||||
|
||||
e = core.TypeError(core.BooleanType, core.BooleanType)
|
||||
So(e, ShouldNotBeNil)
|
||||
|
||||
e = core.TypeError(core.BooleanType, core.BooleanType, core.IntType, core.FloatType)
|
||||
So(e, ShouldNotBeNil)
|
||||
|
||||
cause := errors.New("invalid type: expected none or boolean or int or float, but got none")
|
||||
e = core.TypeError(core.NoneType, core.NoneType, core.BooleanType, core.IntType, core.FloatType)
|
||||
So(e.Error(), ShouldEqual, cause.Error())
|
||||
})
|
||||
}
|
||||
|
||||
func TestError(t *testing.T) {
|
||||
Convey("Should match", t, func() {
|
||||
msg := "test message"
|
||||
cause := errors.New("cause")
|
||||
e := errors.Errorf("%s: %s", cause.Error(), msg)
|
||||
|
||||
ce := core.Error(cause, msg)
|
||||
So(ce, ShouldNotBeNil)
|
||||
So(ce.Error(), ShouldEqual, e.Error())
|
||||
})
|
||||
}
|
21
pkg/runtime/core/function_test.go
Normal file
21
pkg/runtime/core/function_test.go
Normal file
@ -0,0 +1,21 @@
|
||||
package core_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
)
|
||||
|
||||
func TestValidateArgs(t *testing.T) {
|
||||
Convey("Should match", t, func() {
|
||||
a := []core.Value{values.NewInt(1), values.NewInt(2)}
|
||||
|
||||
e := core.ValidateArgs(a, 1, 2)
|
||||
So(e, ShouldBeNil)
|
||||
|
||||
e = core.ValidateArgs(a, 3, 4)
|
||||
So(e, ShouldNotBeNil)
|
||||
})
|
||||
}
|
69
pkg/runtime/core/param_test.go
Normal file
69
pkg/runtime/core/param_test.go
Normal file
@ -0,0 +1,69 @@
|
||||
package core_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
)
|
||||
|
||||
func TestParamsWith(t *testing.T) {
|
||||
Convey("Should match", t, func() {
|
||||
p := make(map[string]core.Value)
|
||||
p["val1"] = values.NewInt(1)
|
||||
p["val2"] = values.NewString("test")
|
||||
|
||||
pc := core.ParamsWith(context.Background(), p)
|
||||
|
||||
So(pc, ShouldNotBeNil)
|
||||
So(pc.Value("params"), ShouldEqual, p)
|
||||
})
|
||||
}
|
||||
|
||||
func TestParamsFrom(t *testing.T) {
|
||||
Convey("Should match", t, func() {
|
||||
p := make(map[string]core.Value)
|
||||
p["val1"] = values.NewInt(1)
|
||||
p["val2"] = values.NewString("test")
|
||||
|
||||
_, err := core.ParamsFrom(context.Background())
|
||||
|
||||
So(err, ShouldNotBeNil)
|
||||
|
||||
ctx := context.WithValue(context.Background(), "fail", p)
|
||||
pf, err := core.ParamsFrom(ctx)
|
||||
|
||||
So(err, ShouldNotBeNil)
|
||||
|
||||
ctx = context.WithValue(context.Background(), "params", p)
|
||||
pf, err = core.ParamsFrom(ctx)
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
So(pf, ShouldEqual, p)
|
||||
})
|
||||
}
|
||||
|
||||
func TestParamFrom(t *testing.T) {
|
||||
Convey("Should match", t, func() {
|
||||
p := make(map[string]core.Value)
|
||||
p["val1"] = values.NewInt(1)
|
||||
p["val2"] = values.NewString("test")
|
||||
|
||||
_, err := core.ParamFrom(context.Background(), "")
|
||||
|
||||
So(err, ShouldNotBeNil)
|
||||
|
||||
ctx := context.WithValue(context.Background(), "fail", p)
|
||||
_, err = core.ParamFrom(ctx, "val1")
|
||||
|
||||
So(err, ShouldNotBeNil)
|
||||
|
||||
ctx = context.WithValue(context.Background(), "params", p)
|
||||
v, err := core.ParamFrom(ctx, "val1")
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
So(v, ShouldEqual, values.NewInt(1))
|
||||
})
|
||||
}
|
@ -45,7 +45,7 @@ type Value interface {
|
||||
String() string
|
||||
Compare(other Value) int
|
||||
Unwrap() interface{}
|
||||
Hash() int
|
||||
Hash() uint64
|
||||
Clone() Value
|
||||
}
|
||||
|
||||
|
@ -70,10 +70,10 @@ func (e *ForExpression) Exec(ctx context.Context, scope *core.Scope) (core.Value
|
||||
}
|
||||
|
||||
// Hash map for a check for uniqueness
|
||||
var hashes map[int]bool
|
||||
var hashes map[uint64]bool
|
||||
|
||||
if e.distinct {
|
||||
hashes = make(map[int]bool)
|
||||
hashes = make(map[uint64]bool)
|
||||
}
|
||||
|
||||
res := values.NewArray(10)
|
||||
|
@ -1,10 +1,11 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/pkg/errors"
|
||||
"hash/fnv"
|
||||
)
|
||||
|
||||
type (
|
||||
@ -77,22 +78,29 @@ func (t *Array) Unwrap() interface{} {
|
||||
return arr
|
||||
}
|
||||
|
||||
func (t *Array) Hash() int {
|
||||
bytes, err := t.MarshalJSON()
|
||||
func (t *Array) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
h.Write([]byte(t.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write([]byte("["))
|
||||
|
||||
endIndex := len(t.value) - 1
|
||||
|
||||
for i, el := range t.value {
|
||||
bytes := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(bytes, el.Hash())
|
||||
|
||||
h.Write(bytes)
|
||||
|
||||
if i != endIndex {
|
||||
h.Write([]byte(","))
|
||||
}
|
||||
}
|
||||
|
||||
h := sha512.New()
|
||||
h.Write([]byte("]"))
|
||||
|
||||
out, err := h.Write(bytes)
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (t *Array) Clone() core.Value {
|
||||
|
@ -119,6 +119,45 @@ func TestArray(t *testing.T) {
|
||||
})
|
||||
})
|
||||
|
||||
Convey(".Hash", t, func() {
|
||||
Convey("It should calculate hash of non-empty array", func() {
|
||||
arr := values.NewArrayWith(
|
||||
values.NewInt(1),
|
||||
values.NewInt(2),
|
||||
values.NewInt(3),
|
||||
)
|
||||
|
||||
h := arr.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
})
|
||||
|
||||
Convey("It should calculate hash of empty array", func() {
|
||||
arr := values.NewArrayWith()
|
||||
|
||||
h := arr.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
})
|
||||
|
||||
Convey("Hash sum should be consistent", func() {
|
||||
arr := values.NewArrayWith(
|
||||
values.True,
|
||||
values.NewInt(1),
|
||||
values.NewFloat(1.1),
|
||||
values.NewString("foobar"),
|
||||
values.NewCurrentDateTime(),
|
||||
values.NewArrayWith(values.NewInt(1), values.True),
|
||||
values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar"))),
|
||||
)
|
||||
|
||||
h1 := arr.Hash()
|
||||
h2 := arr.Hash()
|
||||
|
||||
So(h1, ShouldEqual, h2)
|
||||
})
|
||||
})
|
||||
|
||||
Convey(".Length", t, func() {
|
||||
Convey("Should return 0 when empty", func() {
|
||||
arr := values.NewArray(1)
|
||||
|
@ -1,9 +1,9 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
)
|
||||
@ -62,16 +62,14 @@ func (b *Binary) Unwrap() interface{} {
|
||||
return b.values
|
||||
}
|
||||
|
||||
func (b *Binary) Hash() int {
|
||||
h := sha512.New()
|
||||
func (b *Binary) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
out, err := h.Write(b.values)
|
||||
h.Write([]byte(b.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write(b.values)
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (b *Binary) Clone() core.Value {
|
||||
|
@ -1,10 +1,10 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/pkg/errors"
|
||||
"hash/fnv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@ -98,16 +98,14 @@ func (t Boolean) Unwrap() interface{} {
|
||||
return bool(t)
|
||||
}
|
||||
|
||||
func (t Boolean) Hash() int {
|
||||
h := sha512.New()
|
||||
func (t Boolean) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
out, err := h.Write([]byte(t.String()))
|
||||
h.Write([]byte(t.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write([]byte(t.String()))
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (t Boolean) Clone() core.Value {
|
||||
|
@ -70,4 +70,16 @@ func TestBoolean(t *testing.T) {
|
||||
So(values.False.Compare(values.True), ShouldEqual, -1)
|
||||
})
|
||||
})
|
||||
|
||||
Convey(".Hash", t, func() {
|
||||
Convey("It should calculate hash", func() {
|
||||
So(values.True.Hash(), ShouldBeGreaterThan, 0)
|
||||
So(values.False.Hash(), ShouldBeGreaterThan, 0)
|
||||
})
|
||||
|
||||
Convey("Hash sum should be consistent", func() {
|
||||
So(values.True.Hash(), ShouldEqual, values.True.Hash())
|
||||
So(values.False.Hash(), ShouldEqual, values.False.Hash())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"hash/fnv"
|
||||
"time"
|
||||
)
|
||||
|
||||
@ -12,6 +12,10 @@ type DateTime struct {
|
||||
time.Time
|
||||
}
|
||||
|
||||
func NewCurrentDateTime() DateTime {
|
||||
return DateTime{time.Now()}
|
||||
}
|
||||
|
||||
func NewDateTime(time time.Time) DateTime {
|
||||
return DateTime{time}
|
||||
}
|
||||
@ -84,18 +88,21 @@ func (t DateTime) Unwrap() interface{} {
|
||||
return t.Time
|
||||
}
|
||||
|
||||
func (t DateTime) Hash() int {
|
||||
h := sha512.New()
|
||||
func (t DateTime) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
t.Time.MarshalJSON()
|
||||
h.Write([]byte(t.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
|
||||
out, err := h.Write([]byte(t.Time.String()))
|
||||
bytes, err := t.Time.GobEncode()
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
h.Write(bytes)
|
||||
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (t DateTime) Clone() core.Value {
|
||||
|
25
pkg/runtime/values/date_time_test.go
Normal file
25
pkg/runtime/values/date_time_test.go
Normal file
@ -0,0 +1,25 @@
|
||||
package values_test
|
||||
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDateTime(t *testing.T) {
|
||||
Convey(".Hash", t, func() {
|
||||
Convey("It should calculate hash", func() {
|
||||
d := values.NewCurrentDateTime()
|
||||
|
||||
h := d.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
})
|
||||
|
||||
Convey("Hash sum should be consistent", func() {
|
||||
d := values.NewCurrentDateTime()
|
||||
|
||||
So(d.Hash(), ShouldEqual, d.Hash())
|
||||
})
|
||||
})
|
||||
}
|
@ -1,11 +1,13 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/pkg/errors"
|
||||
"hash/fnv"
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
@ -112,22 +114,17 @@ func (t Float) Unwrap() interface{} {
|
||||
return float64(t)
|
||||
}
|
||||
|
||||
func (t Float) Hash() int {
|
||||
bytes, err := t.MarshalJSON()
|
||||
func (t Float) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
h.Write([]byte(t.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
|
||||
h := sha512.New()
|
||||
bytes := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(bytes, math.Float64bits(float64(t)))
|
||||
h.Write(bytes)
|
||||
|
||||
out, err := h.Write(bytes)
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (t Float) Clone() core.Value {
|
||||
|
29
pkg/runtime/values/float_test.go
Normal file
29
pkg/runtime/values/float_test.go
Normal file
@ -0,0 +1,29 @@
|
||||
package values_test
|
||||
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFloat(t *testing.T) {
|
||||
Convey(".Hash", t, func() {
|
||||
Convey("It should calculate hash", func() {
|
||||
v := values.NewFloat(1.1)
|
||||
|
||||
h := v.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
|
||||
v2 := values.NewFloat(1.2)
|
||||
|
||||
So(h, ShouldNotEqual, v2.Hash())
|
||||
})
|
||||
|
||||
Convey("Hash sum should be consistent", func() {
|
||||
v := values.NewFloat(1.1)
|
||||
|
||||
So(v.Hash(), ShouldEqual, v.Hash())
|
||||
})
|
||||
})
|
||||
}
|
@ -1,9 +1,11 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/pkg/errors"
|
||||
"hash/fnv"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
@ -110,8 +112,17 @@ func (t Int) Unwrap() interface{} {
|
||||
return int(t)
|
||||
}
|
||||
|
||||
func (t Int) Hash() int {
|
||||
return int(t)
|
||||
func (t Int) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
h.Write([]byte(t.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
|
||||
bytes := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(bytes, uint64(t))
|
||||
h.Write(bytes)
|
||||
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (t Int) Clone() core.Value {
|
||||
|
29
pkg/runtime/values/int_test.go
Normal file
29
pkg/runtime/values/int_test.go
Normal file
@ -0,0 +1,29 @@
|
||||
package values_test
|
||||
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInt(t *testing.T) {
|
||||
Convey(".Hash", t, func() {
|
||||
Convey("It should calculate hash", func() {
|
||||
v := values.NewInt(1)
|
||||
|
||||
h := v.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
|
||||
v2 := values.NewInt(2)
|
||||
|
||||
So(h, ShouldNotEqual, v2.Hash())
|
||||
})
|
||||
|
||||
Convey("Hash sum should be consistent", func() {
|
||||
v := values.NewInt(1)
|
||||
|
||||
So(v.Hash(), ShouldEqual, v.Hash())
|
||||
})
|
||||
})
|
||||
}
|
@ -33,7 +33,7 @@ func (t *none) Unwrap() interface{} {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *none) Hash() int {
|
||||
func (t *none) Hash() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
|
@ -1,15 +1,17 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
)
|
||||
|
||||
type (
|
||||
ObjectPredicate = func(value core.Value, key string) bool
|
||||
ObjectProperty struct {
|
||||
name string
|
||||
key string
|
||||
value core.Value
|
||||
}
|
||||
Object struct {
|
||||
@ -29,7 +31,7 @@ func NewObjectWith(props ...*ObjectProperty) *Object {
|
||||
obj := NewObject()
|
||||
|
||||
for _, prop := range props {
|
||||
obj.value[prop.name] = prop.value
|
||||
obj.value[prop.key] = prop.value
|
||||
}
|
||||
|
||||
return obj
|
||||
@ -88,22 +90,43 @@ func (t *Object) Unwrap() interface{} {
|
||||
return obj
|
||||
}
|
||||
|
||||
func (t *Object) Hash() int {
|
||||
bytes, err := t.MarshalJSON()
|
||||
func (t *Object) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
h.Write([]byte(t.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write([]byte("{"))
|
||||
|
||||
keys := make([]string, 0, len(t.value))
|
||||
|
||||
for key := range t.value {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
|
||||
h := sha512.New()
|
||||
// order does not really matter
|
||||
// but it will give us a consistent hash sum
|
||||
sort.Strings(keys)
|
||||
endIndex := len(keys) - 1
|
||||
|
||||
out, err := h.Write(bytes)
|
||||
for idx, key := range keys {
|
||||
h.Write([]byte(key))
|
||||
h.Write([]byte(":"))
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
el := t.value[key]
|
||||
|
||||
bytes := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(bytes, el.Hash())
|
||||
|
||||
h.Write(bytes)
|
||||
|
||||
if idx != endIndex {
|
||||
h.Write([]byte(","))
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
h.Write([]byte("}"))
|
||||
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (t *Object) Clone() core.Value {
|
||||
|
@ -142,6 +142,45 @@ func TestObject(t *testing.T) {
|
||||
})
|
||||
})
|
||||
|
||||
Convey(".Hash", t, func() {
|
||||
Convey("It should calculate hash of non-empty object", func() {
|
||||
v := values.NewObjectWith(
|
||||
values.NewObjectProperty("foo", values.NewString("bar")),
|
||||
values.NewObjectProperty("faz", values.NewInt(1)),
|
||||
values.NewObjectProperty("qaz", values.True),
|
||||
)
|
||||
|
||||
h := v.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
})
|
||||
|
||||
Convey("It should calculate hash of empty object", func() {
|
||||
v := values.NewObject()
|
||||
|
||||
h := v.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
})
|
||||
|
||||
Convey("Hash sum should be consistent", func() {
|
||||
v := values.NewObjectWith(
|
||||
values.NewObjectProperty("boolean", values.True),
|
||||
values.NewObjectProperty("int", values.NewInt(1)),
|
||||
values.NewObjectProperty("float", values.NewFloat(1.1)),
|
||||
values.NewObjectProperty("string", values.NewString("foobar")),
|
||||
values.NewObjectProperty("datetime", values.NewCurrentDateTime()),
|
||||
values.NewObjectProperty("array", values.NewArrayWith(values.NewInt(1), values.True)),
|
||||
values.NewObjectProperty("object", values.NewObjectWith(values.NewObjectProperty("foo", values.NewString("bar")))),
|
||||
)
|
||||
|
||||
h1 := v.Hash()
|
||||
h2 := v.Hash()
|
||||
|
||||
So(h1, ShouldEqual, h2)
|
||||
})
|
||||
})
|
||||
|
||||
Convey(".Length", t, func() {
|
||||
Convey("Should return 0 when empty", func() {
|
||||
obj := values.NewObject()
|
||||
|
@ -1,11 +1,11 @@
|
||||
package values
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/pkg/errors"
|
||||
"hash/fnv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@ -93,16 +93,14 @@ func (t String) Unwrap() interface{} {
|
||||
return string(t)
|
||||
}
|
||||
|
||||
func (t String) Hash() int {
|
||||
h := sha512.New()
|
||||
func (t String) Hash() uint64 {
|
||||
h := fnv.New64a()
|
||||
|
||||
out, err := h.Write([]byte(t))
|
||||
h.Write([]byte(t.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write([]byte(t))
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (t String) Clone() core.Value {
|
||||
|
29
pkg/runtime/values/string_test.go
Normal file
29
pkg/runtime/values/string_test.go
Normal file
@ -0,0 +1,29 @@
|
||||
package values_test
|
||||
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestString(t *testing.T) {
|
||||
Convey(".Hash", t, func() {
|
||||
Convey("It should calculate hash", func() {
|
||||
v := values.NewString("a")
|
||||
|
||||
h := v.Hash()
|
||||
|
||||
So(h, ShouldBeGreaterThan, 0)
|
||||
|
||||
v2 := values.NewString("b")
|
||||
|
||||
So(h, ShouldNotEqual, v2.Hash())
|
||||
})
|
||||
|
||||
Convey("Hash sum should be consistent", func() {
|
||||
v := values.NewString("foobar")
|
||||
|
||||
So(v.Hash(), ShouldEqual, v.Hash())
|
||||
})
|
||||
})
|
||||
}
|
@ -2,7 +2,6 @@ package dynamic
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha512"
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/logging"
|
||||
@ -17,10 +16,13 @@ import (
|
||||
"github.com/mafredri/cdp/rpcc"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rs/zerolog"
|
||||
"hash/fnv"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
const BlankPageUrl = "about:blank"
|
||||
|
||||
type HtmlDocument struct {
|
||||
sync.Mutex
|
||||
logger *zerolog.Logger
|
||||
@ -78,10 +80,12 @@ func LoadHtmlDocument(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = waitForLoadEvent(ctx, client)
|
||||
if url != BlankPageUrl {
|
||||
err = waitForLoadEvent(ctx, client)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
root, innerHtml, err := getRootElement(client)
|
||||
@ -201,19 +205,17 @@ func (doc *HtmlDocument) Unwrap() interface{} {
|
||||
return doc.element
|
||||
}
|
||||
|
||||
func (doc *HtmlDocument) Hash() int {
|
||||
func (doc *HtmlDocument) Hash() uint64 {
|
||||
doc.Lock()
|
||||
defer doc.Unlock()
|
||||
|
||||
h := sha512.New()
|
||||
h := fnv.New64a()
|
||||
|
||||
out, err := h.Write([]byte(doc.url))
|
||||
h.Write([]byte(doc.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write([]byte(doc.url))
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (doc *HtmlDocument) Clone() core.Value {
|
||||
@ -637,6 +639,10 @@ func (doc *HtmlDocument) WaitForNavigation(timeout values.Int) error {
|
||||
}
|
||||
|
||||
func (doc *HtmlDocument) Navigate(url values.String) error {
|
||||
if url == "" {
|
||||
url = BlankPageUrl
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
repl, err := doc.client.Page.Navigate(ctx, page.NewNavigateArgs(url.String()))
|
||||
|
||||
@ -648,5 +654,5 @@ func (doc *HtmlDocument) Navigate(url values.String) error {
|
||||
return errors.New(*repl.ErrorText)
|
||||
}
|
||||
|
||||
return waitForLoadEvent(ctx, doc.client)
|
||||
return doc.WaitForNavigation(5000)
|
||||
}
|
||||
|
@ -38,6 +38,10 @@ func (drv *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode
|
||||
ctx, cancel := context.WithTimeout(ctx, DefaultTimeout)
|
||||
defer cancel()
|
||||
|
||||
if url == "" {
|
||||
url = BlankPageUrl
|
||||
}
|
||||
|
||||
// Create a new target belonging to the browser context, similar
|
||||
// to opening a new tab in an incognito window.
|
||||
createTargetArgs := target.NewCreateTargetArgs(url).SetBrowserContextID(drv.contextID)
|
||||
|
@ -3,7 +3,6 @@ package dynamic
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha512"
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
@ -14,6 +13,7 @@ import (
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/protocol/dom"
|
||||
"github.com/rs/zerolog"
|
||||
"hash/fnv"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
@ -188,24 +188,17 @@ func (el *HtmlElement) Unwrap() interface{} {
|
||||
return el
|
||||
}
|
||||
|
||||
func (el *HtmlElement) Hash() int {
|
||||
func (el *HtmlElement) Hash() uint64 {
|
||||
el.Lock()
|
||||
defer el.Unlock()
|
||||
|
||||
h := sha512.New()
|
||||
h := fnv.New64a()
|
||||
|
||||
out, err := h.Write([]byte(el.innerHtml))
|
||||
h.Write([]byte(el.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write([]byte(el.innerHtml))
|
||||
|
||||
if err != nil {
|
||||
el.logger.Error().
|
||||
Timestamp().
|
||||
Err(err).
|
||||
Msg("failed to calculate hash value")
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (el *HtmlElement) Value() core.Value {
|
||||
@ -297,7 +290,7 @@ func (el *HtmlElement) GetChildNode(idx values.Int) core.Value {
|
||||
|
||||
func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
|
||||
if !el.IsConnected() {
|
||||
return values.NewArray(0)
|
||||
return values.None
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
@ -1,12 +1,12 @@
|
||||
package static
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"hash/fnv"
|
||||
)
|
||||
|
||||
type HtmlElement struct {
|
||||
@ -53,22 +53,20 @@ func (el *HtmlElement) Unwrap() interface{} {
|
||||
return el.selection
|
||||
}
|
||||
|
||||
func (el *HtmlElement) Hash() int {
|
||||
h := sha512.New()
|
||||
|
||||
func (el *HtmlElement) Hash() uint64 {
|
||||
str, err := el.selection.Html()
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
out, err := h.Write([]byte(str))
|
||||
h := fnv.New64a()
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
h.Write([]byte(el.Type().String()))
|
||||
h.Write([]byte(":"))
|
||||
h.Write([]byte(str))
|
||||
|
||||
return out
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (el *HtmlElement) Clone() core.Value {
|
||||
|
Loading…
Reference in New Issue
Block a user