1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-04-11 11:42:13 +02:00

Feature/#230 xpath (#322)

* Implemented XPath for CDP driver

* Added XPATH function

* Added e2e tests for CDP

* Fixed linting issues

* Added support of XPath to HTTP driver

* Fixed linting issues
This commit is contained in:
Tim Voronov 2019-07-03 14:05:02 -04:00 committed by GitHub
parent 35bfc5e71d
commit 7ce6797e9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 589 additions and 669 deletions

File diff suppressed because one or more lines are too long

View File

@ -14,6 +14,7 @@ import (
"github.com/MontFerret/ferret/pkg/drivers/http"
"github.com/MontFerret/ferret/pkg/runtime"
"github.com/gobwas/glob"
"github.com/pkg/errors"
"github.com/rs/zerolog"
)
@ -103,15 +104,23 @@ func (r *Runner) runQueries(ctx context.Context, dir string) ([]Result, error) {
return nil, err
}
var filter glob.Glob
var useFilter bool
if r.settings.Filter != "" {
f, err := glob.Compile(r.settings.Filter)
if err != nil {
return nil, err
}
filter = f
useFilter = true
}
err := r.traverseDir(ctx, dir, func(name string) error {
if r.settings.Filter != "" {
matched, err := filepath.Match(r.settings.Filter, name)
if err != nil {
return err
}
if !matched {
if useFilter {
if !filter.Match(name) {
return nil
}
}

View File

@ -0,0 +1,6 @@
LET url = @dynamic
LET page = DOCUMENT(url, true)
LET actual = XPATH(page, "count(//body)")
RETURN EXPECT(1, actual)

View File

@ -0,0 +1,6 @@
LET url = @dynamic + "?redirect=/forms"
LET page = DOCUMENT(url, true)
LET actual = XPATH(page, "//div[contains(@class, 'form-group')]")
RETURN EXPECT(4, LENGTH(actual))

View File

@ -0,0 +1,7 @@
LET url = @dynamic
LET page = DOCUMENT(url, true)
LET el = ELEMENT(page, 'main')
LET actual = XPATH(el, "count(//p)")
RETURN EXPECT(1, actual)

View File

@ -0,0 +1,7 @@
LET url = @dynamic + "?redirect=/forms"
LET page = DOCUMENT(url, true)
LET element = ELEMENT(page, '#page-form')
LET actual = XPATH(element, "//div[contains(@class, 'form-group')]")
RETURN EXPECT(4, LENGTH(actual))

View File

@ -0,0 +1,6 @@
LET url = @static + '/overview.html'
LET page = DOCUMENT(url)
LET actual = XPATH(page, "count(//body)")
RETURN EXPECT(1, actual)

View File

@ -0,0 +1,6 @@
LET url = @static + '/value.html'
LET page = DOCUMENT(url)
LET actual = XPATH(page, "//tr[contains(@class, 'odd')]")
RETURN EXPECT(20, LENGTH(actual))

View File

@ -0,0 +1,7 @@
LET url = @static + '/value.html'
LET page = DOCUMENT(url)
LET el = ELEMENT(page, '#listings_table')
LET actual = XPATH(el, "count(//tr)")
RETURN EXPECT(41, actual)

View File

@ -0,0 +1,7 @@
LET url = @static + '/value.html'
LET page = DOCUMENT(url, true)
LET element = ELEMENT(page, '.tablesorter')
LET actual = XPATH(element, "//input[contains(@type, 'hidden')]")
RETURN EXPECT(40, LENGTH(actual))

3
go.mod
View File

@ -5,6 +5,8 @@ go 1.12
require (
github.com/BurntSushi/toml v0.3.1 // indirect
github.com/PuerkitoBio/goquery v1.5.0
github.com/antchfx/htmlquery v1.0.0
github.com/antchfx/xpath v1.0.0
github.com/antlr/antlr4 v0.0.0-20190325153624-837aa60e2c47
github.com/chzyer/logex v1.1.10 // indirect
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
@ -12,6 +14,7 @@ require (
github.com/corpix/uarand v0.0.0
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/derekparker/trie v0.0.0-20190322172448-1ce4922c7ad9
github.com/gobwas/glob v0.2.3
github.com/gofrs/uuid v3.2.0+incompatible
github.com/google/go-cmp v0.2.0 // indirect
github.com/gopherjs/gopherjs v0.0.0-20190328170749-bb2674552d8f // indirect

13
go.sum
View File

@ -4,6 +4,10 @@ github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA=
github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk=
github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antlr/antlr4 v0.0.0-20190325153624-837aa60e2c47 h1:Lp5nUoQzppfVmfZadpzAytNyb5IMtxyOJLzoQS5dExg=
github.com/antlr/antlr4 v0.0.0-20190325153624-837aa60e2c47/go.mod h1:T7PbCXFs94rrTttyxjbyT5+/1V8T2TYDejxUfHJjw1Y=
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
@ -20,6 +24,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/derekparker/trie v0.0.0-20190322172448-1ce4922c7ad9 h1:aSaTVlEXc2QKl4fzXU1tMYCjlrSc2mA4DZtiVfckQHo=
github.com/derekparker/trie v0.0.0-20190322172448-1ce4922c7ad9/go.mod h1:D6ICZm05D9VN1n/8iOtBxLpXtoGp6HDFUJ1RNVieOSE=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gofrs/uuid v3.2.0+incompatible h1:y12jRkkFxsd7GpqdSZ+/KCs/fJbqpEXSGd4+jfEaewE=
github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
@ -42,12 +48,6 @@ github.com/labstack/echo v3.3.10+incompatible h1:pGRcYk231ExFAyoAjAfD85kQzRJCRI8
github.com/labstack/echo v3.3.10+incompatible/go.mod h1:0INS7j/VjnFxD4E2wkz67b8cVwCLbBmJyDaka6Cmk1s=
github.com/labstack/gommon v0.2.8 h1:JvRqmeZcfrHC5u6uVleB4NxxNbzx6gpbJiQknDbKQu0=
github.com/labstack/gommon v0.2.8/go.mod h1:/tj9csK2iPSBvn+3NLM9e52usepMtrd5ilFYA+wQNJ4=
github.com/mafredri/cdp v0.23.1 h1:aqW20I/3CzR8/8VEj+d4zV97l3GU7VdCgi8OTGeJKkA=
github.com/mafredri/cdp v0.23.1/go.mod h1:hgdiA0yp1uqhSaDOHJWPgXpMbh+LAfUdD9vbN2AM8gE=
github.com/mafredri/cdp v0.23.2 h1:i+t1sFhPh9V6MbZFczhlp800k6eRiskeRAw2KH5qmmQ=
github.com/mafredri/cdp v0.23.2/go.mod h1:hgdiA0yp1uqhSaDOHJWPgXpMbh+LAfUdD9vbN2AM8gE=
github.com/mafredri/cdp v0.23.3 h1:tqpZsezUug2C3XtL3+9KImpaXxSFuU22u5UNJPZSc3Y=
github.com/mafredri/cdp v0.23.3/go.mod h1:hgdiA0yp1uqhSaDOHJWPgXpMbh+LAfUdD9vbN2AM8gE=
github.com/mafredri/cdp v0.23.4 h1:ffp4qq6slfCL4rFWBDeRHapkLE776gER4tX5Z3LS8CY=
github.com/mafredri/cdp v0.23.4/go.mod h1:hgdiA0yp1uqhSaDOHJWPgXpMbh+LAfUdD9vbN2AM8gE=
github.com/mattn/go-colorable v0.1.1 h1:G1f5SKeVxmagw/IyvzvtZE4Gybcc4Tr1tf7I8z0XgOg=
@ -93,6 +93,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190322080309-f49334f85ddc h1:4gbWbmmPFp4ySWICouJl6emP0MyS31yy9SrTlAGFT+g=
golang.org/x/sys v0.0.0-20190322080309-f49334f85ddc/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

View File

@ -3,13 +3,14 @@ package cdp
import (
"context"
"fmt"
"hash/fnv"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
"github.com/mafredri/cdp/protocol/runtime"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"hash/fnv"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
@ -55,6 +56,12 @@ func LoadRootHTMLDocument(
return nil, err
}
worldRepl, err := client.Page.CreateIsolatedWorld(ctx, page.NewCreateIsolatedWorldArgs(ftRepl.FrameTree.Frame.ID))
if err != nil {
return nil, err
}
return LoadHTMLDocument(
ctx,
logger,
@ -64,7 +71,7 @@ func LoadRootHTMLDocument(
keyboard,
gdRepl.Root,
ftRepl.FrameTree,
eval.EmptyExecutionContextID,
worldRepl.ExecutionContextID,
nil,
)
}
@ -92,7 +99,6 @@ func LoadHTMLDocument(
inputManager,
exec,
node.NodeID,
node.BackendNodeID,
)
if err != nil {
@ -297,6 +303,10 @@ func (doc *HTMLDocument) GetChildDocuments(ctx context.Context) (*values.Array,
return children.Copy().(*values.Array), nil
}
// XPath evaluates the given XPath expression against this document.
// The call is forwarded unchanged to the document's underlying element
// (doc.element), so the result and error are exactly what that element's
// XPath method returns.
func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) {
	return doc.element.XPath(ctx, expression)
}
func (doc *HTMLDocument) Length() values.Int {
return doc.element.Length()
}

View File

@ -4,6 +4,8 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/MontFerret/ferret/pkg/drivers/cdp/templates"
"github.com/pkg/errors"
"golang.org/x/net/html"
"hash/fnv"
"strconv"
@ -26,13 +28,11 @@ import (
)
var emptyNodeID = dom.NodeID(0)
var emptyBackendID = dom.BackendNodeID(0)
type (
HTMLElementIdentity struct {
nodeID dom.NodeID
backendID dom.BackendNodeID
objectID runtime.RemoteObjectID
nodeID dom.NodeID
objectID runtime.RemoteObjectID
}
HTMLElement struct {
@ -64,20 +64,13 @@ func LoadHTMLElement(
input *input.Manager,
exec *eval.ExecutionContext,
nodeID dom.NodeID,
backendID dom.BackendNodeID,
) (*HTMLElement, error) {
if client == nil {
return nil, core.Error(core.ErrMissedArgument, "client")
}
// getting a remote object that represents the current DOM Node
var args *dom.ResolveNodeArgs
if backendID > 0 {
args = dom.NewResolveNodeArgs().SetBackendNodeID(backendID)
} else {
args = dom.NewResolveNodeArgs().SetNodeID(nodeID)
}
args := dom.NewResolveNodeArgs().SetNodeID(nodeID).SetExecutionContextID(exec.ID())
obj, err := client.DOM.ResolveNode(ctx, args)
@ -89,34 +82,46 @@ func LoadHTMLElement(
return nil, core.Error(core.ErrNotFound, fmt.Sprintf("element %d", nodeID))
}
objectID := *obj.Object.ObjectID
id := HTMLElementIdentity{}
id.nodeID = nodeID
id.objectID = *obj.Object.ObjectID
return LoadHTMLElementWithID(
ctx,
logger,
client,
broker,
input,
exec,
id,
)
}
func LoadHTMLElementWithID(
ctx context.Context,
logger *zerolog.Logger,
client *cdp.Client,
broker *events.EventBroker,
input *input.Manager,
exec *eval.ExecutionContext,
id HTMLElementIdentity,
) (*HTMLElement, error) {
node, err := client.DOM.DescribeNode(
ctx,
dom.
NewDescribeNodeArgs().
SetObjectID(objectID).
SetObjectID(id.objectID).
SetDepth(1),
)
if err != nil {
return nil, core.Error(err, strconv.Itoa(int(nodeID)))
}
id := HTMLElementIdentity{}
id.nodeID = nodeID
id.objectID = objectID
if backendID > 0 {
id.backendID = backendID
} else {
id.backendID = node.Node.BackendNodeID
return nil, core.Error(err, strconv.Itoa(int(id.nodeID)))
}
innerHTML, err := loadInnerHTML(ctx, client, exec, id, common.ToHTMLType(node.Node.NodeType))
if err != nil {
return nil, core.Error(err, strconv.Itoa(int(nodeID)))
return nil, core.Error(err, strconv.Itoa(int(id.nodeID)))
}
var val string
@ -506,7 +511,7 @@ func (el *HTMLElement) QuerySelector(ctx context.Context, selector values.String
return values.None
}
res, err := LoadHTMLElement(ctx, el.logger, el.client, el.events, el.input, el.exec, found.NodeID, emptyBackendID)
res, err := LoadHTMLElement(ctx, el.logger, el.client, el.events, el.input, el.exec, found.NodeID)
if err != nil {
el.logError(err).
@ -547,7 +552,7 @@ func (el *HTMLElement) QuerySelectorAll(ctx context.Context, selector values.Str
continue
}
childEl, err := LoadHTMLElement(ctx, el.logger, el.client, el.events, el.input, el.exec, id, emptyBackendID)
childEl, err := LoadHTMLElement(ctx, el.logger, el.client, el.events, el.input, el.exec, id)
if err != nil {
el.logError(err).
@ -572,6 +577,153 @@ func (el *HTMLElement) QuerySelectorAll(ctx context.Context, selector values.Str
return arr
}
// XPath evaluates the given XPath expression in the browser, passing this
// element as the context node to the script returned by templates.XPath().
//
// The shape of the result depends on what the script returns:
//   - string, number or boolean: the value is unmarshalled with eval.Unmarshal;
//   - an array: a values.Array of *HTMLElement, one per enumerable entry;
//   - any other object that has a remote object id: a single *HTMLElement;
//   - anything else (including results without an object id): values.None.
//
// On error the returned value is always values.None.
func (el *HTMLElement) XPath(ctx context.Context, expression values.String) (result core.Value, err error) {
	// The expression is passed to the script as a JSON-encoded call argument.
	exp, err := expression.MarshalJSON()

	if err != nil {
		return values.None, err
	}

	out, err := el.exec.CallFunction(ctx, templates.XPath(),
		runtime.CallArgument{
			ObjectID: &el.id.objectID,
		},
		runtime.CallArgument{
			Value: json.RawMessage(exp),
		},
	)

	if err != nil {
		return values.None, err
	}

	typeName := out.Type

	// The reported type does not distinguish arrays from other objects,
	// so ask the page whether the returned object is actually an array.
	if typeName == "object" {
		isArrayRes, err := el.exec.CallFunction(ctx, `
			(target) => Array.isArray(target)
		`,
			runtime.CallArgument{
				ObjectID: out.ObjectID,
			},
		)

		if err != nil {
			return values.None, err
		}

		isArray, err := eval.Unmarshal(&isArrayRes)

		if err != nil {
			return values.None, err
		}

		if isArray == values.True {
			typeName = "array"
		}
	}

	switch typeName {
	case "string", "number", "boolean":
		return eval.Unmarshal(&out)
	case "array":
		if out.ObjectID == nil {
			return values.None, nil
		}

		return el.loadXPathNodes(ctx, *out.ObjectID)
	case "object":
		if out.ObjectID == nil {
			return values.None, nil
		}

		node, err := el.loadXPathNode(ctx, *out.ObjectID)

		if err != nil {
			// Return an explicit None instead of a typed-nil *HTMLElement
			// wrapped in the core.Value interface.
			return values.None, err
		}

		return node, nil
	default:
		return values.None, nil
	}
}

// loadXPathNodes loads every enumerable entry of the remote array identified
// by arrayID as an *HTMLElement and returns them as a values.Array.
// If loading any entry fails, the elements loaded so far are closed before
// the error is returned.
func (el *HTMLElement) loadXPathNodes(ctx context.Context, arrayID runtime.RemoteObjectID) (res core.Value, err error) {
	props, err := el.client.Runtime.GetProperties(ctx, runtime.NewGetPropertiesArgs(arrayID).SetOwnProperties(true))

	if err != nil {
		return values.None, err
	}

	if props.ExceptionDetails != nil {
		exception := *props.ExceptionDetails

		return values.None, errors.New(exception.Text)
	}

	items := values.NewArray(len(props.Result))

	// The closure reads the function-level named result err, which holds the
	// value passed to return by the time deferred calls run. No inner scope
	// below re-declares err, so the cleanup actually fires on failure.
	defer func() {
		if err == nil {
			return
		}

		items.ForEach(func(value core.Value, _ int) bool {
			if loaded, ok := value.(*HTMLElement); ok {
				// Best-effort cleanup: the original error is what gets reported.
				loaded.Close()
			}

			return true
		})
	}()

	for _, descr := range props.Result {
		if !descr.Enumerable {
			continue
		}

		// Entries without a value or without a remote object id cannot be
		// resolved to a DOM node, so they are skipped rather than dereferenced.
		if descr.Value == nil || descr.Value.ObjectID == nil {
			continue
		}

		var node *HTMLElement

		node, err = el.loadXPathNode(ctx, *descr.Value.ObjectID)

		if err != nil {
			return values.None, err
		}

		items.Push(node)
	}

	return items, nil
}

// loadXPathNode requests the DOM node behind the given remote object and
// loads it as an *HTMLElement that shares this element's client, event
// broker, input manager and execution context.
func (el *HTMLElement) loadXPathNode(ctx context.Context, objectID runtime.RemoteObjectID) (*HTMLElement, error) {
	repl, err := el.client.DOM.RequestNode(ctx, dom.NewRequestNodeArgs(objectID))

	if err != nil {
		return nil, err
	}

	return LoadHTMLElementWithID(
		ctx,
		el.logger,
		el.client,
		el.events,
		el.input,
		el.exec,
		HTMLElementIdentity{
			nodeID:   repl.NodeID,
			objectID: objectID,
		},
	)
}
func (el *HTMLElement) GetInnerText(ctx context.Context) values.String {
val, err := el.innerText.Read(ctx)
@ -985,7 +1137,6 @@ func (el *HTMLElement) loadChildren(ctx context.Context) (core.Value, error) {
el.input,
el.exec,
childID.nodeID,
childID.backendID,
)
if err != nil {
@ -1156,8 +1307,7 @@ func (el *HTMLElement) handleChildInserted(ctx context.Context, message interfac
}
nextIdentity := HTMLElementIdentity{
nodeID: reply.Node.NodeID,
backendID: reply.Node.BackendNodeID,
nodeID: reply.Node.NodeID,
}
arr := el.children
@ -1169,7 +1319,7 @@ func (el *HTMLElement) handleChildInserted(ctx context.Context, message interfac
el.loadedChildren.Write(ctx, func(v core.Value, _ error) {
loadedArr := v.(*values.Array)
loadedEl, err := LoadHTMLElement(ctx, el.logger, el.client, el.events, el.input, el.exec, nextID, emptyBackendID)
loadedEl, err := LoadHTMLElement(ctx, el.logger, el.client, el.events, el.input, el.exec, nextID)
if err != nil {
el.logError(err).Msg("failed to load an inserted element")
@ -1263,7 +1413,6 @@ func (el *HTMLElement) logError(err error) *zerolog.Event {
Error().
Timestamp().
Int("nodeID", int(el.id.nodeID)).
Int("backendID", int(el.id.backendID)).
Str("objectID", string(el.id.objectID)).
Err(err)
}

View File

@ -3,9 +3,11 @@ package eval
import (
"context"
"fmt"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/page"
"github.com/mafredri/cdp/protocol/runtime"
"github.com/pkg/errors"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
@ -33,7 +35,7 @@ func (ec *ExecutionContext) ID() runtime.ExecutionContextID {
}
func (ec *ExecutionContext) Eval(ctx context.Context, exp string) error {
_, err := ec.eval(
_, err := ec.evalWithValueInternal(
ctx,
runtime.
NewEvaluateArgs(PrepareEval(exp)),
@ -42,8 +44,8 @@ func (ec *ExecutionContext) Eval(ctx context.Context, exp string) error {
return err
}
func (ec *ExecutionContext) EvalWithReturn(ctx context.Context, exp string) (core.Value, error) {
return ec.eval(
func (ec *ExecutionContext) EvalWithValue(ctx context.Context, exp string) (core.Value, error) {
return ec.evalWithValueInternal(
ctx,
runtime.
NewEvaluateArgs(PrepareEval(exp)).
@ -52,7 +54,7 @@ func (ec *ExecutionContext) EvalWithReturn(ctx context.Context, exp string) (cor
}
func (ec *ExecutionContext) EvalAsync(ctx context.Context, exp string) (core.Value, error) {
return ec.eval(
return ec.evalWithValueInternal(
ctx,
runtime.
NewEvaluateArgs(PrepareEval(exp)).
@ -61,31 +63,18 @@ func (ec *ExecutionContext) EvalAsync(ctx context.Context, exp string) (core.Val
)
}
func (ec *ExecutionContext) eval(ctx context.Context, args *runtime.EvaluateArgs) (core.Value, error) {
if ec.contextID != EmptyExecutionContextID {
args.SetContextID(ec.contextID)
}
out, err := ec.client.Runtime.Evaluate(ctx, args)
func (ec *ExecutionContext) ResolveRemoteObject(ctx context.Context, exp string) (runtime.RemoteObject, error) {
res, err := ec.evalInternal(ctx, runtime.NewEvaluateArgs(PrepareEval(exp)))
if err != nil {
return values.None, err
return runtime.RemoteObject{}, err
}
if out.ExceptionDetails != nil {
ex := out.ExceptionDetails
return values.None, core.Error(
core.ErrUnexpected,
fmt.Sprintf("%s: %s", ex.Text, *ex.Exception.Description),
)
if res.ObjectID == nil {
return runtime.RemoteObject{}, errors.Wrap(core.ErrUnexpected, "unable to resolve remote object")
}
if out.Result.Type != "undefined" && out.Result.Type != "null" {
return values.Unmarshal(out.Result.Value)
}
return Unmarshal(&out.Result)
return res, nil
}
func (ec *ExecutionContext) CallMethod(
@ -215,3 +204,62 @@ func (ec *ExecutionContext) DispatchEvent(
return values.True, nil
}
// CallFunction invokes the given JavaScript function declaration with the
// supplied call arguments and returns the resulting remote object.
// When the context has a non-empty execution context id, the call is bound
// to it. A protocol error or exception details reported by the call yield
// an empty remote object together with an error.
func (ec *ExecutionContext) CallFunction(ctx context.Context, declaration string, args ...runtime.CallArgument) (runtime.RemoteObject, error) {
	var empty runtime.RemoteObject

	params := runtime.NewCallFunctionOnArgs(declaration).SetArguments(args)

	if ec.contextID != EmptyExecutionContextID {
		params.SetExecutionContextID(ec.contextID)
	}

	reply, err := ec.client.Runtime.CallFunctionOn(ctx, params)

	switch {
	case err != nil:
		return empty, err
	case reply.ExceptionDetails != nil:
		details := *reply.ExceptionDetails

		return empty, errors.New(details.Error())
	}

	return reply.Result, nil
}
// evalWithValueInternal evaluates the given arguments via evalInternal and
// converts the resulting remote object into a runtime value.
// Results typed "undefined" or "null" go through Unmarshal on the remote
// object itself; every other result is decoded from its raw value with
// values.Unmarshal.
func (ec *ExecutionContext) evalWithValueInternal(ctx context.Context, args *runtime.EvaluateArgs) (core.Value, error) {
	remote, err := ec.evalInternal(ctx, args)

	if err != nil {
		return values.None, err
	}

	switch remote.Type {
	case "undefined", "null":
		return Unmarshal(&remote)
	default:
		return values.Unmarshal(remote.Value)
	}
}
// evalInternal runs Runtime.Evaluate with the given arguments and returns the
// raw remote object of the result.
// When the context has a non-empty execution context id, it is set on args
// before the call (args is mutated). If the evaluation reports exception
// details, an error wrapping core.ErrUnexpected is returned. Its message is
// "<text>: <description>" when the exception carries a description, and
// just "<text>" otherwise.
func (ec *ExecutionContext) evalInternal(ctx context.Context, args *runtime.EvaluateArgs) (runtime.RemoteObject, error) {
	if ec.contextID != EmptyExecutionContextID {
		args.SetContextID(ec.contextID)
	}

	out, err := ec.client.Runtime.Evaluate(ctx, args)

	if err != nil {
		return runtime.RemoteObject{}, err
	}

	if ex := out.ExceptionDetails; ex != nil {
		msg := ex.Text

		// Both the exception object and its description are optional
		// pointers; dereferencing them blindly would panic.
		if ex.Exception != nil && ex.Exception.Description != nil {
			msg = fmt.Sprintf("%s: %s", ex.Text, *ex.Exception.Description)
		}

		return runtime.RemoteObject{}, core.Error(core.ErrUnexpected, msg)
	}

	return out.Result, nil
}

View File

@ -1,9 +1,10 @@
package eval
import (
"strconv"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"strconv"
)
func Param(input core.Value) string {

View File

@ -5,11 +5,11 @@ import (
"reflect"
"sync"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/runtime/core"
)
type (

View File

@ -2,11 +2,11 @@ package events
import (
"context"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
"github.com/pkg/errors"
"github.com/mafredri/cdp"
)
func WaitForLoadEvent(ctx context.Context, client *cdp.Client) error {

View File

@ -64,7 +64,7 @@ func NewEvalWaitTask(
) *WaitTask {
return NewWaitTask(
func(ctx context.Context) (core.Value, error) {
return ec.EvalWithReturn(
return ec.EvalWithValue(
ctx,
predicate,
)

View File

@ -71,22 +71,9 @@ func loadInnerHTML(ctx context.Context, client *cdp.Client, exec *eval.Execution
if nodeType != html.DocumentNode {
var objID runtime.RemoteObjectID
switch {
case id.objectID != "":
if id.objectID != "" {
objID = id.objectID
case id.backendID > 0:
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetBackendNodeID(id.backendID))
if err != nil {
return "", err
}
if repl.Object.ObjectID == nil {
return "", errors.New("unable to resolve node")
}
objID = *repl.Object.ObjectID
default:
} else {
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id.nodeID))
if err != nil {
@ -109,7 +96,7 @@ func loadInnerHTML(ctx context.Context, client *cdp.Client, exec *eval.Execution
return values.NewString(res.String()), nil
}
repl, err := exec.EvalWithReturn(ctx, "return document.documentElement.innerHTML")
repl, err := exec.EvalWithValue(ctx, "return document.documentElement.innerHTML")
if err != nil {
return "", err
@ -135,22 +122,9 @@ func loadInnerText(ctx context.Context, client *cdp.Client, exec *eval.Execution
if nodeType != html.DocumentNode {
var objID runtime.RemoteObjectID
switch {
case id.objectID != "":
if id.objectID != "" {
objID = id.objectID
case id.backendID > 0:
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetBackendNodeID(id.backendID))
if err != nil {
return "", err
}
if repl.Object.ObjectID == nil {
return "", errors.New("unable to resolve node")
}
objID = *repl.Object.ObjectID
default:
} else {
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id.nodeID))
if err != nil {
@ -173,7 +147,7 @@ func loadInnerText(ctx context.Context, client *cdp.Client, exec *eval.Execution
return values.NewString(res.String()), err
}
repl, err := exec.EvalWithReturn(ctx, "return document.documentElement.innerText")
repl, err := exec.EvalWithValue(ctx, "return document.documentElement.innerText")
if err != nil {
return "", err
@ -212,8 +186,7 @@ func createChildrenArray(nodes []dom.Node) []HTMLElementIdentity {
for idx, child := range nodes {
child := child
children[idx] = HTMLElementIdentity{
nodeID: child.NodeID,
backendID: child.BackendNodeID,
nodeID: child.NodeID,
}
}

View File

@ -5,14 +5,14 @@ import (
"fmt"
"time"
"github.com/gofrs/uuid"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/runtime/values/types"
"github.com/gofrs/uuid"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
)
type Manager struct {
@ -254,7 +254,7 @@ func (m *Manager) SelectByNodeID(ctx context.Context, nodeID dom.NodeID, value *
return nil, err
}
res, err := m.exec.EvalWithReturn(
res, err := m.exec.EvalWithValue(
ctx,
fmt.Sprintf(`
var el = document.querySelector('[%s="%s"]');

View File

@ -2,9 +2,10 @@ package input
import (
"context"
"time"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/input"
"time"
)
type Mouse struct {

View File

@ -2,11 +2,11 @@ package input
import (
"context"
"github.com/mafredri/cdp/protocol/runtime"
"math"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/runtime"
"github.com/pkg/errors"
)

View File

@ -2,7 +2,9 @@ package templates
import (
"fmt"
"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
"github.com/MontFerret/ferret/pkg/runtime/values"
)

View File

@ -2,11 +2,11 @@ package templates
import (
"fmt"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/drivers"
)
func WaitBySelectorAll(selector values.String, when drivers.WaitEvent, value core.Value, check string) string {

View File

@ -0,0 +1,66 @@
package templates
// xPathTemplate is the JavaScript arrow function used to evaluate an XPath
// expression in the page. It takes (element, expression), calls
// document.evaluate with element as the context node and
// XPathResult.ANY_TYPE, and maps the outcome by result type:
//   - node iterators and node snapshots are collected into an array of nodes;
//   - number, string and boolean results become the matching primitive;
//   - single-node results become that node (singleNodeValue);
//   - any other result type leaves the return value undefined.
const xPathTemplate = `
(element, expression) => {
const out = document.evaluate(
expression,
element,
null,
XPathResult.ANY_TYPE
);
let result;
switch (out.resultType) {
case XPathResult.UNORDERED_NODE_ITERATOR_TYPE:
case XPathResult.ORDERED_NODE_ITERATOR_TYPE: {
result = [];
let item;
while ((item = out.iterateNext())) {
result.push(item);
}
break;
}
case XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE:
case XPathResult.ORDERED_NODE_SNAPSHOT_TYPE: {
result = [];
for (let i = 0; i < out.snapshotLength; i++) {
const item = out.snapshotItem(i);
if (item != null) {
result.push(item);
}
}
break;
}
case XPathResult.NUMBER_TYPE: {
result = out.numberValue;
break;
}
case XPathResult.STRING_TYPE: {
result = out.stringValue;
break;
}
case XPathResult.BOOLEAN_TYPE: {
result = out.booleanValue;
break;
}
case XPathResult.ANY_UNORDERED_NODE_TYPE:
case XPathResult.FIRST_ORDERED_NODE_TYPE: {
result = out.singleNodeValue;
break;
}
default: {
break;
}
}
return result;
}
`
// XPath returns the JavaScript function declaration stored in xPathTemplate.
// The returned string is the template itself; nothing is interpolated.
func XPath() string {
	return xPathTemplate
}

View File

@ -2,6 +2,7 @@ package common
import (
"context"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"

View File

@ -3,12 +3,13 @@ package common
import (
"bytes"
"context"
"github.com/MontFerret/ferret/pkg/runtime/core"
"strconv"
"strings"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/gorilla/css/scanner"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
)
func DeserializeStyles(input values.String) (*values.Object, error) {

View File

@ -2,6 +2,7 @@ package drivers
import (
"context"
"github.com/MontFerret/ferret/pkg/runtime/core"
)

View File

@ -173,6 +173,10 @@ func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector values.S
return doc.element.ExistsBySelector(ctx, selector)
}
// XPath evaluates the given XPath expression against this document.
// The call is forwarded unchanged to the document's underlying element
// (doc.element), so the result and error are exactly what that element's
// XPath method returns.
func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) {
	return doc.element.XPath(ctx, expression)
}
func (doc *HTMLDocument) IsDetached() values.Boolean {
return values.False
}

View File

@ -3,6 +3,8 @@ package http
import (
"context"
"encoding/json"
"fmt"
"github.com/antchfx/htmlquery"
"hash/fnv"
"strings"
@ -10,7 +12,9 @@ import (
"github.com/MontFerret/ferret/pkg/drivers/common"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery"
"github.com/antchfx/xpath"
)
type HTMLElement struct {
@ -308,6 +312,53 @@ func (el *HTMLElement) QuerySelectorAll(_ context.Context, selector values.Strin
return arr
}
// XPath evaluates the given XPath expression against this element.
//
// The element's first node is rendered to HTML (see outerHTML), re-parsed
// with htmlquery, and the compiled expression is evaluated against the
// re-parsed tree. Nodes in the result therefore come from that fresh tree,
// not from the element's own selection.
//
// A node-set result is returned as a values.Array whose items are produced
// by parseXPathNode. Any other result (number, string, boolean) is converted
// with values.Parse. On error the returned value is values.None.
func (el *HTMLElement) XPath(_ context.Context, expression values.String) (core.Value, error) {
	h, err := outerHTML(el.selection)

	if err != nil {
		return values.None, err
	}

	exp, err := xpath.Compile(expression.String())

	if err != nil {
		return values.None, err
	}

	rootNode, err := htmlquery.Parse(strings.NewReader(h))

	if err != nil {
		return values.None, err
	}

	out := exp.Evaluate(htmlquery.CreateXPathNavigator(rootNode))

	switch res := out.(type) {
	case *xpath.NodeIterator:
		items := values.NewArray(10)

		for res.MoveNext() {
			// The iterator exposes the generic navigator interface; verify
			// the concrete type instead of panicking on a failed assertion.
			nav, ok := res.Current().(*htmlquery.NodeNavigator)

			if !ok {
				return values.None, core.Error(
					core.ErrUnexpected,
					fmt.Sprintf("unexpected XPath navigator type %T", res.Current()),
				)
			}

			item, err := parseXPathNode(nav)

			if err != nil {
				return values.None, err
			}

			items.Push(item)
		}

		return items, nil
	default:
		return values.Parse(res), nil
	}
}
func (el *HTMLElement) InnerHTMLBySelector(_ context.Context, selector values.String) values.String {
selection := el.selection.Find(selector.String())

View File

@ -0,0 +1,47 @@
package http
import (
"bytes"
"golang.org/x/net/html"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery"
"github.com/antchfx/htmlquery"
"github.com/antchfx/xpath"
)
// parseXPathNode converts the node the navigator currently points at into a
// runtime value:
//   - no current node: values.None;
//   - element node: an HTML element wrapping that single node;
//   - root node: an HTML document built from the node, using the node's
//     "url" attribute as the document URL;
//   - any other node type: the node's Data parsed with values.Parse.
func parseXPathNode(nav *htmlquery.NodeNavigator) (core.Value, error) {
	current := nav.Current()

	if current == nil {
		return values.None, nil
	}

	kind := nav.NodeType()

	if kind == xpath.ElementNode {
		selection := &goquery.Selection{Nodes: []*html.Node{current}}

		return NewHTMLElement(selection)
	}

	if kind == xpath.RootNode {
		docURL := htmlquery.SelectAttr(current, "url")

		return NewHTMLDocument(goquery.NewDocumentFromNode(current), docURL, nil)
	}

	return values.Parse(current.Data), nil
}
// outerHTML renders the first node of the selection, including the node
// itself, to an HTML string. An empty selection yields an empty string and
// no error; only the first node is ever rendered.
func outerHTML(s *goquery.Selection) (string, error) {
	if len(s.Nodes) == 0 {
		return "", nil
	}

	var rendered bytes.Buffer

	if err := html.Render(&rendered, s.Nodes[0]); err != nil {
		return "", err
	}

	return rendered.String(), nil
}

View File

@ -41,6 +41,8 @@ type (
CountBySelector(ctx context.Context, selector values.String) values.Int
ExistsBySelector(ctx context.Context, selector values.String) values.Boolean
XPath(ctx context.Context, expression values.String) (core.Value, error)
}
// HTMLElement is the most general base interface which most objects in a Document implement.

View File

@ -297,6 +297,15 @@ func ToFloat(input core.Value) Float {
}
}
// ToString converts the given value to a String.
// A value that already is a String is returned as is; any other value is
// converted through its String() method.
func ToString(input core.Value) String {
	if str, ok := input.(String); ok {
		return str
	}

	return NewString(input.String())
}
func ToInt(input core.Value) Int {
switch val := input.(type) {
case Int:

View File

@ -64,6 +64,7 @@ func NewLib() map[string]core.Function {
"WAIT_STYLE_ALL": WaitStyleAll,
"WAIT_NO_STYLE_ALL": WaitNoStyleAll,
"WAIT_NAVIGATION": WaitNavigation,
"XPATH": XPath,
}
}

31
pkg/stdlib/html/xpath.go Normal file
View File

@ -0,0 +1,31 @@
package html
import (
"context"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
)
// XPath evaluates an XPath expression against the given HTML object.
// Exactly two arguments are required. The second one is converted to a
// string with values.ToString before evaluation.
// @param source (HTMLPage | HTMLDocument | HTMLElement) - Target HTML object.
// @param expression (String) - XPath expression.
// @returns (Value) - Returns result of a given XPath expression.
func XPath(ctx context.Context, args ...core.Value) (core.Value, error) {
	if err := core.ValidateArgs(args, 2, 2); err != nil {
		return values.None, err
	}

	target, err := drivers.ToElement(args[0])

	if err != nil {
		return values.None, err
	}

	return target.XPath(ctx, values.ToString(args[1]))
}