1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-08-13 19:52:52 +02:00

Bug/#80 element not found (#99)

* Some work

* Some refactoring

* Work on stabilizing queries

* Removed unit test for debugging

* Fixed linter errors

* Added logging when NodeID is 0

* Added --time param to CLI
This commit is contained in:
Tim Voronov
2018-10-11 12:39:03 -04:00
committed by GitHub
parent ad21fa6482
commit 570c1b4548
13 changed files with 531 additions and 330 deletions

View File

@@ -50,6 +50,13 @@ func Exec(query string, opts Options) {
}
}()
var timer *Timer
if opts.ShowTime {
timer = NewTimer()
timer.Start()
}
out, err := prog.Run(
ctx,
runtime.WithBrowser(opts.Cdp),
@@ -60,6 +67,10 @@ func Exec(query string, opts Options) {
runtime.WithUserAgent(opts.UserAgent),
)
if opts.ShowTime {
timer.Stop()
}
if err != nil {
fmt.Println("Failed to execute the query")
fmt.Println(err)
@@ -68,4 +79,8 @@ func Exec(query string, opts Options) {
}
fmt.Println(string(out))
if opts.ShowTime {
fmt.Println(timer.Print())
}
}

View File

@@ -5,4 +5,5 @@ type Options struct {
Params map[string]interface{}
Proxy string
UserAgent string
ShowTime bool
}

View File

@@ -34,7 +34,11 @@ func Repl(version string, opts Options) {
var commands []string
var multiline bool
timer := NewTimer()
var timer *Timer
if opts.ShowTime {
timer = NewTimer()
}
l := NewLogger()
@@ -90,7 +94,9 @@ func Repl(version string, opts Options) {
continue
}
timer.Start()
if opts.ShowTime {
timer.Start()
}
out, err := program.Run(
ctx,
@@ -102,9 +108,6 @@ func Repl(version string, opts Options) {
runtime.WithUserAgent(opts.UserAgent),
)
timer.Stop()
fmt.Println(timer.Print())
if err != nil {
fmt.Println("Failed to execute the query")
fmt.Println(err)
@@ -112,5 +115,10 @@ func Repl(version string, opts Options) {
}
fmt.Println(string(out))
if opts.ShowTime {
timer.Stop()
fmt.Println(timer.Print())
}
}
}

View File

@@ -0,0 +1,8 @@
// Collects the text of the '.chartTrack__details' nodes for every '.chartTrack' element on the SoundCloud top chart page.
// NOTE(review): the second DOCUMENT argument presumably selects the dynamic (browser-driven) loader — confirm.
LET doc = DOCUMENT('https://soundcloud.com/charts/top', true)
// Wait until the track details are present before querying; 5000 is presumably a timeout in milliseconds — confirm.
WAIT_ELEMENT(doc, '.chartTrack__details', 5000)
LET tracks = ELEMENTS(doc, '.chartTrack')
// One result per track element.
FOR track IN tracks
RETURN INNER_TEXT_ALL(track, '.chartTrack__details')

View File

@@ -4,7 +4,7 @@ INPUT(google, 'input[name="q"]', "ferret", 25)
CLICK(google, 'input[name="btnK"]')
WAIT_NAVIGATION(google)
WAIT_ELEMENT(google, '.g')
WAIT_ELEMENT(google, '.g', 5000)
FOR result IN ELEMENTS(google, '.g')
// filter out extra elements like videos and 'People also ask'

View File

@@ -19,15 +19,14 @@ LET result = (
LET items = (
FOR el IN ELEMENTS(amazon, resultItemSelector)
LET priceTxtMain = INNER_TEXT(el, priceSelector)
LET priceTxt = priceTxtMain != "" ? priceTxtMain : INNER_TEXT(el, altPriceSelector)
LET priceTxtMain = INNER_TEXT(el, priceSelector)
LET priceTxt = priceTxtMain != "" ? priceTxtMain : INNER_TEXT(el, altPriceSelector)
RETURN {
title: INNER_TEXT(el, 'h2'),
vendor: INNER_TEXT(el, vendorSelector),
price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", ""))
}
RETURN {
title: INNER_TEXT(el, 'h2'),
vendor: INNER_TEXT(el, vendorSelector),
price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", ""))
}
)
RETURN items

View File

@@ -77,6 +77,12 @@ var (
"set custom user agent. '*' triggers UA generation",
)
showTime = flag.Bool(
"time",
false,
"show how much time was taken to execute a query",
)
version = flag.Bool(
"version",
false,
@@ -151,6 +157,7 @@ func main() {
Params: p,
Proxy: *proxyAddress,
UserAgent: *userAgent,
ShowTime: *showTime,
}
stat, _ := os.Stdin.Stat()

View File

@@ -2104,12 +2104,22 @@ func TestParam(t *testing.T) {
// c := compiler.New()
//
// out, err := c.MustCompile(`
//LET doc = DOCUMENT("http://getbootstrap.com/docs/4.1/components/collapse/", true)
//LET google = DOCUMENT("https://www.google.com/", true)
//
//CLICK(doc, "#headingTwo > h5 > button")
//WAIT_CLASS(doc, "#collapseTwo", "bar")
//INPUT(google, 'input[name="q"]', "ferret", 25)
//CLICK(google, 'input[name="btnK"]')
//
//RETURN TRUE
//WAIT_NAVIGATION(google)
//WAIT_ELEMENT(google, '.g', 5000)
//
//FOR result IN ELEMENTS(google, '.g')
// // filter out extra elements like videos and 'People also ask'
// FILTER TRIM(result.attributes.class) == 'g'
// RETURN {
// title: INNER_TEXT(result, 'h3'),
// description: INNER_TEXT(result, '.st'),
// url: INNER_TEXT(result, 'cite')
// }
// `).Run(context.Background())
//
// So(err, ShouldBeNil)

View File

@@ -13,7 +13,6 @@ import (
"github.com/MontFerret/ferret/pkg/runtime/logging"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/input"
"github.com/mafredri/cdp/protocol/page"
"github.com/mafredri/cdp/rpcc"
@@ -80,25 +79,40 @@ func LoadHTMLDocument(
}
}
root, innerHTML, err := getRootElement(client)
node, err := getRootElement(ctx, client)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "failed to get root element")
}
broker, err := createEventBroker(client)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "failed to create event events")
}
logger := logging.FromContext(ctx)
rootElement, err := LoadElement(
ctx,
logger,
client,
broker,
node.Root.NodeID,
node.Root.BackendNodeID,
)
if err != nil {
return nil, errors.Wrap(err, "failed to load root element")
}
return NewHTMLDocument(
logging.FromContext(ctx),
logger,
conn,
client,
broker,
root,
innerHTML,
values.NewString(url),
rootElement,
), nil
}
@@ -107,20 +121,16 @@ func NewHTMLDocument(
conn *rpcc.Conn,
client *cdp.Client,
broker *events.EventBroker,
root dom.Node,
innerHTML values.String,
url values.String,
rootElement *HTMLElement,
) *HTMLDocument {
doc := new(HTMLDocument)
doc.logger = logger
doc.conn = conn
doc.client = client
doc.events = broker
doc.element = NewHTMLElement(doc.logger, client, broker, root.NodeID, root, innerHTML)
doc.url = ""
if root.BaseURL != nil {
doc.url = values.NewString(*root.BaseURL)
}
doc.url = url
doc.element = rootElement
broker.AddEventListener("load", doc.handlePageLoad)
broker.AddEventListener("error", doc.handleError)
@@ -201,7 +211,7 @@ func (doc *HTMLDocument) Close() error {
Timestamp().
Str("url", doc.url.String()).
Err(err).
Msg("failed to stop event broker")
Msg("failed to stop event events")
}
err = doc.events.Close()
@@ -211,7 +221,7 @@ func (doc *HTMLDocument) Close() error {
Timestamp().
Str("url", doc.url.String()).
Err(err).
Msg("failed to close event broker")
Msg("failed to close event events")
}
err = doc.element.Close()
@@ -645,7 +655,10 @@ func (doc *HTMLDocument) handlePageLoad(_ interface{}) {
doc.Lock()
defer doc.Unlock()
updated, innerHTML, err := getRootElement(doc.client)
ctx, cancel := contextWithTimeout()
defer cancel()
node, err := getRootElement(ctx, doc.client)
if err != nil {
doc.logger.Error().
@@ -656,22 +669,33 @@ func (doc *HTMLDocument) handlePageLoad(_ interface{}) {
return
}
updated, err := LoadElement(
ctx,
doc.logger,
doc.client,
doc.events,
node.Root.NodeID,
node.Root.BackendNodeID,
)
if err != nil {
doc.logger.Error().
Timestamp().
Err(err).
Msg("failed to load root node after page load")
return
}
// close the prev element
doc.element.Close()
// create a new root element wrapper
doc.element = NewHTMLElement(
doc.logger,
doc.client,
doc.events,
updated.NodeID,
updated,
innerHTML,
)
doc.element = updated
doc.url = ""
if updated.BaseURL != nil {
doc.url = values.NewString(*updated.BaseURL)
if node.Root.BaseURL != nil {
doc.url = values.NewString(*node.Root.BaseURL)
}
}

View File

@@ -3,6 +3,7 @@ package dynamic
import (
"context"
"encoding/json"
"fmt"
"hash/fnv"
"strconv"
"strings"
@@ -17,59 +18,107 @@ import (
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/input"
"github.com/mafredri/cdp/protocol/runtime"
"github.com/rs/zerolog"
)
const DefaultTimeout = time.Second * 30
type HTMLElement struct {
sync.Mutex
logger *zerolog.Logger
client *cdp.Client
broker *events.EventBroker
connected values.Boolean
id dom.NodeID
nodeType values.Int
nodeName values.String
innerHTML values.String
innerText *common.LazyValue
value core.Value
rawAttrs []string
attributes *common.LazyValue
children []dom.NodeID
loadedChildren *common.LazyValue
}
var emptyNodeID = dom.NodeID(0)
var emptyBackendID = dom.BackendNodeID(0)
var emptyObjectID = ""
type (
HTMLElementIdentity struct {
nodeID dom.NodeID
backendID dom.BackendNodeID
objectID runtime.RemoteObjectID
}
HTMLElement struct {
sync.Mutex
logger *zerolog.Logger
client *cdp.Client
events *events.EventBroker
connected values.Boolean
id *HTMLElementIdentity
nodeType values.Int
nodeName values.String
innerHTML values.String
innerText *common.LazyValue
value core.Value
rawAttrs []string
attributes *common.LazyValue
children []*HTMLElementIdentity
loadedChildren *common.LazyValue
}
)
func LoadElement(
ctx context.Context,
logger *zerolog.Logger,
client *cdp.Client,
broker *events.EventBroker,
id dom.NodeID,
nodeID dom.NodeID,
backendID dom.BackendNodeID,
) (*HTMLElement, error) {
if client == nil {
return nil, core.Error(core.ErrMissedArgument, "client")
}
ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout)
// getting a remote object that represents the current DOM Node
var args *dom.ResolveNodeArgs
defer cancelFn()
if backendID > 0 {
args = dom.NewResolveNodeArgs().SetBackendNodeID(backendID)
} else {
args = dom.NewResolveNodeArgs().SetNodeID(nodeID)
}
obj, err := client.DOM.ResolveNode(ctx, args)
if err != nil {
return nil, err
}
if obj.Object.ObjectID == nil {
return nil, core.Error(core.ErrNotFound, fmt.Sprintf("element %d", nodeID))
}
objectID := *obj.Object.ObjectID
node, err := client.DOM.DescribeNode(
ctx,
dom.
NewDescribeNodeArgs().
SetNodeID(id).
SetObjectID(objectID).
SetDepth(1),
)
if err != nil {
return nil, core.Error(err, strconv.Itoa(int(id)))
return nil, core.Error(err, strconv.Itoa(int(nodeID)))
}
innerHTML, err := loadInnerHTML(client, id)
id := new(HTMLElementIdentity)
id.nodeID = nodeID
id.objectID = objectID
if backendID > 0 {
id.backendID = backendID
} else {
id.backendID = node.Node.BackendNodeID
}
innerHTML, err := loadInnerHTML(ctx, client, id)
if err != nil {
return nil, core.Error(err, strconv.Itoa(int(id)))
return nil, core.Error(err, strconv.Itoa(int(nodeID)))
}
var val string
if node.Node.Value != nil {
val = *node.Node.Value
}
return NewHTMLElement(
@@ -77,8 +126,12 @@ func LoadElement(
client,
broker,
id,
node.Node,
node.Node.NodeType,
node.Node.NodeName,
node.Node.Attributes,
val,
innerHTML,
createChildrenArray(node.Node.Children),
), nil
}
@@ -86,30 +139,30 @@ func NewHTMLElement(
logger *zerolog.Logger,
client *cdp.Client,
broker *events.EventBroker,
id dom.NodeID,
node dom.Node,
id *HTMLElementIdentity,
nodeType int,
nodeName string,
attributes []string,
value string,
innerHTML values.String,
children []*HTMLElementIdentity,
) *HTMLElement {
el := new(HTMLElement)
el.logger = logger
el.client = client
el.broker = broker
el.events = broker
el.connected = values.True
el.id = id
el.nodeType = values.NewInt(node.NodeType)
el.nodeName = values.NewString(node.NodeName)
el.nodeType = values.NewInt(nodeType)
el.nodeName = values.NewString(nodeName)
el.innerHTML = innerHTML
el.innerText = common.NewLazyValue(el.loadInnerText)
el.rawAttrs = node.Attributes[:]
el.rawAttrs = attributes
el.attributes = common.NewLazyValue(el.loadAttrs)
el.value = values.EmptyString
el.loadedChildren = common.NewLazyValue(el.loadChildren)
if node.Value != nil {
el.value = values.NewString(*node.Value)
}
el.children = createChildrenArray(node.Children)
el.value = values.NewString(value)
el.children = children
broker.AddEventListener("reload", el.handlePageReload)
broker.AddEventListener("attr:modified", el.handleAttrModified)
@@ -131,12 +184,12 @@ func (el *HTMLElement) Close() error {
}
el.connected = false
el.broker.RemoveEventListener("reload", el.handlePageReload)
el.broker.RemoveEventListener("attr:modified", el.handleAttrModified)
el.broker.RemoveEventListener("attr:removed", el.handleAttrRemoved)
el.broker.RemoveEventListener("children:count", el.handleChildrenCountChanged)
el.broker.RemoveEventListener("children:inserted", el.handleChildInserted)
el.broker.RemoveEventListener("children:deleted", el.handleChildDeleted)
el.events.RemoveEventListener("reload", el.handlePageReload)
el.events.RemoveEventListener("attr:modified", el.handleAttrModified)
el.events.RemoveEventListener("attr:removed", el.handleAttrRemoved)
el.events.RemoveEventListener("children:count", el.handleChildrenCountChanged)
el.events.RemoveEventListener("children:inserted", el.handleChildInserted)
el.events.RemoveEventListener("children:deleted", el.handleChildDeleted)
return nil
}
@@ -164,8 +217,8 @@ func (el *HTMLElement) Compare(other core.Value) int {
case core.HTMLDocumentType:
other := other.(*HTMLElement)
id := int(el.id)
otherID := int(other.id)
id := int(el.id.backendID)
otherID := int(other.id.backendID)
if id == otherID {
return 0
@@ -210,14 +263,10 @@ func (el *HTMLElement) Value() core.Value {
ctx, cancel := contextWithTimeout()
defer cancel()
val, err := eval.Property(ctx, el.client, el.id, "value")
val, err := eval.Property(ctx, el.client, el.id.objectID, "value")
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to get node value")
el.logError(err).Msg("failed to get node value")
return el.value
}
@@ -295,29 +344,33 @@ func (el *HTMLElement) QuerySelector(selector values.String) core.Value {
return values.None
}
ctx := context.Background()
ctx, cancel := contextWithTimeout()
defer cancel()
selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String())
// TODO: Can we use RemoteObjectID or BackendID instead of NodeId?
selectorArgs := dom.NewQuerySelectorArgs(el.id.nodeID, selector.String())
found, err := el.client.DOM.QuerySelector(ctx, selectorArgs)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to retrieve a node by selector")
return values.None
}
res, err := LoadElement(el.logger, el.client, el.broker, found.NodeID)
if found.NodeID == emptyNodeID {
el.logError(err).
Str("selector", selector.String()).
Msg("failed to find a node by selector. returned 0 NodeID")
return values.None
}
res, err := LoadElement(ctx, el.logger, el.client, el.events, found.NodeID, emptyBackendID)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to load a child node by selector")
@@ -332,16 +385,15 @@ func (el *HTMLElement) QuerySelectorAll(selector values.String) core.Value {
return values.NewArray(0)
}
ctx := context.Background()
ctx, cancel := contextWithTimeout()
defer cancel()
selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String())
// TODO: Can we use RemoteObjectID or BackendID instead of NodeId?
selectorArgs := dom.NewQuerySelectorAllArgs(el.id.nodeID, selector.String())
res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to retrieve nodes by selector")
@@ -351,13 +403,18 @@ func (el *HTMLElement) QuerySelectorAll(selector values.String) core.Value {
arr := values.NewArray(len(res.NodeIDs))
for _, id := range res.NodeIDs {
childEl, err := LoadElement(el.logger, el.client, el.broker, id)
if id == emptyNodeID {
el.logError(err).
Str("selector", selector.String()).
Msg("failed to find a node by selector. returned 0 NodeID")
continue
}
childEl, err := LoadElement(ctx, el.logger, el.client, el.events, id, emptyBackendID)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to load nodes by selector")
@@ -424,49 +481,75 @@ func (el *HTMLElement) InnerTextBySelector(selector values.String) values.String
return values.EmptyString
}
ctx := context.Background()
ctx, cancel := contextWithTimeout()
defer cancel()
selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String())
found, err := el.client.DOM.QuerySelector(ctx, selectorArgs)
// TODO: Can we use RemoteObjectID or BackendID instead of NodeId?
found, err := el.client.DOM.QuerySelector(ctx, dom.NewQuerySelectorArgs(el.id.nodeID, selector.String()))
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to retrieve nodes by selector")
Msg("failed to retrieve a node by selector")
return values.EmptyString
}
text, err := loadInnerText(el.client, found.NodeID)
if found.NodeID == emptyNodeID {
el.logError(err).
Str("selector", selector.String()).
Msg("failed to find a node by selector. returned 0 NodeID")
return values.EmptyString
}
childNodeID := found.NodeID
obj, err := el.client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(childNodeID))
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Int("childNodeID", int(childNodeID)).
Str("selector", selector.String()).
Msg("failed to resolve remote object for child element")
return values.EmptyString
}
if obj.Object.ObjectID == nil {
el.logError(err).
Int("childNodeID", int(childNodeID)).
Str("selector", selector.String()).
Msg("failed to resolve remote object for child element")
return values.EmptyString
}
objID := *obj.Object.ObjectID
text, err := eval.Property(ctx, el.client, objID, "innerText")
if err != nil {
el.logError(err).
Str("childObjectID", string(objID)).
Str("selector", selector.String()).
Msg("failed to load inner text for found child element")
return values.EmptyString
}
return text
return values.NewString(text.String())
}
func (el *HTMLElement) InnerTextBySelectorAll(selector values.String) *values.Array {
ctx := context.Background()
ctx, cancel := contextWithTimeout()
defer cancel()
selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String())
res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs)
// TODO: Can we use RemoteObjectID or BackendID instead of NodeId?
res, err := el.client.DOM.QuerySelectorAll(ctx, dom.NewQuerySelectorAllArgs(el.id.nodeID, selector.String()))
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to retrieve nodes by selector")
@@ -475,19 +558,42 @@ func (el *HTMLElement) InnerTextBySelectorAll(selector values.String) *values.Ar
arr := values.NewArray(len(res.NodeIDs))
for _, id := range res.NodeIDs {
text, err := loadInnerText(el.client, id)
for idx, id := range res.NodeIDs {
if id == emptyNodeID {
el.logError(err).
Str("selector", selector.String()).
Msg("failed to find a node by selector. returned 0 NodeID")
continue
}
obj, err := el.client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id))
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Int("index", idx).
Int("childNodeID", int(id)).
Str("selector", selector.String()).
Msg("failed to resolve remote object for child element")
continue
}
if obj.Object.ObjectID == nil {
continue
}
objID := *obj.Object.ObjectID
text, err := eval.Property(ctx, el.client, objID, "innerText")
if err != nil {
el.logError(err).
Str("childObjectID", string(objID)).
Str("selector", selector.String()).
Msg("failed to load inner text for found child element")
// return what we have
return arr
continue
}
arr.Push(text)
@@ -508,29 +614,26 @@ func (el *HTMLElement) InnerHTMLBySelector(selector values.String) values.String
return values.EmptyString
}
ctx := context.Background()
ctx, cancel := contextWithTimeout()
defer cancel()
selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String())
found, err := el.client.DOM.QuerySelector(ctx, selectorArgs)
// TODO: Can we use RemoteObjectID or BackendID instead of NodeId?
found, err := el.client.DOM.QuerySelector(ctx, dom.NewQuerySelectorArgs(el.id.nodeID, selector.String()))
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to retrieve nodes by selector")
return values.EmptyString
}
text, err := loadInnerHTML(el.client, found.NodeID)
text, err := loadInnerHTML(ctx, el.client, &HTMLElementIdentity{
nodeID: found.NodeID,
})
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to load inner HTML for found child element")
@@ -541,16 +644,15 @@ func (el *HTMLElement) InnerHTMLBySelector(selector values.String) values.String
}
func (el *HTMLElement) InnerHTMLBySelectorAll(selector values.String) *values.Array {
ctx := context.Background()
ctx, cancel := contextWithTimeout()
defer cancel()
selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String())
// TODO: Can we use RemoteObjectID or BackendID instead of NodeId?
selectorArgs := dom.NewQuerySelectorAllArgs(el.id.nodeID, selector.String())
res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to retrieve nodes by selector")
@@ -560,13 +662,12 @@ func (el *HTMLElement) InnerHTMLBySelectorAll(selector values.String) *values.Ar
arr := values.NewArray(len(res.NodeIDs))
for _, id := range res.NodeIDs {
text, err := loadInnerHTML(el.client, id)
text, err := loadInnerHTML(ctx, el.client, &HTMLElementIdentity{
nodeID: id,
})
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
el.logError(err).
Str("selector", selector.String()).
Msg("failed to load inner HTML for found child element")
@@ -585,14 +686,16 @@ func (el *HTMLElement) Click() (values.Boolean, error) {
defer cancel()
return events.DispatchEvent(ctx, el.client, el.id, "click")
return events.DispatchEvent(ctx, el.client, el.id.objectID, "click")
}
func (el *HTMLElement) Input(value core.Value, delay values.Int) error {
ctx, cancel := contextWithTimeout()
defer cancel()
if err := el.client.DOM.Focus(ctx, dom.NewFocusArgs().SetNodeID(el.id)); err != nil {
if err := el.client.DOM.Focus(ctx, dom.NewFocusArgs().SetObjectID(el.id.objectID)); err != nil {
el.logError(err).Msg("failed to focus")
return err
}
@@ -605,9 +708,13 @@ func (el *HTMLElement) Input(value core.Value, delay values.Int) error {
for _, ch := range valStr {
for _, ev := range []string{"keyDown", "keyUp"} {
ke := input.NewDispatchKeyEventArgs(ev).SetText(string(ch))
if err := el.client.Input.DispatchKeyEvent(ctx, ke); err != nil {
el.logError(err).Str("value", value.String()).Msg("failed to input a value")
return err
}
time.Sleep(delayMs * time.Millisecond)
}
}
@@ -623,25 +730,36 @@ func (el *HTMLElement) IsConnected() values.Boolean {
}
func (el *HTMLElement) loadInnerText() (core.Value, error) {
if el.IsConnected() {
ctx, cancel := contextWithTimeout()
defer cancel()
text, err := eval.Property(ctx, el.client, el.id.objectID, "innerText")
if err == nil {
return text, nil
}
el.logError(err).Msg("failed to read 'innerText' property of remote object")
// and just parse innerHTML
}
h := el.InnerHTML()
if h == values.EmptyString {
return h, nil
}
parser, err := parseInnerText(h.String())
parsed, err := parseInnerText(h.String())
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to parse inner html")
el.logError(err).Msg("failed to parse inner html")
return values.EmptyString, err
}
return parser, nil
return parsed, nil
}
func (el *HTMLElement) loadAttrs() (core.Value, error) {
@@ -653,16 +771,28 @@ func (el *HTMLElement) loadChildren() (core.Value, error) {
return values.NewArray(0), nil
}
loaded, err := loadNodes(el.logger, el.client, el.broker, el.children)
ctx, cancel := contextWithTimeout()
defer cancel()
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to load child nodes")
loaded := values.NewArray(len(el.children))
return values.None, err
for _, childID := range el.children {
child, err := LoadElement(
ctx,
el.logger,
el.client,
el.events,
childID.nodeID,
childID.backendID,
)
if err != nil {
el.logError(err).Msg("failed to load child nodes")
continue
}
loaded.Push(child)
}
return loaded, nil
@@ -681,17 +811,13 @@ func (el *HTMLElement) handleAttrModified(message interface{}) {
}
// it's not for this element
if reply.NodeID != el.id {
if reply.NodeID != el.id.nodeID {
return
}
el.attributes.Write(func(v core.Value, err error) {
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to update node")
el.logError(err).Msg("failed to update node")
return
}
@@ -715,7 +841,7 @@ func (el *HTMLElement) handleAttrRemoved(message interface{}) {
}
// it's not for this element
if reply.NodeID != el.id {
if reply.NodeID != el.id.nodeID {
return
}
@@ -727,11 +853,7 @@ func (el *HTMLElement) handleAttrRemoved(message interface{}) {
el.attributes.Write(func(v core.Value, err error) {
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to update node")
el.logError(err).Msg("failed to update node")
return
}
@@ -753,18 +875,20 @@ func (el *HTMLElement) handleChildrenCountChanged(message interface{}) {
return
}
if reply.NodeID != el.id {
if reply.NodeID != el.id.nodeID {
return
}
node, err := el.client.DOM.DescribeNode(context.Background(), dom.NewDescribeNodeArgs())
ctx, cancel := contextWithTimeout()
defer cancel()
node, err := el.client.DOM.DescribeNode(
ctx,
dom.NewDescribeNodeArgs().SetObjectID(el.id.objectID),
)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to update node")
el.logError(err).Msg("failed to update node")
return
}
@@ -782,7 +906,7 @@ func (el *HTMLElement) handleChildInserted(message interface{}) {
return
}
if reply.ParentNodeID != el.id {
if reply.ParentNodeID != el.id.nodeID {
return
}
@@ -794,7 +918,7 @@ func (el *HTMLElement) handleChildInserted(message interface{}) {
defer el.Unlock()
for idx, id := range el.children {
if id == prevID {
if id.nodeID == prevID {
targetIDx = idx
break
}
@@ -804,42 +928,43 @@ func (el *HTMLElement) handleChildInserted(message interface{}) {
return
}
nextIdentity := &HTMLElementIdentity{
nodeID: reply.Node.NodeID,
backendID: reply.Node.BackendNodeID,
}
arr := el.children
el.children = append(arr[:targetIDx], append([]dom.NodeID{nextID}, arr[targetIDx:]...)...)
el.children = append(arr[:targetIDx], append([]*HTMLElementIdentity{nextIdentity}, arr[targetIDx:]...)...)
if !el.loadedChildren.Ready() {
return
}
el.loadedChildren.Write(func(v core.Value, err error) {
ctx, cancel := contextWithTimeout()
defer cancel()
loadedArr := v.(*values.Array)
loadedEl, err := LoadElement(el.logger, el.client, el.broker, nextID)
loadedEl, err := LoadElement(ctx, el.logger, el.client, el.events, nextID, emptyBackendID)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to load an inserted node")
el.logError(err).Msg("failed to load an inserted node")
return
}
loadedArr.Insert(values.NewInt(targetIDx), loadedEl)
newInnerHTML, err := loadInnerHTML(el.client, el.id)
newInnerHTML, err := loadInnerHTML(ctx, el.client, el.id)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to update node")
el.logError(err).Msg("failed to update node")
return
}
el.innerHTML = newInnerHTML
el.innerText.Reset()
})
}
@@ -850,7 +975,7 @@ func (el *HTMLElement) handleChildDeleted(message interface{}) {
return
}
if reply.ParentNodeID != el.id {
if reply.ParentNodeID != el.id.nodeID {
return
}
@@ -861,7 +986,7 @@ func (el *HTMLElement) handleChildDeleted(message interface{}) {
defer el.Unlock()
for idx, id := range el.children {
if id == targetID {
if id.nodeID == targetID {
targetIDx = idx
break
}
@@ -883,27 +1008,41 @@ func (el *HTMLElement) handleChildDeleted(message interface{}) {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Int("nodeID", int(el.id.nodeID)).
Msg("failed to update node")
return
}
ctx, cancel := contextWithTimeout()
defer cancel()
loadedArr := v.(*values.Array)
loadedArr.RemoveAt(values.NewInt(targetIDx))
newInnerHTML, err := loadInnerHTML(el.client, el.id)
newInnerHTML, err := loadInnerHTML(ctx, el.client, el.id)
if err != nil {
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Int("nodeID", int(el.id.nodeID)).
Msg("failed to update node")
return
}
el.innerHTML = newInnerHTML
el.innerText.Reset()
})
}
// logError starts an error-level log event that carries a timestamp, the
// element's node, backend and remote object identifiers, and err. The caller
// is expected to finish the event (for example with Msg).
func (el *HTMLElement) logError(err error) *zerolog.Event {
	identity := el.id

	event := el.logger.Error().Timestamp()
	event = event.Int("nodeID", int(identity.nodeID))
	event = event.Int("backendID", int(identity.backendID))
	event = event.Str("objectID", string(identity.objectID))

	return event.Err(err)
}

View File

@@ -2,11 +2,11 @@ package eval
import (
"context"
"encoding/json"
"fmt"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/runtime"
)
@@ -49,29 +49,12 @@ func Eval(client *cdp.Client, exp string, ret bool, async bool) (core.Value, err
func Property(
ctx context.Context,
client *cdp.Client,
id dom.NodeID,
objectID runtime.RemoteObjectID,
propName string,
) (core.Value, error) {
// get a ref to remote object representing the node
obj, err := client.DOM.ResolveNode(
ctx,
dom.NewResolveNodeArgs().
SetNodeID(id),
)
if err != nil {
return values.None, err
}
if obj.Object.ObjectID == nil {
return values.None, core.Error(core.ErrNotFound, fmt.Sprintf("element %d", id))
}
defer client.Runtime.ReleaseObject(ctx, runtime.NewReleaseObjectArgs(*obj.Object.ObjectID))
res, err := client.Runtime.GetProperties(
ctx,
runtime.NewGetPropertiesArgs(*obj.Object.ObjectID),
runtime.NewGetPropertiesArgs(objectID),
)
if err != nil {
@@ -109,6 +92,64 @@ func Property(
return values.None, nil
}
// Method calls methodName on the remote object identified by objectID through
// client.Runtime.CallFunctionOn, passing args as the call arguments.
//
// It returns the call error, or the reported exception details as the error.
// When the call result carries no remote object ID it returns (nil, nil), so
// callers must check the returned pointer before dereferencing it.
//
// NOTE(review): methodName is handed to NewCallFunctionOnArgs verbatim —
// confirm that a bare method name resolves as intended on the remote side.
func Method(
	ctx context.Context,
	client *cdp.Client,
	objectID runtime.RemoteObjectID,
	methodName string,
	args []runtime.CallArgument,
) (*runtime.RemoteObject, error) {
	callArgs := runtime.NewCallFunctionOnArgs(methodName).
		SetObjectID(objectID).
		SetArguments(args)

	reply, err := client.Runtime.CallFunctionOn(ctx, callArgs)

	switch {
	case err != nil:
		return nil, err
	case reply.ExceptionDetails != nil:
		return nil, reply.ExceptionDetails
	case reply.Result.ObjectID == nil:
		return nil, nil
	}

	return &reply.Result, nil
}
// MethodQuerySelector calls "querySelector" with the given selector on the
// remote object identified by objectID and returns the remote object ID of
// the result.
//
// The selector is JSON-encoded and sent as the single call argument. An empty
// ID with a nil error is returned when the call yields no remote object.
func MethodQuerySelector(
	ctx context.Context,
	client *cdp.Client,
	objectID runtime.RemoteObjectID,
	selector string,
) (runtime.RemoteObjectID, error) {
	encoded, err := json.Marshal(selector)

	if err != nil {
		return "", err
	}

	obj, err := Method(ctx, client, objectID, "querySelector", []runtime.CallArgument{
		{
			Value: json.RawMessage(encoded),
		},
	})

	if err != nil {
		return "", err
	}

	// Method reports "no remote object" as (nil, nil); check the pointer itself
	// before reading ObjectID to avoid a nil pointer dereference.
	if obj == nil || obj.ObjectID == nil {
		return "", nil
	}

	return *obj.ObjectID, nil
}
func Unmarshal(obj *runtime.RemoteObject) (core.Value, error) {
if obj == nil {
return values.None, nil

View File

@@ -6,31 +6,15 @@ import (
"github.com/MontFerret/ferret/pkg/html/dynamic/eval"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/runtime"
)
func DispatchEvent(
ctx context.Context,
client *cdp.Client,
id dom.NodeID,
objectID runtime.RemoteObjectID,
eventName string,
) (values.Boolean, error) {
// get a ref to remote object representing the node
obj, err := client.DOM.ResolveNode(
ctx,
dom.NewResolveNodeArgs().
SetNodeID(id),
)
if err != nil {
return values.False, err
}
if obj.Object.ObjectID == nil {
return values.False, nil
}
evt, err := client.Runtime.Evaluate(ctx, runtime.NewEvaluateArgs(eval.PrepareEval(fmt.Sprintf(`
return new window.MouseEvent('%s', { bubbles: true })
`, eventName))))
@@ -52,24 +36,21 @@ func DispatchEvent(
// release the event object
defer client.Runtime.ReleaseObject(ctx, runtime.NewReleaseObjectArgs(*evtID))
res, err := client.Runtime.CallFunctionOn(
_, err = eval.Method(
ctx,
runtime.NewCallFunctionOnArgs("dispatchEvent").
SetObjectID(*obj.Object.ObjectID).
SetArguments([]runtime.CallArgument{
{
ObjectID: evt.Result.ObjectID,
},
}),
client,
objectID,
"dispatchEvent",
[]runtime.CallArgument{
{
ObjectID: evt.Result.ObjectID,
},
},
)
if err != nil {
return values.False, err
}
if res.ExceptionDetails != nil {
return values.False, res.ExceptionDetails
}
return values.True, nil
}

View File

@@ -10,15 +10,10 @@ import (
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
"github.com/rs/zerolog"
"golang.org/x/sync/errgroup"
"strings"
)
func pointerInt(input int) *int {
return &input
}
type batchFunc = func() error
func runBatch(funcs ...batchFunc) error {
@@ -31,24 +26,14 @@ func runBatch(funcs ...batchFunc) error {
return eg.Wait()
}
func getRootElement(client *cdp.Client) (dom.Node, values.String, error) {
args := dom.NewGetDocumentArgs()
args.Depth = pointerInt(1) // lets load the entire document
ctx := context.Background()
d, err := client.DOM.GetDocument(ctx, args)
func getRootElement(ctx context.Context, client *cdp.Client) (*dom.GetDocumentReply, error) {
d, err := client.DOM.GetDocument(ctx, dom.NewGetDocumentArgs().SetDepth(1))
if err != nil {
return dom.Node{}, values.EmptyString, err
return nil, err
}
innerHTML, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID))
if err != nil {
return dom.Node{}, values.EmptyString, err
}
return d.Root, values.NewString(innerHTML.OuterHTML), nil
return d, nil
}
func parseAttrs(attrs []string) *values.Object {
@@ -79,8 +64,18 @@ func parseAttrs(attrs []string) *values.Object {
return res
}
func loadInnerHTML(client *cdp.Client, id dom.NodeID) (values.String, error) {
res, err := client.DOM.GetOuterHTML(context.Background(), dom.NewGetOuterHTMLArgs().SetNodeID(id))
func loadInnerHTML(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (values.String, error) {
var args *dom.GetOuterHTMLArgs
if id.objectID != "" {
args = dom.NewGetOuterHTMLArgs().SetObjectID(id.objectID)
} else if id.backendID > 0 {
args = dom.NewGetOuterHTMLArgs().SetBackendNodeID(id.backendID)
} else {
args = dom.NewGetOuterHTMLArgs().SetNodeID(id.nodeID)
}
res, err := client.DOM.GetOuterHTML(ctx, args)
if err != nil {
return "", err
@@ -89,20 +84,6 @@ func loadInnerHTML(client *cdp.Client, id dom.NodeID) (values.String, error) {
return values.NewString(res.OuterHTML), err
}
func loadInnerText(client *cdp.Client, id dom.NodeID) (values.String, error) {
h, err := loadInnerHTML(client, id)
if err != nil {
return values.EmptyString, err
}
if h == values.EmptyString {
return h, nil
}
return parseInnerText(h.String())
}
func parseInnerText(innerHTML string) (values.String, error) {
buff := bytes.NewBuffer([]byte(innerHTML))
@@ -115,32 +96,19 @@ func parseInnerText(innerHTML string) (values.String, error) {
return values.NewString(parsed.Text()), nil
}
func createChildrenArray(nodes []dom.Node) []dom.NodeID {
children := make([]dom.NodeID, len(nodes))
func createChildrenArray(nodes []dom.Node) []*HTMLElementIdentity {
children := make([]*HTMLElementIdentity, len(nodes))
for idx, child := range nodes {
children[idx] = child.NodeID
children[idx] = &HTMLElementIdentity{
nodeID: child.NodeID,
backendID: child.BackendNodeID,
}
}
return children
}
func loadNodes(logger *zerolog.Logger, client *cdp.Client, broker *events.EventBroker, nodes []dom.NodeID) (*values.Array, error) {
arr := values.NewArray(len(nodes))
for _, id := range nodes {
child, err := LoadElement(logger, client, broker, id)
if err != nil {
return nil, err
}
arr.Push(child)
}
return arr, nil
}
// contextWithTimeout returns a context derived from context.Background that
// is cancelled after DefaultTimeout, together with its cancel function.
func contextWithTimeout() (context.Context, context.CancelFunc) {
	ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)

	return ctx, cancel
}