2018-09-27 04:03:06 +02:00
|
|
|
package dynamic
|
|
|
|
|
|
|
|
import (
|
2018-10-09 02:20:40 +02:00
|
|
|
"bytes"
|
2018-09-27 04:03:06 +02:00
|
|
|
"context"
|
2018-11-12 21:53:36 +02:00
|
|
|
"errors"
|
2018-10-08 03:32:30 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/html/common"
|
2018-11-12 21:53:36 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/html/dynamic/eval"
|
2018-10-08 03:32:30 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/html/dynamic/events"
|
2018-09-27 04:03:06 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/runtime/values"
|
2018-10-09 02:20:40 +02:00
|
|
|
"github.com/PuerkitoBio/goquery"
|
2018-09-27 04:03:06 +02:00
|
|
|
"github.com/mafredri/cdp"
|
|
|
|
"github.com/mafredri/cdp/protocol/dom"
|
|
|
|
"github.com/mafredri/cdp/protocol/page"
|
2018-11-12 21:53:36 +02:00
|
|
|
"github.com/mafredri/cdp/protocol/runtime"
|
2018-09-27 04:03:06 +02:00
|
|
|
"golang.org/x/sync/errgroup"
|
2018-11-15 21:33:53 +02:00
|
|
|
"math"
|
2018-10-07 04:33:39 +02:00
|
|
|
"strings"
|
2018-09-27 04:03:06 +02:00
|
|
|
)
|
|
|
|
|
2018-11-15 21:33:53 +02:00
|
|
|
// batchFunc is the signature of a unit of work accepted by runBatch:
// a function that takes no arguments and reports failure through its error.
type batchFunc = func() error

// Quad holds one X/Y coordinate pair.
// fromProtocolQuad produces four of them from a single protocol quad, so a
// value of this type is one corner of a quad (or, as returned by
// getClickablePoint, a single point on the page).
type Quad struct {
	X, Y float64
}
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
func runBatch(funcs ...batchFunc) error {
|
|
|
|
eg := errgroup.Group{}
|
|
|
|
|
|
|
|
for _, f := range funcs {
|
|
|
|
eg.Go(f)
|
|
|
|
}
|
|
|
|
|
|
|
|
return eg.Wait()
|
|
|
|
}
|
|
|
|
|
2018-10-11 18:39:03 +02:00
|
|
|
func getRootElement(ctx context.Context, client *cdp.Client) (*dom.GetDocumentReply, error) {
|
|
|
|
d, err := client.DOM.GetDocument(ctx, dom.NewGetDocumentArgs().SetDepth(1))
|
2018-10-08 03:23:36 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-11 18:39:03 +02:00
|
|
|
return nil, err
|
2018-10-08 03:23:36 +02:00
|
|
|
}
|
|
|
|
|
2018-10-11 18:39:03 +02:00
|
|
|
return d, nil
|
2018-10-08 03:23:36 +02:00
|
|
|
}
|
|
|
|
|
2018-11-15 21:33:53 +02:00
|
|
|
func fromProtocolQuad(quad dom.Quad) []Quad {
|
|
|
|
return []Quad{
|
|
|
|
{
|
|
|
|
X: quad[0],
|
|
|
|
Y: quad[1],
|
|
|
|
},
|
|
|
|
{
|
|
|
|
X: quad[2],
|
|
|
|
Y: quad[3],
|
|
|
|
},
|
|
|
|
{
|
|
|
|
X: quad[4],
|
|
|
|
Y: quad[5],
|
|
|
|
},
|
|
|
|
{
|
|
|
|
X: quad[6],
|
|
|
|
Y: quad[7],
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func computeQuadArea(quads []Quad) float64 {
|
|
|
|
var area float64
|
|
|
|
|
|
|
|
for i := range quads {
|
|
|
|
p1 := quads[i]
|
|
|
|
p2 := quads[(i+1)%len(quads)]
|
|
|
|
area += (p1.X*p2.Y - p2.X*p1.Y) / 2
|
|
|
|
}
|
|
|
|
|
|
|
|
return math.Abs(area)
|
|
|
|
}
|
|
|
|
|
|
|
|
func getClickablePoint(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (Quad, error) {
|
|
|
|
qargs := dom.NewGetContentQuadsArgs()
|
|
|
|
|
|
|
|
if id.objectID != "" {
|
|
|
|
qargs.SetObjectID(id.objectID)
|
|
|
|
} else if id.backendID != 0 {
|
|
|
|
qargs.SetBackendNodeID(id.backendID)
|
|
|
|
} else {
|
|
|
|
qargs.SetNodeID(id.nodeID)
|
|
|
|
}
|
|
|
|
|
|
|
|
res, err := client.DOM.GetContentQuads(ctx, qargs)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return Quad{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if res.Quads == nil || len(res.Quads) == 0 {
|
|
|
|
return Quad{}, errors.New("node is either not visible or not an HTMLElement")
|
|
|
|
}
|
|
|
|
|
|
|
|
quads := make([][]Quad, 0, len(res.Quads))
|
|
|
|
|
|
|
|
for _, q := range res.Quads {
|
|
|
|
quad := fromProtocolQuad(q)
|
|
|
|
|
|
|
|
if computeQuadArea(quad) > 1 {
|
|
|
|
quads = append(quads, quad)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(quads) == 0 {
|
|
|
|
return Quad{}, errors.New("node is either not visible or not an HTMLElement")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the middle point of the first quad.
|
|
|
|
quad := quads[0]
|
|
|
|
var x float64
|
|
|
|
var y float64
|
|
|
|
|
|
|
|
for _, q := range quad {
|
|
|
|
x += q.X
|
|
|
|
y += q.Y
|
|
|
|
}
|
|
|
|
|
|
|
|
return Quad{
|
|
|
|
X: x / 4,
|
|
|
|
Y: y / 4,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2018-09-27 04:03:06 +02:00
|
|
|
func parseAttrs(attrs []string) *values.Object {
|
|
|
|
var attr values.String
|
|
|
|
|
|
|
|
res := values.NewObject()
|
|
|
|
|
|
|
|
for _, el := range attrs {
|
2018-10-07 04:33:39 +02:00
|
|
|
el = strings.TrimSpace(el)
|
2018-09-27 04:03:06 +02:00
|
|
|
str := values.NewString(el)
|
|
|
|
|
|
|
|
if common.IsAttribute(el) {
|
|
|
|
attr = str
|
|
|
|
res.Set(str, values.EmptyString)
|
|
|
|
} else {
|
|
|
|
current, ok := res.Get(attr)
|
|
|
|
|
|
|
|
if ok {
|
2018-10-07 04:33:39 +02:00
|
|
|
if current.String() != "" {
|
|
|
|
res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str))
|
|
|
|
} else {
|
|
|
|
res.Set(attr, str)
|
|
|
|
}
|
2018-09-27 04:03:06 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
2018-10-11 18:39:03 +02:00
|
|
|
func loadInnerHTML(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (values.String, error) {
|
2018-11-12 21:53:36 +02:00
|
|
|
var objID runtime.RemoteObjectID
|
2018-09-27 06:26:56 +02:00
|
|
|
|
2018-10-11 18:39:03 +02:00
|
|
|
if id.objectID != "" {
|
2018-11-12 21:53:36 +02:00
|
|
|
objID = id.objectID
|
2018-10-11 18:39:03 +02:00
|
|
|
} else if id.backendID > 0 {
|
2018-11-12 21:53:36 +02:00
|
|
|
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetBackendNodeID(id.backendID))
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if repl.Object.ObjectID == nil {
|
|
|
|
return "", errors.New("unable to resolve node")
|
|
|
|
}
|
|
|
|
|
|
|
|
objID = *repl.Object.ObjectID
|
2018-10-11 18:39:03 +02:00
|
|
|
} else {
|
2018-11-12 21:53:36 +02:00
|
|
|
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id.nodeID))
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if repl.Object.ObjectID == nil {
|
|
|
|
return "", errors.New("unable to resolve node")
|
|
|
|
}
|
|
|
|
|
|
|
|
objID = *repl.Object.ObjectID
|
2018-09-27 06:26:56 +02:00
|
|
|
}
|
|
|
|
|
2018-11-22 05:45:00 +02:00
|
|
|
// not a document
|
|
|
|
if id.nodeID != 1 {
|
|
|
|
res, err := eval.Property(ctx, client, objID, "innerHTML")
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return values.NewString(res.String()), err
|
|
|
|
}
|
|
|
|
|
|
|
|
repl, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetObjectID(objID))
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return values.NewString(repl.OuterHTML), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func loadInnerText(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (values.String, error) {
|
|
|
|
var objID runtime.RemoteObjectID
|
|
|
|
|
|
|
|
if id.objectID != "" {
|
|
|
|
objID = id.objectID
|
|
|
|
} else if id.backendID > 0 {
|
|
|
|
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetBackendNodeID(id.backendID))
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if repl.Object.ObjectID == nil {
|
|
|
|
return "", errors.New("unable to resolve node")
|
|
|
|
}
|
|
|
|
|
|
|
|
objID = *repl.Object.ObjectID
|
|
|
|
} else {
|
|
|
|
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id.nodeID))
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if repl.Object.ObjectID == nil {
|
|
|
|
return "", errors.New("unable to resolve node")
|
|
|
|
}
|
|
|
|
|
|
|
|
objID = *repl.Object.ObjectID
|
|
|
|
}
|
|
|
|
|
|
|
|
// not a document
|
|
|
|
if id.nodeID != 1 {
|
|
|
|
res, err := eval.Property(ctx, client, objID, "innerText")
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return values.NewString(res.String()), err
|
|
|
|
}
|
|
|
|
|
|
|
|
repl, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetObjectID(objID))
|
2018-10-09 02:20:40 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-11 18:39:03 +02:00
|
|
|
return "", err
|
2018-10-09 02:20:40 +02:00
|
|
|
}
|
|
|
|
|
2018-11-22 05:45:00 +02:00
|
|
|
return parseInnerText(repl.OuterHTML)
|
2018-10-09 02:20:40 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func parseInnerText(innerHTML string) (values.String, error) {
|
|
|
|
buff := bytes.NewBuffer([]byte(innerHTML))
|
|
|
|
|
|
|
|
parsed, err := goquery.NewDocumentFromReader(buff)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return values.EmptyString, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return values.NewString(parsed.Text()), nil
|
|
|
|
}
|
|
|
|
|
2018-10-11 18:39:03 +02:00
|
|
|
func createChildrenArray(nodes []dom.Node) []*HTMLElementIdentity {
|
|
|
|
children := make([]*HTMLElementIdentity, len(nodes))
|
2018-09-27 06:26:56 +02:00
|
|
|
|
|
|
|
for idx, child := range nodes {
|
2018-10-11 18:39:03 +02:00
|
|
|
children[idx] = &HTMLElementIdentity{
|
|
|
|
nodeID: child.NodeID,
|
|
|
|
backendID: child.BackendNodeID,
|
2018-09-27 04:03:06 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-11 18:39:03 +02:00
|
|
|
return children
|
2018-09-27 04:03:06 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func contextWithTimeout() (context.Context, context.CancelFunc) {
|
|
|
|
return context.WithTimeout(context.Background(), DefaultTimeout)
|
|
|
|
}
|
|
|
|
|
|
|
|
func waitForLoadEvent(ctx context.Context, client *cdp.Client) error {
|
|
|
|
loadEventFired, err := client.Page.LoadEventFired(ctx)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = loadEventFired.Recv()
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return loadEventFired.Close()
|
|
|
|
}
|
|
|
|
|
|
|
|
func createEventBroker(client *cdp.Client) (*events.EventBroker, error) {
|
2018-10-15 23:17:15 +02:00
|
|
|
var err error
|
|
|
|
var onLoad page.LoadEventFiredClient
|
|
|
|
var onReload dom.DocumentUpdatedClient
|
|
|
|
var onAttrModified dom.AttributeModifiedClient
|
|
|
|
var onAttrRemoved dom.AttributeRemovedClient
|
|
|
|
var onChildCountUpdated dom.ChildNodeCountUpdatedClient
|
|
|
|
var onChildNodeInserted dom.ChildNodeInsertedClient
|
|
|
|
var onChildNodeRemoved dom.ChildNodeRemovedClient
|
2018-09-27 04:03:06 +02:00
|
|
|
ctx := context.Background()
|
2018-10-15 23:17:15 +02:00
|
|
|
|
|
|
|
onLoad, err = client.Page.LoadEventFired(ctx)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
onReload, err = client.DOM.DocumentUpdated(ctx)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-15 23:17:15 +02:00
|
|
|
onLoad.Close()
|
2018-09-27 04:03:06 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
onAttrModified, err = client.DOM.AttributeModified(ctx)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-15 23:17:15 +02:00
|
|
|
onLoad.Close()
|
|
|
|
onReload.Close()
|
2018-09-27 04:03:06 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
onAttrRemoved, err = client.DOM.AttributeRemoved(ctx)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-15 23:17:15 +02:00
|
|
|
onLoad.Close()
|
|
|
|
onReload.Close()
|
|
|
|
onAttrModified.Close()
|
2018-09-27 04:03:06 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
onChildCountUpdated, err = client.DOM.ChildNodeCountUpdated(ctx)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-15 23:17:15 +02:00
|
|
|
onLoad.Close()
|
|
|
|
onReload.Close()
|
|
|
|
onAttrModified.Close()
|
|
|
|
onAttrRemoved.Close()
|
2018-09-27 04:03:06 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
onChildNodeInserted, err = client.DOM.ChildNodeInserted(ctx)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-15 23:17:15 +02:00
|
|
|
onLoad.Close()
|
|
|
|
onReload.Close()
|
|
|
|
onAttrModified.Close()
|
|
|
|
onAttrRemoved.Close()
|
|
|
|
onChildCountUpdated.Close()
|
2018-09-27 04:03:06 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
onChildNodeRemoved, err = client.DOM.ChildNodeRemoved(ctx)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-15 23:17:15 +02:00
|
|
|
onLoad.Close()
|
|
|
|
onReload.Close()
|
|
|
|
onAttrModified.Close()
|
|
|
|
onAttrRemoved.Close()
|
|
|
|
onChildCountUpdated.Close()
|
|
|
|
onChildNodeInserted.Close()
|
2018-09-27 04:03:06 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
broker := events.NewEventBroker(
|
|
|
|
onLoad,
|
|
|
|
onReload,
|
|
|
|
onAttrModified,
|
|
|
|
onAttrRemoved,
|
|
|
|
onChildCountUpdated,
|
|
|
|
onChildNodeInserted,
|
|
|
|
onChildNodeRemoved,
|
|
|
|
)
|
2018-09-27 04:03:06 +02:00
|
|
|
|
2018-10-15 23:17:15 +02:00
|
|
|
err = broker.Start()
|
2018-09-27 04:03:06 +02:00
|
|
|
|
|
|
|
if err != nil {
|
2018-10-15 23:17:15 +02:00
|
|
|
onLoad.Close()
|
|
|
|
onReload.Close()
|
|
|
|
onAttrModified.Close()
|
|
|
|
onAttrRemoved.Close()
|
|
|
|
onChildCountUpdated.Close()
|
|
|
|
onChildNodeInserted.Close()
|
|
|
|
onChildNodeRemoved.Close()
|
2018-09-27 04:03:06 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return broker, nil
|
|
|
|
}
|