1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-16 11:37:36 +02:00
ferret/pkg/html/dynamic/helpers.go

408 lines
7.9 KiB
Go
Raw Normal View History

2018-09-27 04:03:06 +02:00
package dynamic
import (
"bytes"
2018-09-27 04:03:06 +02:00
"context"
2018-11-12 21:53:36 +02:00
"errors"
"github.com/MontFerret/ferret/pkg/html/common"
2018-11-12 21:53:36 +02:00
"github.com/MontFerret/ferret/pkg/html/dynamic/eval"
"github.com/MontFerret/ferret/pkg/html/dynamic/events"
2018-09-27 04:03:06 +02:00
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery"
2018-09-27 04:03:06 +02:00
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
2018-11-12 21:53:36 +02:00
"github.com/mafredri/cdp/protocol/runtime"
2018-09-27 04:03:06 +02:00
"golang.org/x/sync/errgroup"
2018-11-15 21:33:53 +02:00
"math"
"strings"
2018-09-27 04:03:06 +02:00
)
2018-11-15 21:33:53 +02:00
// batchFunc is an alias for a parameterless function that reports failure
// through its error result. It is the unit of work accepted by runBatch.
type batchFunc = func() error

// Quad holds a single pair of X/Y coordinates.
type Quad struct {
	X, Y float64
}
2018-09-27 04:03:06 +02:00
// runBatch hands every given function to a shared errgroup.Group and then
// waits on the group, returning whatever the group's Wait reports.
func runBatch(funcs ...batchFunc) error {
	var group errgroup.Group

	for i := range funcs {
		group.Go(funcs[i])
	}

	return group.Wait()
}
// getRootElement requests the document from the DOM domain with a depth of 1.
// It returns the reply on success, or a nil reply together with the error.
func getRootElement(ctx context.Context, client *cdp.Client) (*dom.GetDocumentReply, error) {
	args := dom.NewGetDocumentArgs().SetDepth(1)

	root, err := client.DOM.GetDocument(ctx, args)
	if err != nil {
		return nil, err
	}

	return root, nil
}
2018-11-15 21:33:53 +02:00
// fromProtocolQuad converts a protocol quad into four Quad points.
// Consecutive pairs of values are read as (X, Y): indexes 0-1 form the first
// point, 2-3 the second, and so on up to index 7.
func fromProtocolQuad(quad dom.Quad) []Quad {
	const corners = 4

	points := make([]Quad, corners)

	for i := range points {
		points[i] = Quad{
			X: quad[2*i],
			Y: quad[2*i+1],
		}
	}

	return points
}
// computeQuadArea returns the absolute area of the polygon described by the
// given points, using the shoelace formula: every point is paired with its
// successor (the last one wraps around to the first) and half of each cross
// product is accumulated. An empty input yields 0.
func computeQuadArea(quads []Quad) float64 {
	total := 0.0
	count := len(quads)

	for i := 0; i < count; i++ {
		cur := quads[i]
		next := quads[(i+1)%count]

		total += (cur.X*next.Y - next.X*cur.Y) / 2
	}

	return math.Abs(total)
}
// getClickablePoint asks the DOM domain for the content quads of the element
// described by id and returns the centre of the first quad whose area is
// greater than 1.
//
// The element is addressed by its object ID when one is set, otherwise by its
// backend node ID when that is non-zero, otherwise by its node ID.
//
// An error is returned when the protocol call fails, or when no quad with an
// area above 1 is available.
func getClickablePoint(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (Quad, error) {
	args := dom.NewGetContentQuadsArgs()

	switch {
	case id.objectID != "":
		args.SetObjectID(id.objectID)
	case id.backendID != 0:
		args.SetBackendNodeID(id.backendID)
	default:
		args.SetNodeID(id.nodeID)
	}

	reply, err := client.DOM.GetContentQuads(ctx, args)
	if err != nil {
		return Quad{}, err
	}

	// Keep only the quads that cover more than a single unit of area.
	visible := make([][]Quad, 0, len(reply.Quads))

	for _, raw := range reply.Quads {
		corners := fromProtocolQuad(raw)

		if computeQuadArea(corners) > 1 {
			visible = append(visible, corners)
		}
	}

	// Covers both an empty reply and a reply that only has tiny quads.
	if len(visible) == 0 {
		return Quad{}, errors.New("node is either not visible or not an HTMLElement")
	}

	// The centre is the average of the corners of the first remaining quad.
	var sumX, sumY float64

	for _, corner := range visible[0] {
		sumX += corner.X
		sumY += corner.Y
	}

	return Quad{X: sumX / 4, Y: sumY / 4}, nil
}
2018-09-27 04:03:06 +02:00
func parseAttrs(attrs []string) *values.Object {
var attr values.String
res := values.NewObject()
for _, el := range attrs {
el = strings.TrimSpace(el)
2018-09-27 04:03:06 +02:00
str := values.NewString(el)
if common.IsAttribute(el) {
attr = str
res.Set(str, values.EmptyString)
} else {
current, ok := res.Get(attr)
if ok {
if current.String() != "" {
res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str))
} else {
res.Set(attr, str)
}
2018-09-27 04:03:06 +02:00
}
}
}
return res
}
func loadInnerHTML(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (values.String, error) {
2018-11-12 21:53:36 +02:00
var objID runtime.RemoteObjectID
2018-09-27 06:26:56 +02:00
if id.objectID != "" {
2018-11-12 21:53:36 +02:00
objID = id.objectID
} else if id.backendID > 0 {
2018-11-12 21:53:36 +02:00
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetBackendNodeID(id.backendID))
if err != nil {
return "", err
}
if repl.Object.ObjectID == nil {
return "", errors.New("unable to resolve node")
}
objID = *repl.Object.ObjectID
} else {
2018-11-12 21:53:36 +02:00
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id.nodeID))
if err != nil {
return "", err
}
if repl.Object.ObjectID == nil {
return "", errors.New("unable to resolve node")
}
objID = *repl.Object.ObjectID
2018-09-27 06:26:56 +02:00
}
2018-11-22 05:45:00 +02:00
// not a document
if id.nodeID != 1 {
res, err := eval.Property(ctx, client, objID, "innerHTML")
if err != nil {
return "", err
}
return values.NewString(res.String()), err
}
repl, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetObjectID(objID))
if err != nil {
return "", err
}
return values.NewString(repl.OuterHTML), nil
}
func loadInnerText(ctx context.Context, client *cdp.Client, id *HTMLElementIdentity) (values.String, error) {
var objID runtime.RemoteObjectID
if id.objectID != "" {
objID = id.objectID
} else if id.backendID > 0 {
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetBackendNodeID(id.backendID))
if err != nil {
return "", err
}
if repl.Object.ObjectID == nil {
return "", errors.New("unable to resolve node")
}
objID = *repl.Object.ObjectID
} else {
repl, err := client.DOM.ResolveNode(ctx, dom.NewResolveNodeArgs().SetNodeID(id.nodeID))
if err != nil {
return "", err
}
if repl.Object.ObjectID == nil {
return "", errors.New("unable to resolve node")
}
objID = *repl.Object.ObjectID
}
// not a document
if id.nodeID != 1 {
res, err := eval.Property(ctx, client, objID, "innerText")
if err != nil {
return "", err
}
return values.NewString(res.String()), err
}
repl, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetObjectID(objID))
if err != nil {
return "", err
}
2018-11-22 05:45:00 +02:00
return parseInnerText(repl.OuterHTML)
}
// parseInnerText parses the given markup with goquery and returns the text of
// the resulting document. On a parse failure it returns an empty string
// together with the error.
func parseInnerText(innerHTML string) (values.String, error) {
	doc, err := goquery.NewDocumentFromReader(bytes.NewBufferString(innerHTML))
	if err != nil {
		return values.EmptyString, err
	}

	text := doc.Text()

	return values.NewString(text), nil
}
func createChildrenArray(nodes []dom.Node) []*HTMLElementIdentity {
children := make([]*HTMLElementIdentity, len(nodes))
2018-09-27 06:26:56 +02:00
for idx, child := range nodes {
children[idx] = &HTMLElementIdentity{
nodeID: child.NodeID,
backendID: child.BackendNodeID,
2018-09-27 04:03:06 +02:00
}
}
return children
2018-09-27 04:03:06 +02:00
}
// contextWithTimeout derives a context from context.Background that expires
// after DefaultTimeout. The caller owns the returned cancel function.
func contextWithTimeout() (context.Context, context.CancelFunc) {
	parent := context.Background()

	return context.WithTimeout(parent, DefaultTimeout)
}
// waitForLoadEvent subscribes to the page's load event, blocks until one
// event is received and then closes the subscription.
//
// It returns the subscription error, the receive error, or the error from
// closing the stream after a successful receive. The stream is closed on the
// receive-failure path as well, so a failed wait does not leave an open
// subscription behind.
func waitForLoadEvent(ctx context.Context, client *cdp.Client) error {
	loadEventFired, err := client.Page.LoadEventFired(ctx)
	if err != nil {
		return err
	}

	if _, err = loadEventFired.Recv(); err != nil {
		// Best-effort cleanup: the receive error is the one worth reporting,
		// so a secondary close error is deliberately dropped.
		_ = loadEventFired.Close()

		return err
	}

	return loadEventFired.Close()
}
func createEventBroker(client *cdp.Client) (*events.EventBroker, error) {
var err error
var onLoad page.LoadEventFiredClient
var onReload dom.DocumentUpdatedClient
var onAttrModified dom.AttributeModifiedClient
var onAttrRemoved dom.AttributeRemovedClient
var onChildCountUpdated dom.ChildNodeCountUpdatedClient
var onChildNodeInserted dom.ChildNodeInsertedClient
var onChildNodeRemoved dom.ChildNodeRemovedClient
2018-09-27 04:03:06 +02:00
ctx := context.Background()
onLoad, err = client.Page.LoadEventFired(ctx)
2018-09-27 04:03:06 +02:00
if err != nil {
return nil, err
}
onReload, err = client.DOM.DocumentUpdated(ctx)
2018-09-27 04:03:06 +02:00
if err != nil {
onLoad.Close()
2018-09-27 04:03:06 +02:00
return nil, err
}
onAttrModified, err = client.DOM.AttributeModified(ctx)
2018-09-27 04:03:06 +02:00
if err != nil {
onLoad.Close()
onReload.Close()
2018-09-27 04:03:06 +02:00
return nil, err
}
onAttrRemoved, err = client.DOM.AttributeRemoved(ctx)
2018-09-27 04:03:06 +02:00
if err != nil {
onLoad.Close()
onReload.Close()
onAttrModified.Close()
2018-09-27 04:03:06 +02:00
return nil, err
}
onChildCountUpdated, err = client.DOM.ChildNodeCountUpdated(ctx)
2018-09-27 04:03:06 +02:00
if err != nil {
onLoad.Close()
onReload.Close()
onAttrModified.Close()
onAttrRemoved.Close()
2018-09-27 04:03:06 +02:00
return nil, err
}
onChildNodeInserted, err = client.DOM.ChildNodeInserted(ctx)
2018-09-27 04:03:06 +02:00
if err != nil {
onLoad.Close()
onReload.Close()
onAttrModified.Close()
onAttrRemoved.Close()
onChildCountUpdated.Close()
2018-09-27 04:03:06 +02:00
return nil, err
}
onChildNodeRemoved, err = client.DOM.ChildNodeRemoved(ctx)
2018-09-27 04:03:06 +02:00
if err != nil {
onLoad.Close()
onReload.Close()
onAttrModified.Close()
onAttrRemoved.Close()
onChildCountUpdated.Close()
onChildNodeInserted.Close()
2018-09-27 04:03:06 +02:00
return nil, err
}
broker := events.NewEventBroker(
onLoad,
onReload,
onAttrModified,
onAttrRemoved,
onChildCountUpdated,
onChildNodeInserted,
onChildNodeRemoved,
)
2018-09-27 04:03:06 +02:00
err = broker.Start()
2018-09-27 04:03:06 +02:00
if err != nil {
onLoad.Close()
onReload.Close()
onAttrModified.Close()
onAttrRemoved.Close()
onChildCountUpdated.Close()
onChildNodeInserted.Close()
onChildNodeRemoved.Close()
2018-09-27 04:03:06 +02:00
return nil, err
}
return broker, nil
}