// Mirror of https://github.com/MontFerret/ferret.git (synced 2025-01-04 03:02:02 +02:00).
// File: ferret/pkg/html/dynamic/helpers.go (Go, 258 lines, 5.1 KiB).

2018-09-27 04:03:06 +02:00
package dynamic
import (
"bytes"
2018-09-27 04:03:06 +02:00
"context"
"github.com/MontFerret/ferret/pkg/html/common"
"github.com/MontFerret/ferret/pkg/html/dynamic/events"
2018-09-27 04:03:06 +02:00
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery"
2018-09-27 04:03:06 +02:00
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
2018-09-28 06:28:33 +02:00
"github.com/rs/zerolog"
2018-09-27 04:03:06 +02:00
"golang.org/x/sync/errgroup"
"strings"
2018-09-27 04:03:06 +02:00
)
// pointerInt returns a pointer to a new int holding the value of input.
// Each call yields a distinct pointer; it is handy for optional *int fields.
func pointerInt(input int) *int {
	p := new(int)
	*p = input

	return p
}
// batchFunc is an alias for a parameterless function that reports an error;
// it is the unit of work accepted by runBatch.
type batchFunc = func() error
// runBatch hands every function in funcs to an errgroup.Group and blocks
// until the group is done. It returns whatever Group.Wait reports: the
// first non-nil error returned by any of the functions, or nil.
func runBatch(funcs ...batchFunc) error {
	var group errgroup.Group

	for i := range funcs {
		group.Go(funcs[i])
	}

	return group.Wait()
}
// getRootElement fetches the document through the DOM domain and then the
// outer HTML of the document's root node.
//
// On success it returns the root node and its outer HTML as a values.String.
// If either CDP call fails it returns an empty dom.Node, values.EmptyString
// and the error.
func getRootElement(client *cdp.Client) (dom.Node, values.String, error) {
	ctx := context.Background()

	// NOTE(review): depth 1 asks only for the root's direct children; the CDP
	// docs state that -1 retrieves the entire subtree. An earlier comment on
	// this line claimed it loads the entire document - confirm the intent.
	docArgs := dom.NewGetDocumentArgs()
	docArgs.Depth = pointerInt(1)

	doc, err := client.DOM.GetDocument(ctx, docArgs)
	if err != nil {
		return dom.Node{}, values.EmptyString, err
	}

	htmlArgs := dom.NewGetOuterHTMLArgs().SetNodeID(doc.Root.NodeID)

	outer, err := client.DOM.GetOuterHTML(ctx, htmlArgs)
	if err != nil {
		return dom.Node{}, values.EmptyString, err
	}

	return doc.Root, values.NewString(outer.OuterHTML), nil
}
2018-09-27 04:03:06 +02:00
func parseAttrs(attrs []string) *values.Object {
var attr values.String
res := values.NewObject()
for _, el := range attrs {
el = strings.TrimSpace(el)
2018-09-27 04:03:06 +02:00
str := values.NewString(el)
if common.IsAttribute(el) {
attr = str
res.Set(str, values.EmptyString)
} else {
current, ok := res.Get(attr)
if ok {
if current.String() != "" {
res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str))
} else {
res.Set(attr, str)
}
2018-09-27 04:03:06 +02:00
}
}
}
return res
}
func loadInnerHTML(client *cdp.Client, id dom.NodeID) (values.String, error) {
2018-09-27 06:26:56 +02:00
res, err := client.DOM.GetOuterHTML(context.Background(), dom.NewGetOuterHTMLArgs().SetNodeID(id))
if err != nil {
return "", err
}
return values.NewString(res.OuterHTML), err
}
// loadInnerText loads the HTML of the node identified by id via
// loadInnerHTML and reduces it to text with parseInnerText.
//
// A failure to load the HTML yields values.EmptyString and the error.
// If the loaded HTML equals values.EmptyString it is returned as is,
// without being parsed.
func loadInnerText(client *cdp.Client, id dom.NodeID) (values.String, error) {
	markup, err := loadInnerHTML(client, id)

	switch {
	case err != nil:
		return values.EmptyString, err
	case markup == values.EmptyString:
		// Nothing to parse.
		return markup, nil
	default:
		return parseInnerText(markup.String())
	}
}
// parseInnerText parses innerHTML as an HTML document with goquery and
// returns the document's text (Document.Text) as a values.String.
// If parsing fails it returns values.EmptyString and the error.
func parseInnerText(innerHTML string) (values.String, error) {
	doc, err := goquery.NewDocumentFromReader(bytes.NewBufferString(innerHTML))
	if err != nil {
		return values.EmptyString, err
	}

	text := doc.Text()

	return values.NewString(text), nil
}
2018-09-27 06:26:56 +02:00
// createChildrenArray returns the NodeID of every node in nodes, in the
// same order. The result always has len(nodes) elements.
func createChildrenArray(nodes []dom.Node) []dom.NodeID {
	ids := make([]dom.NodeID, len(nodes))

	// Index into the slice rather than ranging by value so that each
	// dom.Node is not copied just to read one field.
	for i := range nodes {
		ids[i] = nodes[i].NodeID
	}

	return ids
}
2018-09-28 06:28:33 +02:00
func loadNodes(logger *zerolog.Logger, client *cdp.Client, broker *events.EventBroker, nodes []dom.NodeID) (*values.Array, error) {
2018-09-27 04:03:06 +02:00
arr := values.NewArray(len(nodes))
for _, id := range nodes {
2018-09-28 06:28:33 +02:00
child, err := LoadElement(logger, client, broker, id)
2018-09-27 04:03:06 +02:00
if err != nil {
return nil, err
}
arr.Push(child)
}
return arr, nil
}
// contextWithTimeout derives a context from context.Background that is
// cancelled after DefaultTimeout. The caller is responsible for invoking
// the returned CancelFunc.
func contextWithTimeout() (context.Context, context.CancelFunc) {
	parent := context.Background()

	return context.WithTimeout(parent, DefaultTimeout)
}
// waitForLoadEvent subscribes to the Page.loadEventFired event, blocks until
// one event has been received, and then closes the subscription.
//
// It returns the error from creating the subscription, from receiving the
// event, or from closing the stream after a successful receive. The stream
// is closed on every path once it has been created, so a failed Recv no
// longer leaves the subscription open.
func waitForLoadEvent(ctx context.Context, client *cdp.Client) error {
	loadEventFired, err := client.Page.LoadEventFired(ctx)
	if err != nil {
		return err
	}

	if _, err = loadEventFired.Recv(); err != nil {
		// Best-effort cleanup: the Recv error is the one worth reporting,
		// so a secondary Close error is deliberately discarded.
		_ = loadEventFired.Close()

		return err
	}

	return loadEventFired.Close()
}
// createEventBroker subscribes to a set of CDP page and DOM events and
// registers each subscription with a new events.EventBroker under a name:
//
//	"load"              - Page.LoadEventFired
//	"reload"            - DOM.DocumentUpdated
//	"attr:modified"     - DOM.AttributeModified
//	"attr:removed"      - DOM.AttributeRemoved
//	"children:count"    - DOM.ChildNodeCountUpdated
//	"children:inserted" - DOM.ChildNodeInserted
//	"children:deleted"  - DOM.ChildNodeRemoved
//
// Every stream is registered together with a factory that allocates the
// matching reply type. All subscriptions use context.Background().
//
// If the first subscription fails, the error is returned directly. On any
// later failure the broker is closed before nil and the error are returned.
func createEventBroker(client *cdp.Client) (*events.EventBroker, error) {
	ctx := context.Background()

	onLoad, err := client.Page.LoadEventFired(ctx)
	if err != nil {
		return nil, err
	}

	broker := events.NewEventBroker()

	// abort closes the broker and reports cause; it is the single exit used
	// for every failure that happens once the broker exists.
	abort := func(cause error) (*events.EventBroker, error) {
		broker.Close()

		return nil, cause
	}

	broker.AddEventStream("load", onLoad, func() interface{} {
		return new(page.LoadEventFiredReply)
	})

	// NOTE(review): the broker is started after only the "load" stream has
	// been registered; the remaining streams are added to a running broker.
	// Confirm EventBroker supports adding streams after Start.
	if err = broker.Start(); err != nil {
		return abort(err)
	}

	onDocUpdated, err := client.DOM.DocumentUpdated(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("reload", onDocUpdated, func() interface{} {
		return new(dom.DocumentUpdatedReply)
	})

	onAttrModified, err := client.DOM.AttributeModified(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("attr:modified", onAttrModified, func() interface{} {
		return new(dom.AttributeModifiedReply)
	})

	onAttrRemoved, err := client.DOM.AttributeRemoved(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("attr:removed", onAttrRemoved, func() interface{} {
		return new(dom.AttributeRemovedReply)
	})

	onChildCount, err := client.DOM.ChildNodeCountUpdated(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("children:count", onChildCount, func() interface{} {
		return new(dom.ChildNodeCountUpdatedReply)
	})

	onChildInserted, err := client.DOM.ChildNodeInserted(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("children:inserted", onChildInserted, func() interface{} {
		return new(dom.ChildNodeInsertedReply)
	})

	onChildRemoved, err := client.DOM.ChildNodeRemoved(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("children:deleted", onChildRemoved, func() interface{} {
		return new(dom.ChildNodeRemovedReply)
	})

	return broker, nil
}