mirror of
https://github.com/MontFerret/ferret.git
synced 2024-12-14 11:23:02 +02:00
Refactored dynamic elements
This commit is contained in:
parent
5cad22e3b3
commit
825c33010c
@ -1,4 +1,4 @@
|
||||
package cmd
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
|
@ -27,8 +27,8 @@ func (p *Program) Run(ctx context.Context, setters ...Option) ([]byte, error) {
|
||||
}
|
||||
|
||||
ctx = opts.withContext(ctx)
|
||||
ctx = driver.WithCdpDriver(ctx, opts.cdp)
|
||||
ctx = driver.WithHttpDriver(ctx)
|
||||
ctx = driver.WithDynamicDriver(ctx, opts.cdp)
|
||||
ctx = driver.WithStaticDriver(ctx)
|
||||
|
||||
out, err := p.exp.Exec(ctx, scope)
|
||||
|
||||
|
@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic"
|
||||
)
|
||||
|
||||
/*
|
||||
@ -29,7 +29,7 @@ func Click(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
return values.False, err
|
||||
}
|
||||
|
||||
el, ok := arg1.(*browser.HtmlElement)
|
||||
el, ok := arg1.(*dynamic.HtmlElement)
|
||||
|
||||
if !ok {
|
||||
return values.False, core.Error(core.ErrInvalidType, "expected dynamic element")
|
||||
@ -47,7 +47,7 @@ func Click(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
doc, ok := arg1.(*browser.HtmlDocument)
|
||||
doc, ok := arg1.(*dynamic.HtmlDocument)
|
||||
|
||||
if !ok {
|
||||
return values.False, core.Error(core.ErrInvalidType, "expected dynamic document")
|
||||
@ -83,7 +83,7 @@ func Navigate(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
doc, ok := args[0].(*browser.HtmlDocument)
|
||||
doc, ok := args[0].(*dynamic.HtmlDocument)
|
||||
|
||||
if !ok {
|
||||
return values.False, core.Error(core.ErrInvalidType, "expected dynamic document")
|
||||
|
@ -5,11 +5,11 @@ import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
|
||||
)
|
||||
|
||||
func Document(ctx context.Context, inputs ...core.Value) (core.Value, error) {
|
||||
url, js, err := documentArgs(inputs)
|
||||
url, dynamic, err := documentArgs(inputs)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
@ -17,10 +17,10 @@ func Document(ctx context.Context, inputs ...core.Value) (core.Value, error) {
|
||||
|
||||
var drv driver.Driver
|
||||
|
||||
if !js {
|
||||
drv, err = driver.FromContext(ctx, driver.Http)
|
||||
if !dynamic {
|
||||
drv, err = driver.FromContext(ctx, driver.Static)
|
||||
} else {
|
||||
drv, err = driver.FromContext(ctx, driver.Cdp)
|
||||
drv, err = driver.FromContext(ctx, driver.Dynamic)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@ -43,13 +43,13 @@ func DocumentParse(ctx context.Context, inputs ...core.Value) (core.Value, error
|
||||
return arg1, core.Error(core.TypeError(a1.Type(), core.StringType), "arg 1")
|
||||
}
|
||||
|
||||
drv, err := driver.FromContext(ctx, driver.Http)
|
||||
drv, err := driver.FromContext(ctx, driver.Static)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
return drv.(*http.HttpDriver).ParseDocument(ctx, arg1.String())
|
||||
return drv.(*static.Driver).ParseDocument(ctx, arg1.String())
|
||||
}
|
||||
|
||||
func documentArgs(inputs []core.Value) (values.String, values.Boolean, error) {
|
||||
|
@ -1,43 +0,0 @@
|
||||
package browser
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/mafredri/cdp"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
// pointerInt copies input into a freshly allocated int and returns its
// address, which is convenient for optional *int fields.
func pointerInt(input int) *int {
	ptr := new(int)
	*ptr = input

	return ptr
}
|
||||
|
||||
type batchFunc = func() error
|
||||
|
||||
func runBatch(funcs ...batchFunc) error {
|
||||
eg := errgroup.Group{}
|
||||
|
||||
for _, f := range funcs {
|
||||
eg.Go(f)
|
||||
}
|
||||
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
func contextWithTimeout() (context.Context, context.CancelFunc) {
|
||||
return context.WithTimeout(context.Background(), DefaultTimeout)
|
||||
}
|
||||
|
||||
func waitForLoadEvent(ctx context.Context, client *cdp.Client) error {
|
||||
loadEventFired, err := client.Page.LoadEventFired(ctx)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = loadEventFired.Recv()
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return loadEventFired.Close()
|
||||
}
|
56
pkg/stdlib/html/driver/common/lazy.go
Normal file
56
pkg/stdlib/html/driver/common/lazy.go
Normal file
@ -0,0 +1,56 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type (
|
||||
LazyFactory func() (core.Value, error)
|
||||
|
||||
LazyValue struct {
|
||||
sync.Mutex
|
||||
factory LazyFactory
|
||||
ready bool
|
||||
value core.Value
|
||||
err error
|
||||
}
|
||||
)
|
||||
|
||||
func NewLazyValue(factory LazyFactory) *LazyValue {
|
||||
lz := new(LazyValue)
|
||||
lz.ready = false
|
||||
lz.factory = factory
|
||||
lz.value = values.None
|
||||
|
||||
return lz
|
||||
}
|
||||
|
||||
func (lv *LazyValue) Value() (core.Value, error) {
|
||||
lv.Lock()
|
||||
defer lv.Unlock()
|
||||
|
||||
if !lv.ready {
|
||||
val, err := lv.factory()
|
||||
|
||||
if err == nil {
|
||||
lv.value = val
|
||||
lv.err = nil
|
||||
} else {
|
||||
lv.value = values.None
|
||||
lv.err = err
|
||||
}
|
||||
}
|
||||
|
||||
return lv.value, lv.err
|
||||
}
|
||||
|
||||
func (lv *LazyValue) Reset() {
|
||||
lv.Lock()
|
||||
defer lv.Unlock()
|
||||
|
||||
lv.ready = false
|
||||
lv.value = values.None
|
||||
lv.err = nil
|
||||
}
|
@ -5,23 +5,27 @@ import (
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
|
||||
)
|
||||
|
||||
const Cdp = "cdp"
|
||||
const Http = "http"
|
||||
type DriverName string
|
||||
|
||||
const (
|
||||
Dynamic DriverName = "dynamic"
|
||||
Static DriverName = "static"
|
||||
)
|
||||
|
||||
type Driver interface {
|
||||
GetDocument(ctx context.Context, url string) (values.HtmlNode, error)
|
||||
Close() error
|
||||
}
|
||||
|
||||
func ToContext(ctx context.Context, name string, drv Driver) context.Context {
|
||||
func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context {
|
||||
return context.WithValue(ctx, name, drv)
|
||||
}
|
||||
|
||||
func FromContext(ctx context.Context, name string) (Driver, error) {
|
||||
func FromContext(ctx context.Context, name DriverName) (Driver, error) {
|
||||
val := ctx.Value(name)
|
||||
|
||||
drv, ok := val.(Driver)
|
||||
@ -33,10 +37,10 @@ func FromContext(ctx context.Context, name string) (Driver, error) {
|
||||
return nil, core.Error(core.ErrNotFound, fmt.Sprintf("%s driver", name))
|
||||
}
|
||||
|
||||
func WithCdpDriver(ctx context.Context, addr string) context.Context {
|
||||
return context.WithValue(ctx, Cdp, browser.NewDriver(addr))
|
||||
func WithDynamicDriver(ctx context.Context, addr string) context.Context {
|
||||
return context.WithValue(ctx, Dynamic, dynamic.NewDriver(addr))
|
||||
}
|
||||
|
||||
func WithHttpDriver(ctx context.Context, opts ...http.Option) context.Context {
|
||||
return context.WithValue(ctx, Http, http.NewDriver(opts...))
|
||||
func WithStaticDriver(ctx context.Context, opts ...static.Option) context.Context {
|
||||
return context.WithValue(ctx, Static, static.NewDriver(opts...))
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
package browser
|
||||
package dynamic
|
||||
|
||||
import (
|
||||
"context"
|
||||
@ -6,8 +6,8 @@ import (
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/events"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
|
||||
"github.com/corpix/uarand"
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/protocol/dom"
|
||||
@ -27,6 +27,7 @@ type HtmlDocument struct {
|
||||
events *events.EventBroker
|
||||
url string
|
||||
element *HtmlElement
|
||||
history []*HtmlElement
|
||||
}
|
||||
|
||||
func LoadHtmlDocument(
|
||||
@ -82,7 +83,7 @@ func LoadHtmlDocument(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
root, err := getRootElement(client)
|
||||
root, innerHtml, err := getRootElement(client)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -94,56 +95,42 @@ func LoadHtmlDocument(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return NewHtmlDocument(conn, client, root, broker), nil
|
||||
return NewHtmlDocument(conn, client, broker, root, innerHtml), nil
|
||||
}
|
||||
|
||||
func getRootElement(client *cdp.Client) (dom.Node, error) {
|
||||
func getRootElement(client *cdp.Client) (dom.Node, values.String, error) {
|
||||
args := dom.NewGetDocumentArgs()
|
||||
args.Depth = pointerInt(1) // lets load the entire document
|
||||
ctx := context.Background()
|
||||
|
||||
d, err := client.DOM.GetDocument(context.Background(), args)
|
||||
d, err := client.DOM.GetDocument(ctx, args)
|
||||
|
||||
if err != nil {
|
||||
return dom.Node{}, err
|
||||
return dom.Node{}, values.EmptyString, err
|
||||
}
|
||||
|
||||
return d.Root, nil
|
||||
}
|
||||
|
||||
func createEventBroker(client *cdp.Client) (*events.EventBroker, error) {
|
||||
load, err := client.Page.LoadEventFired(context.Background())
|
||||
innerHtml, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID))
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return dom.Node{}, values.EmptyString, err
|
||||
}
|
||||
|
||||
broker := events.NewEventBroker()
|
||||
broker.AddEventStream("load", load, func() interface{} {
|
||||
return new(page.LoadEventFiredReply)
|
||||
})
|
||||
|
||||
err = broker.Start()
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return broker, nil
|
||||
return d.Root, values.NewString(innerHtml.OuterHTML), nil
|
||||
}
|
||||
|
||||
func NewHtmlDocument(
|
||||
conn *rpcc.Conn,
|
||||
client *cdp.Client,
|
||||
root dom.Node,
|
||||
broker *events.EventBroker,
|
||||
root dom.Node,
|
||||
innerHtml values.String,
|
||||
) *HtmlDocument {
|
||||
doc := new(HtmlDocument)
|
||||
doc.conn = conn
|
||||
doc.client = client
|
||||
doc.events = broker
|
||||
doc.element = NewHtmlElement(client, root.NodeID, root)
|
||||
doc.element = NewHtmlElement(client, broker, root.NodeID, root, innerHtml)
|
||||
doc.history = make([]*HtmlElement, 0, 10)
|
||||
doc.url = ""
|
||||
|
||||
if root.BaseURL != nil {
|
||||
@ -154,18 +141,18 @@ func NewHtmlDocument(
|
||||
doc.Lock()
|
||||
defer doc.Unlock()
|
||||
|
||||
updated, err := getRootElement(client)
|
||||
updated, innerHtml, err := getRootElement(client)
|
||||
|
||||
if err != nil {
|
||||
// TODO: We need to somehow log all errors outside of stdout
|
||||
return
|
||||
}
|
||||
|
||||
// close an old root element
|
||||
doc.element.Close()
|
||||
// put the root element in a history list, since it might be still used
|
||||
doc.history = append(doc.history, doc.element)
|
||||
|
||||
// create a new root element wrapper
|
||||
doc.element = NewHtmlElement(client, updated.NodeID, updated)
|
||||
doc.element = NewHtmlElement(client, broker, updated.NodeID, updated, innerHtml)
|
||||
doc.url = ""
|
||||
|
||||
if updated.BaseURL != nil {
|
||||
@ -241,6 +228,11 @@ func (doc *HtmlDocument) Close() error {
|
||||
doc.events.Stop()
|
||||
doc.events.Close()
|
||||
|
||||
for _, h := range doc.history {
|
||||
h.Close()
|
||||
}
|
||||
|
||||
doc.element.Close()
|
||||
doc.client.Page.Close(context.Background())
|
||||
|
||||
return doc.conn.Close()
|
@ -1,4 +1,4 @@
|
||||
package browser
|
||||
package dynamic
|
||||
|
||||
import (
|
||||
"context"
|
||||
@ -12,7 +12,7 @@ import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
type CdpDriver struct {
|
||||
type Driver struct {
|
||||
sync.Mutex
|
||||
dev *devtool.DevTools
|
||||
conn *rpcc.Conn
|
||||
@ -21,14 +21,14 @@ type CdpDriver struct {
|
||||
contextID target.BrowserContextID
|
||||
}
|
||||
|
||||
func NewDriver(address string) *CdpDriver {
|
||||
drv := new(CdpDriver)
|
||||
func NewDriver(address string) *Driver {
|
||||
drv := new(Driver)
|
||||
drv.dev = devtool.New(address)
|
||||
|
||||
return drv
|
||||
}
|
||||
|
||||
func (drv *CdpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) {
|
||||
func (drv *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) {
|
||||
err := drv.init(ctx)
|
||||
|
||||
if err != nil {
|
||||
@ -57,7 +57,7 @@ func (drv *CdpDriver) GetDocument(ctx context.Context, url string) (values.HtmlN
|
||||
return LoadHtmlDocument(ctx, conn, url)
|
||||
}
|
||||
|
||||
func (drv *CdpDriver) Close() error {
|
||||
func (drv *Driver) Close() error {
|
||||
drv.Lock()
|
||||
defer drv.Unlock()
|
||||
|
||||
@ -70,7 +70,7 @@ func (drv *CdpDriver) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (drv *CdpDriver) init(ctx context.Context) error {
|
||||
func (drv *Driver) init(ctx context.Context) error {
|
||||
drv.Lock()
|
||||
defer drv.Unlock()
|
||||
|
@ -1,4 +1,4 @@
|
||||
package browser
|
||||
package dynamic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@ -7,30 +7,37 @@ import (
|
||||
"encoding/json"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/events"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/protocol/dom"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
const DefaultTimeout = time.Second * 30
|
||||
|
||||
type HtmlElement struct {
|
||||
sync.Mutex
|
||||
client *cdp.Client
|
||||
broker *events.EventBroker
|
||||
connected bool
|
||||
id dom.NodeID
|
||||
nodeType values.Int
|
||||
nodeName values.String
|
||||
innerHtml values.String
|
||||
innerText *common.LazyValue
|
||||
value string
|
||||
attributes *values.Object
|
||||
attributes *common.LazyValue
|
||||
children []dom.NodeID
|
||||
loadedChildren *values.Array
|
||||
loadedChildren *common.LazyValue
|
||||
}
|
||||
|
||||
func LoadElement(
|
||||
client *cdp.Client,
|
||||
broker *events.EventBroker,
|
||||
id dom.NodeID,
|
||||
) (*HtmlElement, error) {
|
||||
if client == nil {
|
||||
@ -46,28 +53,70 @@ func LoadElement(
|
||||
dom.
|
||||
NewDescribeNodeArgs().
|
||||
SetNodeID(id).
|
||||
SetDepth(1),
|
||||
SetDepth(-1),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return nil, core.Error(err, strconv.Itoa(int(id)))
|
||||
}
|
||||
|
||||
return NewHtmlElement(client, id, node.Node), nil
|
||||
innerHtml, err := client.DOM.GetOuterHTML(
|
||||
ctx,
|
||||
dom.NewGetOuterHTMLArgs().SetNodeID(id),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return nil, core.Error(err, strconv.Itoa(int(id)))
|
||||
}
|
||||
|
||||
return NewHtmlElement(
|
||||
client,
|
||||
broker,
|
||||
id,
|
||||
node.Node,
|
||||
values.NewString(innerHtml.OuterHTML),
|
||||
), nil
|
||||
}
|
||||
|
||||
func NewHtmlElement(
|
||||
client *cdp.Client,
|
||||
broker *events.EventBroker,
|
||||
id dom.NodeID,
|
||||
node dom.Node,
|
||||
innerHtml values.String,
|
||||
) *HtmlElement {
|
||||
el := new(HtmlElement)
|
||||
el.client = client
|
||||
el.broker = broker
|
||||
el.connected = true
|
||||
el.id = id
|
||||
el.nodeType = values.NewInt(node.NodeType)
|
||||
el.nodeName = values.NewString(node.NodeName)
|
||||
el.innerHtml = innerHtml
|
||||
el.innerText = common.NewLazyValue(func() (core.Value, error) {
|
||||
h := el.InnerHtml()
|
||||
|
||||
if h == values.EmptyString {
|
||||
return h, nil
|
||||
}
|
||||
|
||||
buff := bytes.NewBuffer([]byte(h))
|
||||
|
||||
parsed, err := goquery.NewDocumentFromReader(buff)
|
||||
|
||||
if err != nil {
|
||||
return values.EmptyString, err
|
||||
}
|
||||
|
||||
return values.NewString(parsed.Text()), nil
|
||||
})
|
||||
el.attributes = common.NewLazyValue(func() (core.Value, error) {
|
||||
return parseAttrs(node.Attributes), nil
|
||||
})
|
||||
el.value = ""
|
||||
el.attributes = parseAttrs(node.Attributes)
|
||||
el.loadedChildren = common.NewLazyValue(func() (core.Value, error) {
|
||||
return loadNodes(client, broker, el.children)
|
||||
})
|
||||
|
||||
var childCount int
|
||||
|
||||
@ -89,8 +138,6 @@ func NewHtmlElement(
|
||||
}
|
||||
|
||||
func (el *HtmlElement) Close() error {
|
||||
// el.client = nil
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -178,11 +225,23 @@ func (el *HtmlElement) NodeName() values.String {
|
||||
}
|
||||
|
||||
func (el *HtmlElement) GetAttributes() core.Value {
|
||||
return el.attributes
|
||||
val, err := el.attributes.Value()
|
||||
|
||||
if err != nil {
|
||||
return values.None
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
func (el *HtmlElement) GetAttribute(name values.String) core.Value {
|
||||
val, found := el.attributes.Get(name)
|
||||
attrs, err := el.attributes.Value()
|
||||
|
||||
if err != nil {
|
||||
return values.None
|
||||
}
|
||||
|
||||
val, found := attrs.(*values.Object).Get(name)
|
||||
|
||||
if !found {
|
||||
return values.None
|
||||
@ -192,19 +251,23 @@ func (el *HtmlElement) GetAttribute(name values.String) core.Value {
|
||||
}
|
||||
|
||||
func (el *HtmlElement) GetChildNodes() core.Value {
|
||||
if el.loadedChildren == nil {
|
||||
el.loadedChildren = loadNodes(el.client, el.children)
|
||||
val, err := el.loadedChildren.Value()
|
||||
|
||||
if err != nil {
|
||||
return values.NewArray(0)
|
||||
}
|
||||
|
||||
return el.loadedChildren
|
||||
return val
|
||||
}
|
||||
|
||||
func (el *HtmlElement) GetChildNode(idx values.Int) core.Value {
|
||||
if el.loadedChildren == nil {
|
||||
el.loadedChildren = loadNodes(el.client, el.children)
|
||||
val, err := el.loadedChildren.Value()
|
||||
|
||||
if err != nil {
|
||||
return values.None
|
||||
}
|
||||
|
||||
return el.loadedChildren.Get(idx)
|
||||
return val.(*values.Array).Get(idx)
|
||||
}
|
||||
|
||||
func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
|
||||
@ -217,7 +280,7 @@ func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
|
||||
return values.None
|
||||
}
|
||||
|
||||
res, err := LoadElement(el.client, found.NodeID)
|
||||
res, err := LoadElement(el.client, el.broker, found.NodeID)
|
||||
|
||||
if err != nil {
|
||||
return values.None
|
||||
@ -239,7 +302,7 @@ func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value {
|
||||
arr := values.NewArray(len(res.NodeIDs))
|
||||
|
||||
for _, id := range res.NodeIDs {
|
||||
childEl, err := LoadElement(el.client, id)
|
||||
childEl, err := LoadElement(el.client, el.broker, id)
|
||||
|
||||
if err != nil {
|
||||
return values.None
|
||||
@ -252,84 +315,23 @@ func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value {
|
||||
}
|
||||
|
||||
func (el *HtmlElement) InnerText() values.String {
|
||||
h := el.InnerHtml()
|
||||
|
||||
if h == values.EmptyString {
|
||||
return h
|
||||
}
|
||||
|
||||
buff := bytes.NewBuffer([]byte(h))
|
||||
|
||||
parsed, err := goquery.NewDocumentFromReader(buff)
|
||||
val, err := el.innerText.Value()
|
||||
|
||||
if err != nil {
|
||||
return values.EmptyString
|
||||
}
|
||||
|
||||
return values.NewString(parsed.Text())
|
||||
return val.(values.String)
|
||||
}
|
||||
|
||||
func (el *HtmlElement) InnerHtml() values.String {
|
||||
ctx, cancelFn := createCtx()
|
||||
|
||||
defer cancelFn()
|
||||
|
||||
res, err := el.client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(el.id))
|
||||
|
||||
if err != nil {
|
||||
return values.EmptyString
|
||||
}
|
||||
|
||||
return values.NewString(res.OuterHTML)
|
||||
return el.innerHtml
|
||||
}
|
||||
|
||||
func (el *HtmlElement) Click() (values.Boolean, error) {
|
||||
ctx, cancel := createCtx()
|
||||
ctx, cancel := contextWithTimeout()
|
||||
|
||||
defer cancel()
|
||||
|
||||
return events.DispatchEvent(ctx, el.client, el.id, "click")
|
||||
}
|
||||
|
||||
func createCtx() (context.Context, context.CancelFunc) {
|
||||
return context.WithTimeout(context.Background(), DefaultTimeout)
|
||||
}
|
||||
|
||||
func parseAttrs(attrs []string) *values.Object {
|
||||
var attr values.String
|
||||
|
||||
res := values.NewObject()
|
||||
|
||||
for _, el := range attrs {
|
||||
str := values.NewString(el)
|
||||
|
||||
if common.IsAttribute(el) {
|
||||
attr = str
|
||||
res.Set(str, values.EmptyString)
|
||||
} else {
|
||||
current, ok := res.Get(attr)
|
||||
|
||||
if ok {
|
||||
res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func loadNodes(client *cdp.Client, nodes []dom.NodeID) *values.Array {
|
||||
arr := values.NewArray(len(nodes))
|
||||
|
||||
for _, id := range nodes {
|
||||
child, err := LoadElement(client, id)
|
||||
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
arr.Push(child)
|
||||
}
|
||||
|
||||
return arr
|
||||
}
|
@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/protocol/dom"
|
||||
"github.com/mafredri/cdp/protocol/runtime"
|
@ -3,7 +3,7 @@ package events
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
|
||||
"github.com/mafredri/cdp"
|
||||
"time"
|
||||
)
|
182
pkg/stdlib/html/driver/dynamic/helpers.go
Normal file
182
pkg/stdlib/html/driver/dynamic/helpers.go
Normal file
@ -0,0 +1,182 @@
|
||||
package dynamic
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/protocol/dom"
|
||||
"github.com/mafredri/cdp/protocol/page"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
// pointerInt copies input into a freshly allocated int and returns its
// address, which is convenient for optional *int fields.
func pointerInt(input int) *int {
	ptr := new(int)
	*ptr = input

	return ptr
}
|
||||
|
||||
type batchFunc = func() error
|
||||
|
||||
func runBatch(funcs ...batchFunc) error {
|
||||
eg := errgroup.Group{}
|
||||
|
||||
for _, f := range funcs {
|
||||
eg.Go(f)
|
||||
}
|
||||
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
func parseAttrs(attrs []string) *values.Object {
|
||||
var attr values.String
|
||||
|
||||
res := values.NewObject()
|
||||
|
||||
for _, el := range attrs {
|
||||
str := values.NewString(el)
|
||||
|
||||
if common.IsAttribute(el) {
|
||||
attr = str
|
||||
res.Set(str, values.EmptyString)
|
||||
} else {
|
||||
current, ok := res.Get(attr)
|
||||
|
||||
if ok {
|
||||
res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func loadNodes(client *cdp.Client, broker *events.EventBroker, nodes []dom.NodeID) (*values.Array, error) {
|
||||
arr := values.NewArray(len(nodes))
|
||||
|
||||
for _, id := range nodes {
|
||||
child, err := LoadElement(client, broker, id)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
arr.Push(child)
|
||||
}
|
||||
|
||||
return arr, nil
|
||||
}
|
||||
|
||||
func contextWithTimeout() (context.Context, context.CancelFunc) {
|
||||
return context.WithTimeout(context.Background(), DefaultTimeout)
|
||||
}
|
||||
|
||||
func waitForLoadEvent(ctx context.Context, client *cdp.Client) error {
|
||||
loadEventFired, err := client.Page.LoadEventFired(ctx)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = loadEventFired.Recv()
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return loadEventFired.Close()
|
||||
}
|
||||
|
||||
func createEventBroker(client *cdp.Client) (*events.EventBroker, error) {
|
||||
ctx := context.Background()
|
||||
load, err := client.Page.LoadEventFired(ctx)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
broker := events.NewEventBroker()
|
||||
broker.AddEventStream("load", load, func() interface{} {
|
||||
return new(page.LoadEventFiredReply)
|
||||
})
|
||||
|
||||
err = broker.Start()
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
destroy, err := client.DOM.DocumentUpdated(ctx)
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
broker.AddEventStream("reload", destroy, func() interface{} {
|
||||
return new(dom.DocumentUpdatedReply)
|
||||
})
|
||||
|
||||
attrModified, err := client.DOM.AttributeModified(ctx)
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
broker.AddEventStream("attr:modified", attrModified, func() interface{} {
|
||||
return new(dom.AttributeModifiedReply)
|
||||
})
|
||||
|
||||
attrRemoved, err := client.DOM.AttributeRemoved(ctx)
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
broker.AddEventStream("attr:removed", attrRemoved, func() interface{} {
|
||||
return new(dom.AttributeRemovedReply)
|
||||
})
|
||||
|
||||
childrenCount, err := client.DOM.ChildNodeCountUpdated(ctx)
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
broker.AddEventStream("children:count", childrenCount, func() interface{} {
|
||||
return new(dom.ChildNodeCountUpdatedReply)
|
||||
})
|
||||
|
||||
childrenInsert, err := client.DOM.ChildNodeInserted(ctx)
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
broker.AddEventStream("children:inserted", childrenInsert, func() interface{} {
|
||||
return new(dom.ChildNodeInsertedReply)
|
||||
})
|
||||
|
||||
childDeleted, err := client.DOM.ChildNodeRemoved(ctx)
|
||||
|
||||
if err != nil {
|
||||
broker.Close()
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
broker.AddEventStream("children:deleted", childDeleted, func() interface{} {
|
||||
return new(dom.ChildNodeRemovedReply)
|
||||
})
|
||||
|
||||
return broker, nil
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package http
|
||||
package static
|
||||
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
@ -1,8 +1,8 @@
|
||||
package http_test
|
||||
package static_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
@ -228,7 +228,7 @@ func TestDocument(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Selection)
|
||||
el, err := static.NewHtmlElement(doc.Selection)
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
@ -1,4 +1,4 @@
|
||||
package http
|
||||
package static
|
||||
|
||||
import (
|
||||
"crypto/sha512"
|
@ -1,8 +1,8 @@
|
||||
package http_test
|
||||
package static_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
@ -250,7 +250,7 @@ func TestElement(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Find("body"))
|
||||
el, err := static.NewHtmlElement(doc.Find("body"))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
@ -266,7 +266,7 @@ func TestElement(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Find("body"))
|
||||
el, err := static.NewHtmlElement(doc.Find("body"))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
@ -290,7 +290,7 @@ func TestElement(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Find("body"))
|
||||
el, err := static.NewHtmlElement(doc.Find("body"))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
@ -315,7 +315,7 @@ func TestElement(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Find("#q"))
|
||||
el, err := static.NewHtmlElement(doc.Find("#q"))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
@ -342,7 +342,7 @@ func TestElement(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Find("h2"))
|
||||
el, err := static.NewHtmlElement(doc.Find("h2"))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
@ -369,7 +369,7 @@ func TestElement(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Find("#content"))
|
||||
el, err := static.NewHtmlElement(doc.Find("#content"))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
@ -385,7 +385,7 @@ func TestElement(t *testing.T) {
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
el, err := http.NewHtmlElement(doc.Find("body .card-img-top:nth-child(1)"))
|
||||
el, err := static.NewHtmlElement(doc.Find("body .card-img-top:nth-child(1)"))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
|
@ -1,4 +1,4 @@
|
||||
package http
|
||||
package static
|
||||
|
||||
import "github.com/sethgrid/pester"
|
||||
|
@ -1,4 +1,4 @@
|
||||
package http
|
||||
package static
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@ -11,11 +11,11 @@ import (
|
||||
httpx "net/http"
|
||||
)
|
||||
|
||||
type HttpDriver struct {
|
||||
type Driver struct {
|
||||
client *pester.Client
|
||||
}
|
||||
|
||||
func NewDriver(setters ...Option) *HttpDriver {
|
||||
func NewDriver(setters ...Option) *Driver {
|
||||
client := pester.New()
|
||||
client.Concurrency = 3
|
||||
client.MaxRetries = 5
|
||||
@ -25,10 +25,10 @@ func NewDriver(setters ...Option) *HttpDriver {
|
||||
setter(client)
|
||||
}
|
||||
|
||||
return &HttpDriver{client}
|
||||
return &Driver{client}
|
||||
}
|
||||
|
||||
func (d *HttpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) {
|
||||
func (d *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) {
|
||||
req, err := httpx.NewRequest(httpx.MethodGet, url, nil)
|
||||
|
||||
if err != nil {
|
||||
@ -58,7 +58,7 @@ func (d *HttpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNo
|
||||
return NewHtmlDocument(url, doc)
|
||||
}
|
||||
|
||||
func (d *HttpDriver) ParseDocument(ctx context.Context, str string) (values.HtmlNode, error) {
|
||||
func (d *Driver) ParseDocument(ctx context.Context, str string) (values.HtmlNode, error) {
|
||||
buf := bytes.NewBuffer([]byte(str))
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(buf)
|
||||
@ -70,7 +70,7 @@ func (d *HttpDriver) ParseDocument(ctx context.Context, str string) (values.Html
|
||||
return NewHtmlDocument("#string", doc)
|
||||
}
|
||||
|
||||
func (d *HttpDriver) Close() error {
|
||||
func (d *Driver) Close() error {
|
||||
d.client = nil
|
||||
|
||||
return nil
|
@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic"
|
||||
)
|
||||
|
||||
func WaitElement(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
@ -30,7 +30,7 @@ func WaitElement(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
doc, ok := arg.(*browser.HtmlDocument)
|
||||
doc, ok := arg.(*dynamic.HtmlDocument)
|
||||
|
||||
if !ok {
|
||||
return values.False, core.Error(core.ErrInvalidType, "expected dynamic document")
|
||||
@ -52,7 +52,7 @@ func WaitNavigation(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
doc, ok := args[0].(*browser.HtmlDocument)
|
||||
doc, ok := args[0].(*dynamic.HtmlDocument)
|
||||
|
||||
if !ok {
|
||||
return values.None, core.Error(core.ErrInvalidType, "expected dynamic document")
|
||||
|
Loading…
Reference in New Issue
Block a user