1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-14 11:23:02 +02:00

Refactored dynamic elements

This commit is contained in:
Tim Voronov 2018-09-26 22:03:06 -04:00
parent 5cad22e3b3
commit 825c33010c
22 changed files with 412 additions and 219 deletions

View File

@ -1,4 +1,4 @@
package cmd package main
import ( import (
"bufio" "bufio"

View File

@ -27,8 +27,8 @@ func (p *Program) Run(ctx context.Context, setters ...Option) ([]byte, error) {
} }
ctx = opts.withContext(ctx) ctx = opts.withContext(ctx)
ctx = driver.WithCdpDriver(ctx, opts.cdp) ctx = driver.WithDynamicDriver(ctx, opts.cdp)
ctx = driver.WithHttpDriver(ctx) ctx = driver.WithStaticDriver(ctx)
out, err := p.exp.Exec(ctx, scope) out, err := p.exp.Exec(ctx, scope)

View File

@ -4,7 +4,7 @@ import (
"context" "context"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic"
) )
/* /*
@ -29,7 +29,7 @@ func Click(_ context.Context, args ...core.Value) (core.Value, error) {
return values.False, err return values.False, err
} }
el, ok := arg1.(*browser.HtmlElement) el, ok := arg1.(*dynamic.HtmlElement)
if !ok { if !ok {
return values.False, core.Error(core.ErrInvalidType, "expected dynamic element") return values.False, core.Error(core.ErrInvalidType, "expected dynamic element")
@ -47,7 +47,7 @@ func Click(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err return values.None, err
} }
doc, ok := arg1.(*browser.HtmlDocument) doc, ok := arg1.(*dynamic.HtmlDocument)
if !ok { if !ok {
return values.False, core.Error(core.ErrInvalidType, "expected dynamic document") return values.False, core.Error(core.ErrInvalidType, "expected dynamic document")
@ -83,7 +83,7 @@ func Navigate(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err return values.None, err
} }
doc, ok := args[0].(*browser.HtmlDocument) doc, ok := args[0].(*dynamic.HtmlDocument)
if !ok { if !ok {
return values.False, core.Error(core.ErrInvalidType, "expected dynamic document") return values.False, core.Error(core.ErrInvalidType, "expected dynamic document")

View File

@ -5,11 +5,11 @@ import (
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver" "github.com/MontFerret/ferret/pkg/stdlib/html/driver"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
) )
func Document(ctx context.Context, inputs ...core.Value) (core.Value, error) { func Document(ctx context.Context, inputs ...core.Value) (core.Value, error) {
url, js, err := documentArgs(inputs) url, dynamic, err := documentArgs(inputs)
if err != nil { if err != nil {
return values.None, err return values.None, err
@ -17,10 +17,10 @@ func Document(ctx context.Context, inputs ...core.Value) (core.Value, error) {
var drv driver.Driver var drv driver.Driver
if !js { if !dynamic {
drv, err = driver.FromContext(ctx, driver.Http) drv, err = driver.FromContext(ctx, driver.Static)
} else { } else {
drv, err = driver.FromContext(ctx, driver.Cdp) drv, err = driver.FromContext(ctx, driver.Dynamic)
} }
if err != nil { if err != nil {
@ -43,13 +43,13 @@ func DocumentParse(ctx context.Context, inputs ...core.Value) (core.Value, error
return arg1, core.Error(core.TypeError(a1.Type(), core.StringType), "arg 1") return arg1, core.Error(core.TypeError(a1.Type(), core.StringType), "arg 1")
} }
drv, err := driver.FromContext(ctx, driver.Http) drv, err := driver.FromContext(ctx, driver.Static)
if err != nil { if err != nil {
return values.None, err return values.None, err
} }
return drv.(*http.HttpDriver).ParseDocument(ctx, arg1.String()) return drv.(*static.Driver).ParseDocument(ctx, arg1.String())
} }
func documentArgs(inputs []core.Value) (values.String, values.Boolean, error) { func documentArgs(inputs []core.Value) (values.String, values.Boolean, error) {

View File

@ -1,43 +0,0 @@
package browser
import (
"context"
"github.com/mafredri/cdp"
"golang.org/x/sync/errgroup"
)
// pointerInt returns a pointer to a freshly allocated copy of input.
// It exists for APIs whose optional integer arguments are expressed as *int.
func pointerInt(input int) *int {
	ptr := new(int)
	*ptr = input

	return ptr
}
// batchFunc is a unit of work that can be scheduled by runBatch.
type batchFunc = func() error

// runBatch hands every function to an errgroup.Group and returns whatever
// the group's Wait reports once all of them have finished.
func runBatch(funcs ...batchFunc) error {
	var group errgroup.Group

	for i := range funcs {
		group.Go(funcs[i])
	}

	return group.Wait()
}
// contextWithTimeout derives a context from context.Background that is
// cancelled after DefaultTimeout. The caller owns the returned cancel func.
func contextWithTimeout() (context.Context, context.CancelFunc) {
	parent := context.Background()

	return context.WithTimeout(parent, DefaultTimeout)
}
// waitForLoadEvent subscribes to the page "load" event and blocks until one
// event is received or the subscription fails.
//
// The event stream is closed on every path. Previously a failing Recv
// returned early and left the stream open.
func waitForLoadEvent(ctx context.Context, client *cdp.Client) error {
	loadEventFired, err := client.Page.LoadEventFired(ctx)
	if err != nil {
		return err
	}

	if _, err = loadEventFired.Recv(); err != nil {
		// The Recv failure is the error worth reporting; the Close result
		// is intentionally dropped so it cannot mask it.
		_ = loadEventFired.Close()

		return err
	}

	return loadEventFired.Close()
}

View File

@ -0,0 +1,56 @@
package common
import (
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"sync"
)
// LazyFactory produces a value on demand, or an error if it cannot.
type LazyFactory func() (core.Value, error)

// LazyValue pairs a LazyFactory with the value and error it last produced.
// The embedded mutex is used by the type's methods to serialize access.
type LazyValue struct {
	sync.Mutex

	// factory is the function invoked to compute the value.
	factory LazyFactory
	// ready flags whether value and err hold a computed result.
	ready bool
	// value is the stored result (values.None when nothing is stored).
	value core.Value
	// err is the stored factory error, if any.
	err error
}
// NewLazyValue builds a LazyValue around factory. The factory is not invoked
// here; the value starts out as values.None and is marked as not ready.
func NewLazyValue(factory LazyFactory) *LazyValue {
	return &LazyValue{
		factory: factory,
		ready:   false,
		value:   values.None,
	}
}
// Value returns the factory's result, invoking the factory at most once
// until Reset is called.
//
// The outcome of the first invocation is stored and returned on later calls.
// That is either the produced value, or values.None together with the error.
// Previously the ready flag was never raised, so the factory ran again on
// every call and nothing was actually cached.
func (lv *LazyValue) Value() (core.Value, error) {
	lv.Lock()
	defer lv.Unlock()

	if lv.ready {
		return lv.value, lv.err
	}

	val, err := lv.factory()

	if err == nil {
		lv.value = val
		lv.err = nil
	} else {
		lv.value = values.None
		lv.err = err
	}

	// Remember that the factory has run; Reset clears this flag again.
	lv.ready = true

	return lv.value, lv.err
}
// Reset discards any stored result: the value goes back to values.None,
// the stored error is cleared and the instance is marked as not ready.
func (lv *LazyValue) Reset() {
	lv.Lock()

	lv.err = nil
	lv.value = values.None
	lv.ready = false

	lv.Unlock()
}

View File

@ -5,23 +5,27 @@ import (
"fmt" "fmt"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
) )
const Cdp = "cdp" type DriverName string
const Http = "http"
const (
Dynamic DriverName = "dynamic"
Static DriverName = "static"
)
type Driver interface { type Driver interface {
GetDocument(ctx context.Context, url string) (values.HtmlNode, error) GetDocument(ctx context.Context, url string) (values.HtmlNode, error)
Close() error Close() error
} }
func ToContext(ctx context.Context, name string, drv Driver) context.Context { func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context {
return context.WithValue(ctx, name, drv) return context.WithValue(ctx, name, drv)
} }
func FromContext(ctx context.Context, name string) (Driver, error) { func FromContext(ctx context.Context, name DriverName) (Driver, error) {
val := ctx.Value(name) val := ctx.Value(name)
drv, ok := val.(Driver) drv, ok := val.(Driver)
@ -33,10 +37,10 @@ func FromContext(ctx context.Context, name string) (Driver, error) {
return nil, core.Error(core.ErrNotFound, fmt.Sprintf("%s driver", name)) return nil, core.Error(core.ErrNotFound, fmt.Sprintf("%s driver", name))
} }
func WithCdpDriver(ctx context.Context, addr string) context.Context { func WithDynamicDriver(ctx context.Context, addr string) context.Context {
return context.WithValue(ctx, Cdp, browser.NewDriver(addr)) return context.WithValue(ctx, Dynamic, dynamic.NewDriver(addr))
} }
func WithHttpDriver(ctx context.Context, opts ...http.Option) context.Context { func WithStaticDriver(ctx context.Context, opts ...static.Option) context.Context {
return context.WithValue(ctx, Http, http.NewDriver(opts...)) return context.WithValue(ctx, Static, static.NewDriver(opts...))
} }

View File

@ -1,4 +1,4 @@
package browser package dynamic
import ( import (
"context" "context"
@ -6,8 +6,8 @@ import (
"fmt" "fmt"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/events" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
"github.com/corpix/uarand" "github.com/corpix/uarand"
"github.com/mafredri/cdp" "github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/dom"
@ -27,6 +27,7 @@ type HtmlDocument struct {
events *events.EventBroker events *events.EventBroker
url string url string
element *HtmlElement element *HtmlElement
history []*HtmlElement
} }
func LoadHtmlDocument( func LoadHtmlDocument(
@ -82,7 +83,7 @@ func LoadHtmlDocument(
return nil, err return nil, err
} }
root, err := getRootElement(client) root, innerHtml, err := getRootElement(client)
if err != nil { if err != nil {
return nil, err return nil, err
@ -94,56 +95,42 @@ func LoadHtmlDocument(
return nil, err return nil, err
} }
return NewHtmlDocument(conn, client, root, broker), nil return NewHtmlDocument(conn, client, broker, root, innerHtml), nil
} }
func getRootElement(client *cdp.Client) (dom.Node, error) { func getRootElement(client *cdp.Client) (dom.Node, values.String, error) {
args := dom.NewGetDocumentArgs() args := dom.NewGetDocumentArgs()
args.Depth = pointerInt(1) // lets load the entire document args.Depth = pointerInt(1) // lets load the entire document
ctx := context.Background()
d, err := client.DOM.GetDocument(context.Background(), args) d, err := client.DOM.GetDocument(ctx, args)
if err != nil { if err != nil {
return dom.Node{}, err return dom.Node{}, values.EmptyString, err
} }
return d.Root, nil innerHtml, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID))
}
func createEventBroker(client *cdp.Client) (*events.EventBroker, error) {
load, err := client.Page.LoadEventFired(context.Background())
if err != nil { if err != nil {
return nil, err return dom.Node{}, values.EmptyString, err
} }
broker := events.NewEventBroker() return d.Root, values.NewString(innerHtml.OuterHTML), nil
broker.AddEventStream("load", load, func() interface{} {
return new(page.LoadEventFiredReply)
})
err = broker.Start()
if err != nil {
broker.Close()
return nil, err
}
return broker, nil
} }
func NewHtmlDocument( func NewHtmlDocument(
conn *rpcc.Conn, conn *rpcc.Conn,
client *cdp.Client, client *cdp.Client,
root dom.Node,
broker *events.EventBroker, broker *events.EventBroker,
root dom.Node,
innerHtml values.String,
) *HtmlDocument { ) *HtmlDocument {
doc := new(HtmlDocument) doc := new(HtmlDocument)
doc.conn = conn doc.conn = conn
doc.client = client doc.client = client
doc.events = broker doc.events = broker
doc.element = NewHtmlElement(client, root.NodeID, root) doc.element = NewHtmlElement(client, broker, root.NodeID, root, innerHtml)
doc.history = make([]*HtmlElement, 0, 10)
doc.url = "" doc.url = ""
if root.BaseURL != nil { if root.BaseURL != nil {
@ -154,18 +141,18 @@ func NewHtmlDocument(
doc.Lock() doc.Lock()
defer doc.Unlock() defer doc.Unlock()
updated, err := getRootElement(client) updated, innerHtml, err := getRootElement(client)
if err != nil { if err != nil {
// TODO: We need somehow log all errors outside of stdout // TODO: We need somehow log all errors outside of stdout
return return
} }
// close an old root element // put the root element in a history list, since it might be still used
doc.element.Close() doc.history = append(doc.history, doc.element)
// create a new root element wrapper // create a new root element wrapper
doc.element = NewHtmlElement(client, updated.NodeID, updated) doc.element = NewHtmlElement(client, broker, updated.NodeID, updated, innerHtml)
doc.url = "" doc.url = ""
if updated.BaseURL != nil { if updated.BaseURL != nil {
@ -241,6 +228,11 @@ func (doc *HtmlDocument) Close() error {
doc.events.Stop() doc.events.Stop()
doc.events.Close() doc.events.Close()
for _, h := range doc.history {
h.Close()
}
doc.element.Close()
doc.client.Page.Close(context.Background()) doc.client.Page.Close(context.Background())
return doc.conn.Close() return doc.conn.Close()

View File

@ -1,4 +1,4 @@
package browser package dynamic
import ( import (
"context" "context"
@ -12,7 +12,7 @@ import (
"sync" "sync"
) )
type CdpDriver struct { type Driver struct {
sync.Mutex sync.Mutex
dev *devtool.DevTools dev *devtool.DevTools
conn *rpcc.Conn conn *rpcc.Conn
@ -21,14 +21,14 @@ type CdpDriver struct {
contextID target.BrowserContextID contextID target.BrowserContextID
} }
func NewDriver(address string) *CdpDriver { func NewDriver(address string) *Driver {
drv := new(CdpDriver) drv := new(Driver)
drv.dev = devtool.New(address) drv.dev = devtool.New(address)
return drv return drv
} }
func (drv *CdpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) { func (drv *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) {
err := drv.init(ctx) err := drv.init(ctx)
if err != nil { if err != nil {
@ -57,7 +57,7 @@ func (drv *CdpDriver) GetDocument(ctx context.Context, url string) (values.HtmlN
return LoadHtmlDocument(ctx, conn, url) return LoadHtmlDocument(ctx, conn, url)
} }
func (drv *CdpDriver) Close() error { func (drv *Driver) Close() error {
drv.Lock() drv.Lock()
defer drv.Unlock() defer drv.Unlock()
@ -70,7 +70,7 @@ func (drv *CdpDriver) Close() error {
return nil return nil
} }
func (drv *CdpDriver) init(ctx context.Context) error { func (drv *Driver) init(ctx context.Context) error {
drv.Lock() drv.Lock()
defer drv.Unlock() defer drv.Unlock()

View File

@ -1,4 +1,4 @@
package browser package dynamic
import ( import (
"bytes" "bytes"
@ -7,30 +7,37 @@ import (
"encoding/json" "encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/events"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/mafredri/cdp" "github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/dom"
"strconv" "strconv"
"sync"
"time" "time"
) )
const DefaultTimeout = time.Second * 30 const DefaultTimeout = time.Second * 30
type HtmlElement struct { type HtmlElement struct {
sync.Mutex
client *cdp.Client client *cdp.Client
broker *events.EventBroker
connected bool
id dom.NodeID id dom.NodeID
nodeType values.Int nodeType values.Int
nodeName values.String nodeName values.String
innerHtml values.String
innerText *common.LazyValue
value string value string
attributes *values.Object attributes *common.LazyValue
children []dom.NodeID children []dom.NodeID
loadedChildren *values.Array loadedChildren *common.LazyValue
} }
func LoadElement( func LoadElement(
client *cdp.Client, client *cdp.Client,
broker *events.EventBroker,
id dom.NodeID, id dom.NodeID,
) (*HtmlElement, error) { ) (*HtmlElement, error) {
if client == nil { if client == nil {
@ -46,28 +53,70 @@ func LoadElement(
dom. dom.
NewDescribeNodeArgs(). NewDescribeNodeArgs().
SetNodeID(id). SetNodeID(id).
SetDepth(1), SetDepth(-1),
) )
if err != nil { if err != nil {
return nil, core.Error(err, strconv.Itoa(int(id))) return nil, core.Error(err, strconv.Itoa(int(id)))
} }
return NewHtmlElement(client, id, node.Node), nil innerHtml, err := client.DOM.GetOuterHTML(
ctx,
dom.NewGetOuterHTMLArgs().SetNodeID(id),
)
if err != nil {
return nil, core.Error(err, strconv.Itoa(int(id)))
}
return NewHtmlElement(
client,
broker,
id,
node.Node,
values.NewString(innerHtml.OuterHTML),
), nil
} }
func NewHtmlElement( func NewHtmlElement(
client *cdp.Client, client *cdp.Client,
broker *events.EventBroker,
id dom.NodeID, id dom.NodeID,
node dom.Node, node dom.Node,
innerHtml values.String,
) *HtmlElement { ) *HtmlElement {
el := new(HtmlElement) el := new(HtmlElement)
el.client = client el.client = client
el.broker = broker
el.connected = true
el.id = id el.id = id
el.nodeType = values.NewInt(node.NodeType) el.nodeType = values.NewInt(node.NodeType)
el.nodeName = values.NewString(node.NodeName) el.nodeName = values.NewString(node.NodeName)
el.innerHtml = innerHtml
el.innerText = common.NewLazyValue(func() (core.Value, error) {
h := el.InnerHtml()
if h == values.EmptyString {
return h, nil
}
buff := bytes.NewBuffer([]byte(h))
parsed, err := goquery.NewDocumentFromReader(buff)
if err != nil {
return values.EmptyString, err
}
return values.NewString(parsed.Text()), nil
})
el.attributes = common.NewLazyValue(func() (core.Value, error) {
return parseAttrs(node.Attributes), nil
})
el.value = "" el.value = ""
el.attributes = parseAttrs(node.Attributes) el.loadedChildren = common.NewLazyValue(func() (core.Value, error) {
return loadNodes(client, broker, el.children)
})
var childCount int var childCount int
@ -89,8 +138,6 @@ func NewHtmlElement(
} }
func (el *HtmlElement) Close() error { func (el *HtmlElement) Close() error {
// el.client = nil
return nil return nil
} }
@ -178,11 +225,23 @@ func (el *HtmlElement) NodeName() values.String {
} }
func (el *HtmlElement) GetAttributes() core.Value { func (el *HtmlElement) GetAttributes() core.Value {
return el.attributes val, err := el.attributes.Value()
if err != nil {
return values.None
}
return val
} }
func (el *HtmlElement) GetAttribute(name values.String) core.Value { func (el *HtmlElement) GetAttribute(name values.String) core.Value {
val, found := el.attributes.Get(name) attrs, err := el.attributes.Value()
if err != nil {
return values.None
}
val, found := attrs.(*values.Object).Get(name)
if !found { if !found {
return values.None return values.None
@ -192,19 +251,23 @@ func (el *HtmlElement) GetAttribute(name values.String) core.Value {
} }
func (el *HtmlElement) GetChildNodes() core.Value { func (el *HtmlElement) GetChildNodes() core.Value {
if el.loadedChildren == nil { val, err := el.loadedChildren.Value()
el.loadedChildren = loadNodes(el.client, el.children)
if err != nil {
return values.NewArray(0)
} }
return el.loadedChildren return val
} }
func (el *HtmlElement) GetChildNode(idx values.Int) core.Value { func (el *HtmlElement) GetChildNode(idx values.Int) core.Value {
if el.loadedChildren == nil { val, err := el.loadedChildren.Value()
el.loadedChildren = loadNodes(el.client, el.children)
if err != nil {
return values.None
} }
return el.loadedChildren.Get(idx) return val.(*values.Array).Get(idx)
} }
func (el *HtmlElement) QuerySelector(selector values.String) core.Value { func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
@ -217,7 +280,7 @@ func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
return values.None return values.None
} }
res, err := LoadElement(el.client, found.NodeID) res, err := LoadElement(el.client, el.broker, found.NodeID)
if err != nil { if err != nil {
return values.None return values.None
@ -239,7 +302,7 @@ func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value {
arr := values.NewArray(len(res.NodeIDs)) arr := values.NewArray(len(res.NodeIDs))
for _, id := range res.NodeIDs { for _, id := range res.NodeIDs {
childEl, err := LoadElement(el.client, id) childEl, err := LoadElement(el.client, el.broker, id)
if err != nil { if err != nil {
return values.None return values.None
@ -252,84 +315,23 @@ func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value {
} }
func (el *HtmlElement) InnerText() values.String { func (el *HtmlElement) InnerText() values.String {
h := el.InnerHtml() val, err := el.innerText.Value()
if h == values.EmptyString {
return h
}
buff := bytes.NewBuffer([]byte(h))
parsed, err := goquery.NewDocumentFromReader(buff)
if err != nil { if err != nil {
return values.EmptyString return values.EmptyString
} }
return values.NewString(parsed.Text()) return val.(values.String)
} }
func (el *HtmlElement) InnerHtml() values.String { func (el *HtmlElement) InnerHtml() values.String {
ctx, cancelFn := createCtx() return el.innerHtml
defer cancelFn()
res, err := el.client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(el.id))
if err != nil {
return values.EmptyString
}
return values.NewString(res.OuterHTML)
} }
func (el *HtmlElement) Click() (values.Boolean, error) { func (el *HtmlElement) Click() (values.Boolean, error) {
ctx, cancel := createCtx() ctx, cancel := contextWithTimeout()
defer cancel() defer cancel()
return events.DispatchEvent(ctx, el.client, el.id, "click") return events.DispatchEvent(ctx, el.client, el.id, "click")
} }
// createCtx derives a context from context.Background that is cancelled
// after DefaultTimeout. The caller owns the returned cancel func.
func createCtx() (context.Context, context.CancelFunc) {
	parent := context.Background()

	return context.WithTimeout(parent, DefaultTimeout)
}
// parseAttrs folds a flat list of attribute tokens into an object.
//
// A token accepted by common.IsAttribute starts a new attribute whose value
// is initially empty. Every other token is treated as a piece of the value of
// the most recently started attribute; pieces are joined with a single space.
// Value tokens that appear before any attribute name are ignored.
//
// The first piece is stored as-is. Previously it was appended to the empty
// initial value with a separator, so every value began with a stray space.
func parseAttrs(attrs []string) *values.Object {
	var attr values.String

	res := values.NewObject()

	for _, el := range attrs {
		str := values.NewString(el)

		if common.IsAttribute(el) {
			attr = str
			res.Set(str, values.EmptyString)

			continue
		}

		current, ok := res.Get(attr)

		if !ok {
			// no attribute has been started yet
			continue
		}

		prev, isStr := current.(values.String)

		if !isStr || prev == values.EmptyString {
			// first piece of the value: no separator needed
			res.Set(attr, str)

			continue
		}

		res.Set(attr, prev.Concat(values.SpaceString).Concat(str))
	}

	return res
}
// loadNodes loads an element for each node id, in order, and collects them
// into an array. Loading stops at the first failure; the elements gathered
// up to that point are returned and the error itself is not reported.
func loadNodes(client *cdp.Client, nodes []dom.NodeID) *values.Array {
	result := values.NewArray(len(nodes))

	for i := 0; i < len(nodes); i++ {
		el, err := LoadElement(client, nodes[i])

		if err != nil {
			return result
		}

		result.Push(el)
	}

	return result
}

View File

@ -4,7 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
"github.com/mafredri/cdp" "github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/runtime" "github.com/mafredri/cdp/protocol/runtime"

View File

@ -3,7 +3,7 @@ package events
import ( import (
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
"github.com/mafredri/cdp" "github.com/mafredri/cdp"
"time" "time"
) )

View File

@ -0,0 +1,182 @@
package dynamic
import (
"context"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
"golang.org/x/sync/errgroup"
)
// pointerInt returns a pointer to a freshly allocated copy of input.
// It exists for APIs whose optional integer arguments are expressed as *int.
func pointerInt(input int) *int {
	ptr := new(int)
	*ptr = input

	return ptr
}
// batchFunc is a unit of work that can be scheduled by runBatch.
type batchFunc = func() error

// runBatch hands every function to an errgroup.Group and returns whatever
// the group's Wait reports once all of them have finished.
func runBatch(funcs ...batchFunc) error {
	var group errgroup.Group

	for i := range funcs {
		group.Go(funcs[i])
	}

	return group.Wait()
}
// parseAttrs folds a flat list of attribute tokens into an object.
//
// A token accepted by common.IsAttribute starts a new attribute whose value
// is initially empty. Every other token is treated as a piece of the value of
// the most recently started attribute; pieces are joined with a single space.
// Value tokens that appear before any attribute name are ignored.
//
// The first piece is stored as-is. Previously it was appended to the empty
// initial value with a separator, so every value began with a stray space.
func parseAttrs(attrs []string) *values.Object {
	var attr values.String

	res := values.NewObject()

	for _, el := range attrs {
		str := values.NewString(el)

		if common.IsAttribute(el) {
			attr = str
			res.Set(str, values.EmptyString)

			continue
		}

		current, ok := res.Get(attr)

		if !ok {
			// no attribute has been started yet
			continue
		}

		prev, isStr := current.(values.String)

		if !isStr || prev == values.EmptyString {
			// first piece of the value: no separator needed
			res.Set(attr, str)

			continue
		}

		res.Set(attr, prev.Concat(values.SpaceString).Concat(str))
	}

	return res
}
// loadNodes loads an element for each node id, in order, and collects them
// into an array. The first load failure aborts the whole operation: no array
// is returned, only that error.
func loadNodes(client *cdp.Client, broker *events.EventBroker, nodes []dom.NodeID) (*values.Array, error) {
	result := values.NewArray(len(nodes))

	for i := 0; i < len(nodes); i++ {
		el, err := LoadElement(client, broker, nodes[i])

		if err != nil {
			return nil, err
		}

		result.Push(el)
	}

	return result, nil
}
// contextWithTimeout derives a context from context.Background that is
// cancelled after DefaultTimeout. The caller owns the returned cancel func.
func contextWithTimeout() (context.Context, context.CancelFunc) {
	parent := context.Background()

	return context.WithTimeout(parent, DefaultTimeout)
}
// waitForLoadEvent subscribes to the page "load" event and blocks until one
// event is received or the subscription fails.
//
// The event stream is closed on every path. Previously a failing Recv
// returned early and left the stream open.
func waitForLoadEvent(ctx context.Context, client *cdp.Client) error {
	loadEventFired, err := client.Page.LoadEventFired(ctx)
	if err != nil {
		return err
	}

	if _, err = loadEventFired.Recv(); err != nil {
		// The Recv failure is the error worth reporting; the Close result
		// is intentionally dropped so it cannot mask it.
		_ = loadEventFired.Close()

		return err
	}

	return loadEventFired.Close()
}
// createEventBroker opens the page and DOM event streams of client and
// registers each of them with a new event broker under a fixed name:
// "load", "reload", "attr:modified", "attr:removed", "children:count",
// "children:inserted" and "children:deleted".
//
// The broker is started right after the "load" stream is added; the
// remaining streams are registered on the already started broker. If any
// step after the broker's creation fails, the broker is closed and the
// error is returned.
func createEventBroker(client *cdp.Client) (*events.EventBroker, error) {
	ctx := context.Background()

	loadStream, err := client.Page.LoadEventFired(ctx)
	if err != nil {
		return nil, err
	}

	broker := events.NewEventBroker()

	// abort tears the broker down and reports the cause to the caller.
	abort := func(cause error) (*events.EventBroker, error) {
		broker.Close()

		return nil, cause
	}

	broker.AddEventStream("load", loadStream, func() interface{} {
		return new(page.LoadEventFiredReply)
	})

	if err = broker.Start(); err != nil {
		return abort(err)
	}

	// the whole document was replaced
	docUpdated, err := client.DOM.DocumentUpdated(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("reload", docUpdated, func() interface{} {
		return new(dom.DocumentUpdatedReply)
	})

	// attribute changes
	attrChanged, err := client.DOM.AttributeModified(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("attr:modified", attrChanged, func() interface{} {
		return new(dom.AttributeModifiedReply)
	})

	attrDropped, err := client.DOM.AttributeRemoved(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("attr:removed", attrDropped, func() interface{} {
		return new(dom.AttributeRemovedReply)
	})

	// child node changes
	childCount, err := client.DOM.ChildNodeCountUpdated(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("children:count", childCount, func() interface{} {
		return new(dom.ChildNodeCountUpdatedReply)
	})

	childInserted, err := client.DOM.ChildNodeInserted(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("children:inserted", childInserted, func() interface{} {
		return new(dom.ChildNodeInsertedReply)
	})

	childRemoved, err := client.DOM.ChildNodeRemoved(ctx)
	if err != nil {
		return abort(err)
	}

	broker.AddEventStream("children:deleted", childRemoved, func() interface{} {
		return new(dom.ChildNodeRemovedReply)
	})

	return broker, nil
}

View File

@ -1,4 +1,4 @@
package http package static
import ( import (
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"

View File

@ -1,8 +1,8 @@
package http_test package static_test
import ( import (
"bytes" "bytes"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
. "github.com/smartystreets/goconvey/convey" . "github.com/smartystreets/goconvey/convey"
"testing" "testing"
@ -228,7 +228,7 @@ func TestDocument(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Selection) el, err := static.NewHtmlElement(doc.Selection)
So(err, ShouldBeNil) So(err, ShouldBeNil)

View File

@ -1,4 +1,4 @@
package http package static
import ( import (
"crypto/sha512" "crypto/sha512"

View File

@ -1,8 +1,8 @@
package http_test package static_test
import ( import (
"bytes" "bytes"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
. "github.com/smartystreets/goconvey/convey" . "github.com/smartystreets/goconvey/convey"
"testing" "testing"
@ -250,7 +250,7 @@ func TestElement(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Find("body")) el, err := static.NewHtmlElement(doc.Find("body"))
So(err, ShouldBeNil) So(err, ShouldBeNil)
@ -266,7 +266,7 @@ func TestElement(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Find("body")) el, err := static.NewHtmlElement(doc.Find("body"))
So(err, ShouldBeNil) So(err, ShouldBeNil)
@ -290,7 +290,7 @@ func TestElement(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Find("body")) el, err := static.NewHtmlElement(doc.Find("body"))
So(err, ShouldBeNil) So(err, ShouldBeNil)
@ -315,7 +315,7 @@ func TestElement(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Find("#q")) el, err := static.NewHtmlElement(doc.Find("#q"))
So(err, ShouldBeNil) So(err, ShouldBeNil)
@ -342,7 +342,7 @@ func TestElement(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Find("h2")) el, err := static.NewHtmlElement(doc.Find("h2"))
So(err, ShouldBeNil) So(err, ShouldBeNil)
@ -369,7 +369,7 @@ func TestElement(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Find("#content")) el, err := static.NewHtmlElement(doc.Find("#content"))
So(err, ShouldBeNil) So(err, ShouldBeNil)
@ -385,7 +385,7 @@ func TestElement(t *testing.T) {
So(err, ShouldBeNil) So(err, ShouldBeNil)
el, err := http.NewHtmlElement(doc.Find("body .card-img-top:nth-child(1)")) el, err := static.NewHtmlElement(doc.Find("body .card-img-top:nth-child(1)"))
So(err, ShouldBeNil) So(err, ShouldBeNil)

View File

@ -1,4 +1,4 @@
package http package static
import "github.com/sethgrid/pester" import "github.com/sethgrid/pester"

View File

@ -1,4 +1,4 @@
package http package static
import ( import (
"bytes" "bytes"
@ -11,11 +11,11 @@ import (
httpx "net/http" httpx "net/http"
) )
type HttpDriver struct { type Driver struct {
client *pester.Client client *pester.Client
} }
func NewDriver(setters ...Option) *HttpDriver { func NewDriver(setters ...Option) *Driver {
client := pester.New() client := pester.New()
client.Concurrency = 3 client.Concurrency = 3
client.MaxRetries = 5 client.MaxRetries = 5
@ -25,10 +25,10 @@ func NewDriver(setters ...Option) *HttpDriver {
setter(client) setter(client)
} }
return &HttpDriver{client} return &Driver{client}
} }
func (d *HttpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) { func (d *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) {
req, err := httpx.NewRequest(httpx.MethodGet, url, nil) req, err := httpx.NewRequest(httpx.MethodGet, url, nil)
if err != nil { if err != nil {
@ -58,7 +58,7 @@ func (d *HttpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNo
return NewHtmlDocument(url, doc) return NewHtmlDocument(url, doc)
} }
func (d *HttpDriver) ParseDocument(ctx context.Context, str string) (values.HtmlNode, error) { func (d *Driver) ParseDocument(ctx context.Context, str string) (values.HtmlNode, error) {
buf := bytes.NewBuffer([]byte(str)) buf := bytes.NewBuffer([]byte(str))
doc, err := goquery.NewDocumentFromReader(buf) doc, err := goquery.NewDocumentFromReader(buf)
@ -70,7 +70,7 @@ func (d *HttpDriver) ParseDocument(ctx context.Context, str string) (values.Html
return NewHtmlDocument("#string", doc) return NewHtmlDocument("#string", doc)
} }
func (d *HttpDriver) Close() error { func (d *Driver) Close() error {
d.client = nil d.client = nil
return nil return nil

View File

@ -4,7 +4,7 @@ import (
"context" "context"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic"
) )
func WaitElement(_ context.Context, args ...core.Value) (core.Value, error) { func WaitElement(_ context.Context, args ...core.Value) (core.Value, error) {
@ -30,7 +30,7 @@ func WaitElement(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err return values.None, err
} }
doc, ok := arg.(*browser.HtmlDocument) doc, ok := arg.(*dynamic.HtmlDocument)
if !ok { if !ok {
return values.False, core.Error(core.ErrInvalidType, "expected dynamic document") return values.False, core.Error(core.ErrInvalidType, "expected dynamic document")
@ -52,7 +52,7 @@ func WaitNavigation(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err return values.None, err
} }
doc, ok := args[0].(*browser.HtmlDocument) doc, ok := args[0].(*dynamic.HtmlDocument)
if !ok { if !ok {
return values.None, core.Error(core.ErrInvalidType, "expected dynamic document") return values.None, core.Error(core.ErrInvalidType, "expected dynamic document")