diff --git a/cmd/main.go b/cmd/main.go index 4d4fbcc3..d3b63d42 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,4 +1,4 @@ -package cmd +package main import ( "bufio" diff --git a/pkg/runtime/program.go b/pkg/runtime/program.go index e135858b..5bc475e6 100644 --- a/pkg/runtime/program.go +++ b/pkg/runtime/program.go @@ -27,8 +27,8 @@ func (p *Program) Run(ctx context.Context, setters ...Option) ([]byte, error) { } ctx = opts.withContext(ctx) - ctx = driver.WithCdpDriver(ctx, opts.cdp) - ctx = driver.WithHttpDriver(ctx) + ctx = driver.WithDynamicDriver(ctx, opts.cdp) + ctx = driver.WithStaticDriver(ctx) out, err := p.exp.Exec(ctx, scope) diff --git a/pkg/stdlib/html/actions.go b/pkg/stdlib/html/actions.go index 7a7e5b39..7e4525ff 100644 --- a/pkg/stdlib/html/actions.go +++ b/pkg/stdlib/html/actions.go @@ -4,7 +4,7 @@ import ( "context" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic" ) /* @@ -29,7 +29,7 @@ func Click(_ context.Context, args ...core.Value) (core.Value, error) { return values.False, err } - el, ok := arg1.(*browser.HtmlElement) + el, ok := arg1.(*dynamic.HtmlElement) if !ok { return values.False, core.Error(core.ErrInvalidType, "expected dynamic element") @@ -47,7 +47,7 @@ func Click(_ context.Context, args ...core.Value) (core.Value, error) { return values.None, err } - doc, ok := arg1.(*browser.HtmlDocument) + doc, ok := arg1.(*dynamic.HtmlDocument) if !ok { return values.False, core.Error(core.ErrInvalidType, "expected dynamic document") @@ -83,7 +83,7 @@ func Navigate(_ context.Context, args ...core.Value) (core.Value, error) { return values.None, err } - doc, ok := args[0].(*browser.HtmlDocument) + doc, ok := args[0].(*dynamic.HtmlDocument) if !ok { return values.False, core.Error(core.ErrInvalidType, "expected dynamic document") diff --git a/pkg/stdlib/html/document.go b/pkg/stdlib/html/document.go index 3d7a7bf3..fa17ce69 100644 --- a/pkg/stdlib/html/document.go +++ b/pkg/stdlib/html/document.go @@ -5,11 +5,11 @@ import ( "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/stdlib/html/driver" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static" ) func Document(ctx context.Context, inputs ...core.Value) (core.Value, error) { - url, js, err := documentArgs(inputs) + url, dynamic, err := documentArgs(inputs) if err != nil { return values.None, err @@ -17,10 +17,10 @@ func Document(ctx context.Context, inputs ...core.Value) (core.Value, error) { var drv driver.Driver - if !js { - drv, err = driver.FromContext(ctx, driver.Http) + if !dynamic { + drv, err = driver.FromContext(ctx, driver.Static) } else { - drv, err = driver.FromContext(ctx, driver.Cdp) + drv, err = driver.FromContext(ctx, driver.Dynamic) } if err != nil { @@ -43,13 +43,13 @@ func DocumentParse(ctx context.Context, inputs ...core.Value) (core.Value, error return arg1, core.Error(core.TypeError(a1.Type(), core.StringType), "arg 1") } - drv, err := driver.FromContext(ctx, driver.Http) + drv, err := driver.FromContext(ctx, driver.Static) if err != nil { return values.None, err } - return drv.(*http.HttpDriver).ParseDocument(ctx, arg1.String()) + return drv.(*static.Driver).ParseDocument(ctx, arg1.String()) } func documentArgs(inputs []core.Value) (values.String, values.Boolean, error) { diff --git a/pkg/stdlib/html/driver/browser/helpers.go b/pkg/stdlib/html/driver/browser/helpers.go deleted file mode 100644 index 9e6eaf94..00000000 --- a/pkg/stdlib/html/driver/browser/helpers.go +++ /dev/null @@ -1,43 +0,0 @@ -package browser - -import ( - "context" - "github.com/mafredri/cdp" - "golang.org/x/sync/errgroup" -) - -func pointerInt(input int) *int { - return &input -} - -type batchFunc = func() error - -func runBatch(funcs ...batchFunc) error { - eg := errgroup.Group{} - - for _, f := range funcs { - eg.Go(f) - } - - return eg.Wait() -} - -func contextWithTimeout() (context.Context, context.CancelFunc) { - return context.WithTimeout(context.Background(), DefaultTimeout) -} - -func waitForLoadEvent(ctx context.Context, client *cdp.Client) error { - loadEventFired, err := client.Page.LoadEventFired(ctx) - - if err != nil { - return err - } - - _, err = loadEventFired.Recv() - - if err != nil { - return err - } - - return loadEventFired.Close() -} diff --git a/pkg/stdlib/html/driver/common/lazy.go b/pkg/stdlib/html/driver/common/lazy.go new file mode 100644 index 00000000..1915f553 --- /dev/null +++ b/pkg/stdlib/html/driver/common/lazy.go @@ -0,0 +1,56 @@ +package common + +import ( + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" + "sync" +) + +type ( + LazyFactory func() (core.Value, error) + + LazyValue struct { + sync.Mutex + factory LazyFactory + ready bool + value core.Value + err error + } +) + +func NewLazyValue(factory LazyFactory) *LazyValue { + lz := new(LazyValue) + lz.ready = false + lz.factory = factory + lz.value = values.None + + return lz +} + +func (lv *LazyValue) Value() (core.Value, error) { + lv.Lock() + defer lv.Unlock() + + if !lv.ready { + val, err := lv.factory() + + if err == nil { + lv.value = val + lv.err = nil + } else { + lv.value = values.None + lv.err = err + } + } + + return lv.value, lv.err +} + +func (lv *LazyValue) Reset() { + lv.Lock() + defer lv.Unlock() + + lv.ready = false + lv.value = values.None + lv.err = nil +} diff --git a/pkg/stdlib/html/driver/driver.go b/pkg/stdlib/html/driver/driver.go index fdb7174f..b8926ac5 100644 --- a/pkg/stdlib/html/driver/driver.go +++ b/pkg/stdlib/html/driver/driver.go @@ -5,23 +5,27 @@ import ( "fmt" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static" ) -const Cdp = "cdp" -const Http = "http" +type DriverName string + +const ( + Dynamic DriverName = "dynamic" + Static DriverName = "static" +) type Driver interface { GetDocument(ctx context.Context, url string) (values.HtmlNode, error) Close() error } -func ToContext(ctx context.Context, name string, drv Driver) context.Context { +func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context { return context.WithValue(ctx, name, drv) } -func FromContext(ctx context.Context, name string) (Driver, error) { +func FromContext(ctx context.Context, name DriverName) (Driver, error) { val := ctx.Value(name) drv, ok := val.(Driver) @@ -33,10 +37,10 @@ func FromContext(ctx context.Context, name string) (Driver, error) { return nil, core.Error(core.ErrNotFound, fmt.Sprintf("%s driver", name)) } -func WithCdpDriver(ctx context.Context, addr string) context.Context { - return context.WithValue(ctx, Cdp, browser.NewDriver(addr)) +func WithDynamicDriver(ctx context.Context, addr string) context.Context { + return context.WithValue(ctx, Dynamic, dynamic.NewDriver(addr)) } -func WithHttpDriver(ctx context.Context, opts ...http.Option) context.Context { - return context.WithValue(ctx, Http, http.NewDriver(opts...)) +func WithStaticDriver(ctx context.Context, opts ...static.Option) context.Context { + return context.WithValue(ctx, Static, static.NewDriver(opts...)) } diff --git a/pkg/stdlib/html/driver/browser/document.go b/pkg/stdlib/html/driver/dynamic/document.go similarity index 85% rename from pkg/stdlib/html/driver/browser/document.go rename to pkg/stdlib/html/driver/dynamic/document.go index 4911c653..28c2e525 100644 --- a/pkg/stdlib/html/driver/browser/document.go +++ b/pkg/stdlib/html/driver/dynamic/document.go @@ -1,4 +1,4 @@ -package browser +package dynamic import ( "context" @@ -6,8 +6,8 @@ import ( "fmt" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/events" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events" "github.com/corpix/uarand" "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" @@ -27,6 +27,7 @@ type HtmlDocument struct { events *events.EventBroker url string element *HtmlElement + history []*HtmlElement } func LoadHtmlDocument( @@ -82,7 +83,7 @@ func LoadHtmlDocument( return nil, err } - root, err := getRootElement(client) + root, innerHtml, err := getRootElement(client) if err != nil { return nil, err @@ -94,56 +95,42 @@ func LoadHtmlDocument( return nil, err } - return NewHtmlDocument(conn, client, root, broker), nil + return NewHtmlDocument(conn, client, broker, root, innerHtml), nil } -func getRootElement(client *cdp.Client) (dom.Node, error) { +func getRootElement(client *cdp.Client) (dom.Node, values.String, error) { args := dom.NewGetDocumentArgs() args.Depth = pointerInt(1) // lets load the entire document + ctx := context.Background() - d, err := client.DOM.GetDocument(context.Background(), args) + d, err := client.DOM.GetDocument(ctx, args) if err != nil { - return dom.Node{}, err + return dom.Node{}, values.EmptyString, err } - return d.Root, nil -} - -func createEventBroker(client *cdp.Client) (*events.EventBroker, error) { - load, err := client.Page.LoadEventFired(context.Background()) + innerHtml, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID)) if err != nil { - return nil, err + return dom.Node{}, values.EmptyString, err } - broker := events.NewEventBroker() - broker.AddEventStream("load", load, func() interface{} { - return new(page.LoadEventFiredReply) - }) - - err = broker.Start() - - if err != nil { - broker.Close() - - return nil, err - } - - return broker, nil + return d.Root, values.NewString(innerHtml.OuterHTML), nil } func NewHtmlDocument( conn *rpcc.Conn, client *cdp.Client, - root dom.Node, broker *events.EventBroker, + root dom.Node, + innerHtml values.String, ) *HtmlDocument { doc := new(HtmlDocument) doc.conn = conn doc.client = client doc.events = broker - doc.element = NewHtmlElement(client, root.NodeID, root) + doc.element = NewHtmlElement(client, broker, root.NodeID, root, innerHtml) + doc.history = make([]*HtmlElement, 0, 10) doc.url = "" if root.BaseURL != nil { @@ -154,18 +141,18 @@ func NewHtmlDocument( doc.Lock() defer doc.Unlock() - updated, err := getRootElement(client) + updated, innerHtml, err := getRootElement(client) if err != nil { // TODO: We need somehow log all errors outside of stdout return } - // close an old root element - doc.element.Close() + // put the root element in a history list, since it might be still used + doc.history = append(doc.history, doc.element) // create a new root element wrapper - doc.element = NewHtmlElement(client, updated.NodeID, updated) + doc.element = NewHtmlElement(client, broker, updated.NodeID, updated, innerHtml) doc.url = "" if updated.BaseURL != nil { @@ -241,6 +228,11 @@ func (doc *HtmlDocument) Close() error { doc.events.Stop() doc.events.Close() + for _, h := range doc.history { + h.Close() + } + + doc.element.Close() doc.client.Page.Close(context.Background()) return doc.conn.Close() diff --git a/pkg/stdlib/html/driver/browser/cdp.go b/pkg/stdlib/html/driver/dynamic/dynamic.go similarity index 87% rename from pkg/stdlib/html/driver/browser/cdp.go rename to pkg/stdlib/html/driver/dynamic/dynamic.go index e6092f03..2cd54da7 100644 --- a/pkg/stdlib/html/driver/browser/cdp.go +++ b/pkg/stdlib/html/driver/dynamic/dynamic.go @@ -1,4 +1,4 @@ -package browser +package dynamic import ( "context" @@ -12,7 +12,7 @@ import ( "sync" ) -type CdpDriver struct { +type Driver struct { sync.Mutex dev *devtool.DevTools conn *rpcc.Conn @@ -21,14 +21,14 @@ type CdpDriver struct { contextID target.BrowserContextID } -func NewDriver(address string) *CdpDriver { - drv := new(CdpDriver) +func NewDriver(address string) *Driver { + drv := new(Driver) drv.dev = devtool.New(address) return drv } -func (drv *CdpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) { +func (drv *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) { err := drv.init(ctx) if err != nil { @@ -57,7 +57,7 @@ func (drv *CdpDriver) GetDocument(ctx context.Context, url string) (values.HtmlN return LoadHtmlDocument(ctx, conn, url) } -func (drv *CdpDriver) Close() error { +func (drv *Driver) Close() error { drv.Lock() defer drv.Unlock() @@ -70,7 +70,7 @@ func (drv *CdpDriver) Close() error { return nil } -func (drv *CdpDriver) init(ctx context.Context) error { +func (drv *Driver) init(ctx context.Context) error { drv.Lock() defer drv.Unlock() diff --git a/pkg/stdlib/html/driver/browser/element.go b/pkg/stdlib/html/driver/dynamic/element.go similarity index 69% rename from pkg/stdlib/html/driver/browser/element.go rename to pkg/stdlib/html/driver/dynamic/element.go index 83bf4281..adc4f212 100644 --- a/pkg/stdlib/html/driver/browser/element.go +++ b/pkg/stdlib/html/driver/dynamic/element.go @@ -1,4 +1,4 @@ -package browser +package dynamic import ( "bytes" @@ -7,30 +7,37 @@ import ( "encoding/json" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/events" "github.com/MontFerret/ferret/pkg/stdlib/html/driver/common" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events" "github.com/PuerkitoBio/goquery" "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" "strconv" + "sync" "time" ) const DefaultTimeout = time.Second * 30 type HtmlElement struct { + sync.Mutex client *cdp.Client + broker *events.EventBroker + connected bool id dom.NodeID nodeType values.Int nodeName values.String + innerHtml values.String + innerText *common.LazyValue value string - attributes *values.Object + attributes *common.LazyValue children []dom.NodeID - loadedChildren *values.Array + loadedChildren *common.LazyValue } func LoadElement( client *cdp.Client, + broker *events.EventBroker, id dom.NodeID, ) (*HtmlElement, error) { if client == nil { @@ -46,28 +53,70 @@ func LoadElement( dom. NewDescribeNodeArgs(). SetNodeID(id). - SetDepth(1), + SetDepth(-1), ) if err != nil { return nil, core.Error(err, strconv.Itoa(int(id))) } - return NewHtmlElement(client, id, node.Node), nil + innerHtml, err := client.DOM.GetOuterHTML( + ctx, + dom.NewGetOuterHTMLArgs().SetNodeID(id), + ) + + if err != nil { + return nil, core.Error(err, strconv.Itoa(int(id))) + } + + return NewHtmlElement( + client, + broker, + id, + node.Node, + values.NewString(innerHtml.OuterHTML), + ), nil } func NewHtmlElement( client *cdp.Client, + broker *events.EventBroker, id dom.NodeID, node dom.Node, + innerHtml values.String, ) *HtmlElement { el := new(HtmlElement) el.client = client + el.broker = broker + el.connected = true el.id = id el.nodeType = values.NewInt(node.NodeType) el.nodeName = values.NewString(node.NodeName) + el.innerHtml = innerHtml + el.innerText = common.NewLazyValue(func() (core.Value, error) { + h := el.InnerHtml() + + if h == values.EmptyString { + return h, nil + } + + buff := bytes.NewBuffer([]byte(h)) + + parsed, err := goquery.NewDocumentFromReader(buff) + + if err != nil { + return values.EmptyString, err + } + + return values.NewString(parsed.Text()), nil + }) + el.attributes = common.NewLazyValue(func() (core.Value, error) { + return parseAttrs(node.Attributes), nil + }) el.value = "" - el.attributes = parseAttrs(node.Attributes) + el.loadedChildren = common.NewLazyValue(func() (core.Value, error) { + return loadNodes(client, broker, el.children) + }) var childCount int @@ -89,8 +138,6 @@ func NewHtmlElement( } func (el *HtmlElement) Close() error { - // el.client = nil - return nil } @@ -178,11 +225,23 @@ func (el *HtmlElement) NodeName() values.String { } func (el *HtmlElement) GetAttributes() core.Value { - return el.attributes + val, err := el.attributes.Value() + + if err != nil { + return values.None + } + + return val } func (el *HtmlElement) GetAttribute(name values.String) core.Value { - val, found := el.attributes.Get(name) + attrs, err := el.attributes.Value() + + if err != nil { + return values.None + } + + val, found := attrs.(*values.Object).Get(name) if !found { return values.None @@ -192,19 +251,23 @@ func (el *HtmlElement) GetAttribute(name values.String) core.Value { } func (el *HtmlElement) GetChildNodes() core.Value { - if el.loadedChildren == nil { - el.loadedChildren = loadNodes(el.client, el.children) + val, err := el.loadedChildren.Value() + + if err != nil { + return values.NewArray(0) } - return el.loadedChildren + return val } func (el *HtmlElement) GetChildNode(idx values.Int) core.Value { - if el.loadedChildren == nil { - el.loadedChildren = loadNodes(el.client, el.children) + val, err := el.loadedChildren.Value() + + if err != nil { + return values.None } - return el.loadedChildren.Get(idx) + return val.(*values.Array).Get(idx) } func (el *HtmlElement) QuerySelector(selector values.String) core.Value { @@ -217,7 +280,7 @@ func (el *HtmlElement) QuerySelector(selector values.String) core.Value { return values.None } - res, err := LoadElement(el.client, found.NodeID) + res, err := LoadElement(el.client, el.broker, found.NodeID) if err != nil { return values.None @@ -239,7 +302,7 @@ func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value { arr := values.NewArray(len(res.NodeIDs)) for _, id := range res.NodeIDs { - childEl, err := LoadElement(el.client, id) + childEl, err := LoadElement(el.client, el.broker, id) if err != nil { return values.None @@ -252,84 +315,23 @@ func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value { } func (el *HtmlElement) InnerText() values.String { - h := el.InnerHtml() - - if h == values.EmptyString { - return h - } - - buff := bytes.NewBuffer([]byte(h)) - - parsed, err := goquery.NewDocumentFromReader(buff) + val, err := el.innerText.Value() if err != nil { return values.EmptyString } - return values.NewString(parsed.Text()) + return val.(values.String) } func (el *HtmlElement) InnerHtml() values.String { - ctx, cancelFn := createCtx() - - defer cancelFn() - - res, err := el.client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(el.id)) - - if err != nil { - return values.EmptyString - } - - return values.NewString(res.OuterHTML) + return el.innerHtml } func (el *HtmlElement) Click() (values.Boolean, error) { - ctx, cancel := createCtx() + ctx, cancel := contextWithTimeout() defer cancel() return events.DispatchEvent(ctx, el.client, el.id, "click") } - -func createCtx() (context.Context, context.CancelFunc) { - return context.WithTimeout(context.Background(), DefaultTimeout) -} - -func parseAttrs(attrs []string) *values.Object { - var attr values.String - - res := values.NewObject() - - for _, el := range attrs { - str := values.NewString(el) - - if common.IsAttribute(el) { - attr = str - res.Set(str, values.EmptyString) - } else { - current, ok := res.Get(attr) - - if ok { - res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str)) - } - } - } - - return res -} - -func loadNodes(client *cdp.Client, nodes []dom.NodeID) *values.Array { - arr := values.NewArray(len(nodes)) - - for _, id := range nodes { - child, err := LoadElement(client, id) - - if err != nil { - break - } - - arr.Push(child) - } - - return arr -} diff --git a/pkg/stdlib/html/driver/browser/eval/eval.go b/pkg/stdlib/html/driver/dynamic/eval/eval.go similarity index 100% rename from pkg/stdlib/html/driver/browser/eval/eval.go rename to pkg/stdlib/html/driver/dynamic/eval/eval.go diff --git a/pkg/stdlib/html/driver/browser/events/broker.go b/pkg/stdlib/html/driver/dynamic/events/broker.go similarity index 100% rename from pkg/stdlib/html/driver/browser/events/broker.go rename to pkg/stdlib/html/driver/dynamic/events/broker.go diff --git a/pkg/stdlib/html/driver/browser/events/dispatch.go b/pkg/stdlib/html/driver/dynamic/events/dispatch.go similarity index 95% rename from pkg/stdlib/html/driver/browser/events/dispatch.go rename to pkg/stdlib/html/driver/dynamic/events/dispatch.go index 5bd91de3..fb4d1345 100644 --- a/pkg/stdlib/html/driver/browser/events/dispatch.go +++ b/pkg/stdlib/html/driver/dynamic/events/dispatch.go @@ -4,7 +4,7 @@ import ( "context" "fmt" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval" "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/dom" "github.com/mafredri/cdp/protocol/runtime" diff --git a/pkg/stdlib/html/driver/browser/events/wait.go b/pkg/stdlib/html/driver/dynamic/events/wait.go similarity index 94% rename from pkg/stdlib/html/driver/browser/events/wait.go rename to pkg/stdlib/html/driver/dynamic/events/wait.go index 2db6796c..aa709ac9 100644 --- a/pkg/stdlib/html/driver/browser/events/wait.go +++ b/pkg/stdlib/html/driver/dynamic/events/wait.go @@ -3,7 +3,7 @@ package events import ( "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser/eval" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval" "github.com/mafredri/cdp" "time" ) diff --git a/pkg/stdlib/html/driver/dynamic/helpers.go b/pkg/stdlib/html/driver/dynamic/helpers.go new file mode 100644 index 00000000..61d592b2 --- /dev/null +++ b/pkg/stdlib/html/driver/dynamic/helpers.go @@ -0,0 +1,182 @@ +package dynamic + +import ( + "context" + "github.com/MontFerret/ferret/pkg/runtime/values" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/common" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events" + "github.com/mafredri/cdp" + "github.com/mafredri/cdp/protocol/dom" + "github.com/mafredri/cdp/protocol/page" + "golang.org/x/sync/errgroup" +) + +func pointerInt(input int) *int { + return &input +} + +type batchFunc = func() error + +func runBatch(funcs ...batchFunc) error { + eg := errgroup.Group{} + + for _, f := range funcs { + eg.Go(f) + } + + return eg.Wait() +} + +func parseAttrs(attrs []string) *values.Object { + var attr values.String + + res := values.NewObject() + + for _, el := range attrs { + str := values.NewString(el) + + if common.IsAttribute(el) { + attr = str + res.Set(str, values.EmptyString) + } else { + current, ok := res.Get(attr) + + if ok { + res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str)) + } + } + } + + return res +} + +func loadNodes(client *cdp.Client, broker *events.EventBroker, nodes []dom.NodeID) (*values.Array, error) { + arr := values.NewArray(len(nodes)) + + for _, id := range nodes { + child, err := LoadElement(client, broker, id) + + if err != nil { + return nil, err + } + + arr.Push(child) + } + + return arr, nil +} + +func contextWithTimeout() (context.Context, context.CancelFunc) { + return context.WithTimeout(context.Background(), DefaultTimeout) +} + +func waitForLoadEvent(ctx context.Context, client *cdp.Client) error { + loadEventFired, err := client.Page.LoadEventFired(ctx) + + if err != nil { + return err + } + + _, err = loadEventFired.Recv() + + if err != nil { + return err + } + + return loadEventFired.Close() +} + +func createEventBroker(client *cdp.Client) (*events.EventBroker, error) { + ctx := context.Background() + load, err := client.Page.LoadEventFired(ctx) + + if err != nil { + return nil, err + } + + broker := events.NewEventBroker() + broker.AddEventStream("load", load, func() interface{} { + return new(page.LoadEventFiredReply) + }) + + err = broker.Start() + + if err != nil { + broker.Close() + + return nil, err + } + + destroy, err := client.DOM.DocumentUpdated(ctx) + + if err != nil { + broker.Close() + return nil, err + } + + broker.AddEventStream("reload", destroy, func() interface{} { + return new(dom.DocumentUpdatedReply) + }) + + attrModified, err := client.DOM.AttributeModified(ctx) + + if err != nil { + broker.Close() + + return nil, err + } + + broker.AddEventStream("attr:modified", attrModified, func() interface{} { + return new(dom.AttributeModifiedReply) + }) + + attrRemoved, err := client.DOM.AttributeRemoved(ctx) + + if err != nil { + broker.Close() + + return nil, err + } + + broker.AddEventStream("attr:removed", attrRemoved, func() interface{} { + return new(dom.AttributeRemovedReply) + }) + + childrenCount, err := client.DOM.ChildNodeCountUpdated(ctx) + + if err != nil { + broker.Close() + + return nil, err + } + + broker.AddEventStream("children:count", childrenCount, func() interface{} { + return new(dom.ChildNodeCountUpdatedReply) + }) + + childrenInsert, err := client.DOM.ChildNodeInserted(ctx) + + if err != nil { + broker.Close() + + return nil, err + } + + broker.AddEventStream("children:inserted", childrenInsert, func() interface{} { + return new(dom.ChildNodeInsertedReply) + }) + + childDeleted, err := client.DOM.ChildNodeRemoved(ctx) + + if err != nil { + broker.Close() + + return nil, err + } + + broker.AddEventStream("children:deleted", childDeleted, func() interface{} { + return new(dom.ChildNodeRemovedReply) + }) + + return broker, nil +} diff --git a/pkg/stdlib/html/driver/http/document.go b/pkg/stdlib/html/driver/static/document.go similarity index 98% rename from pkg/stdlib/html/driver/http/document.go rename to pkg/stdlib/html/driver/static/document.go index 22b167c9..f7e12d85 100644 --- a/pkg/stdlib/html/driver/http/document.go +++ b/pkg/stdlib/html/driver/static/document.go @@ -1,4 +1,4 @@ -package http +package static import ( "github.com/MontFerret/ferret/pkg/runtime/core" diff --git a/pkg/stdlib/html/driver/http/document_test.go b/pkg/stdlib/html/driver/static/document_test.go similarity index 99% rename from pkg/stdlib/html/driver/http/document_test.go rename to pkg/stdlib/html/driver/static/document_test.go index bda286aa..9774f336 100644 --- a/pkg/stdlib/html/driver/http/document_test.go +++ b/pkg/stdlib/html/driver/static/document_test.go @@ -1,8 +1,8 @@ -package http_test +package static_test import ( "bytes" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static" "github.com/PuerkitoBio/goquery" . "github.com/smartystreets/goconvey/convey" "testing" @@ -228,7 +228,7 @@ func TestDocument(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Selection) + el, err := static.NewHtmlElement(doc.Selection) So(err, ShouldBeNil) diff --git a/pkg/stdlib/html/driver/http/element.go b/pkg/stdlib/html/driver/static/element.go similarity index 99% rename from pkg/stdlib/html/driver/http/element.go rename to pkg/stdlib/html/driver/static/element.go index abf16117..48e924df 100644 --- a/pkg/stdlib/html/driver/http/element.go +++ b/pkg/stdlib/html/driver/static/element.go @@ -1,4 +1,4 @@ -package http +package static import ( "crypto/sha512" diff --git a/pkg/stdlib/html/driver/http/element_test.go b/pkg/stdlib/html/driver/static/element_test.go similarity index 97% rename from pkg/stdlib/html/driver/http/element_test.go rename to pkg/stdlib/html/driver/static/element_test.go index 9f999e7d..79421ab1 100644 --- a/pkg/stdlib/html/driver/http/element_test.go +++ b/pkg/stdlib/html/driver/static/element_test.go @@ -1,8 +1,8 @@ -package http_test +package static_test import ( "bytes" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/http" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/static" "github.com/PuerkitoBio/goquery" . "github.com/smartystreets/goconvey/convey" "testing" @@ -250,7 +250,7 @@ func TestElement(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Find("body")) + el, err := static.NewHtmlElement(doc.Find("body")) So(err, ShouldBeNil) @@ -266,7 +266,7 @@ func TestElement(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Find("body")) + el, err := static.NewHtmlElement(doc.Find("body")) So(err, ShouldBeNil) @@ -290,7 +290,7 @@ func TestElement(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Find("body")) + el, err := static.NewHtmlElement(doc.Find("body")) So(err, ShouldBeNil) @@ -315,7 +315,7 @@ func TestElement(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Find("#q")) + el, err := static.NewHtmlElement(doc.Find("#q")) So(err, ShouldBeNil) @@ -342,7 +342,7 @@ func TestElement(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Find("h2")) + el, err := static.NewHtmlElement(doc.Find("h2")) So(err, ShouldBeNil) @@ -369,7 +369,7 @@ func TestElement(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Find("#content")) + el, err := static.NewHtmlElement(doc.Find("#content")) So(err, ShouldBeNil) @@ -385,7 +385,7 @@ func TestElement(t *testing.T) { So(err, ShouldBeNil) - el, err := http.NewHtmlElement(doc.Find("body .card-img-top:nth-child(1)")) + el, err := static.NewHtmlElement(doc.Find("body .card-img-top:nth-child(1)")) So(err, ShouldBeNil) diff --git a/pkg/stdlib/html/driver/http/options.go b/pkg/stdlib/html/driver/static/options.go similarity index 97% rename from pkg/stdlib/html/driver/http/options.go rename to pkg/stdlib/html/driver/static/options.go index afdf7542..7a97e3d3 100644 --- a/pkg/stdlib/html/driver/http/options.go +++ b/pkg/stdlib/html/driver/static/options.go @@ -1,4 +1,4 @@ -package http +package static import "github.com/sethgrid/pester" diff --git a/pkg/stdlib/html/driver/http/http.go b/pkg/stdlib/html/driver/static/static.go similarity index 80% rename from pkg/stdlib/html/driver/http/http.go rename to pkg/stdlib/html/driver/static/static.go index 63afa4e2..1f096c7f 100644 --- a/pkg/stdlib/html/driver/http/http.go +++ b/pkg/stdlib/html/driver/static/static.go @@ -1,4 +1,4 @@ -package http +package static import ( "bytes" @@ -11,11 +11,11 @@ import ( httpx "net/http" ) -type HttpDriver struct { +type Driver struct { client *pester.Client } -func NewDriver(setters ...Option) *HttpDriver { +func NewDriver(setters ...Option) *Driver { client := pester.New() client.Concurrency = 3 client.MaxRetries = 5 @@ -25,10 +25,10 @@ func NewDriver(setters ...Option) *HttpDriver { setter(client) } - return &HttpDriver{client} + return &Driver{client} } -func (d *HttpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) { +func (d *Driver) GetDocument(ctx context.Context, url string) (values.HtmlNode, error) { req, err := httpx.NewRequest(httpx.MethodGet, url, nil) if err != nil { @@ -58,7 +58,7 @@ func (d *HttpDriver) GetDocument(ctx context.Context, url string) (values.HtmlNo return NewHtmlDocument(url, doc) } -func (d *HttpDriver) ParseDocument(ctx context.Context, str string) (values.HtmlNode, error) { +func (d *Driver) ParseDocument(ctx context.Context, str string) (values.HtmlNode, error) { buf := bytes.NewBuffer([]byte(str)) doc, err := goquery.NewDocumentFromReader(buf) @@ -70,7 +70,7 @@ func (d *HttpDriver) ParseDocument(ctx context.Context, str string) (values.Html return NewHtmlDocument("#string", doc) } -func (d *HttpDriver) Close() error { +func (d *Driver) Close() error { d.client = nil return nil diff --git a/pkg/stdlib/html/events.go b/pkg/stdlib/html/events.go index d1be5a50..1263b778 100644 --- a/pkg/stdlib/html/events.go +++ b/pkg/stdlib/html/events.go @@ -4,7 +4,7 @@ import ( "context" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/MontFerret/ferret/pkg/stdlib/html/driver/browser" + "github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic" ) func WaitElement(_ context.Context, args ...core.Value) (core.Value, error) { @@ -30,7 +30,7 @@ func WaitElement(_ context.Context, args ...core.Value) (core.Value, error) { return values.None, err } - doc, ok := arg.(*browser.HtmlDocument) + doc, ok := arg.(*dynamic.HtmlDocument) if !ok { return values.False, core.Error(core.ErrInvalidType, "expected dynamic document") @@ -52,7 +52,7 @@ func WaitNavigation(_ context.Context, args ...core.Value) (core.Value, error) { return values.None, err } - doc, ok := args[0].(*browser.HtmlDocument) + doc, ok := args[0].(*dynamic.HtmlDocument) if !ok { return values.None, core.Error(core.ErrInvalidType, "expected dynamic document")