1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-16 11:37:36 +02:00
ferret/pkg/drivers/cdp/document.go

564 lines
13 KiB
Go
Raw Normal View History

package cdp
2018-09-18 22:42:38 +02:00
import (
"context"
2018-09-23 10:33:20 +02:00
"fmt"
"hash/fnv"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
"github.com/mafredri/cdp/protocol/runtime"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
"github.com/MontFerret/ferret/pkg/drivers/cdp/events"
"github.com/MontFerret/ferret/pkg/drivers/cdp/input"
"github.com/MontFerret/ferret/pkg/drivers/cdp/templates"
"github.com/MontFerret/ferret/pkg/drivers/common"
2018-09-18 22:42:38 +02:00
"github.com/MontFerret/ferret/pkg/runtime/core"
2018-09-23 10:33:20 +02:00
"github.com/MontFerret/ferret/pkg/runtime/values"
2018-09-18 22:42:38 +02:00
)
// HTMLDocument is the CDP-backed document of a single frame.
type HTMLDocument struct {
	// logger is the destination used by logError.
	logger *zerolog.Logger
	// client is the CDP client shared with the elements and child documents.
	client *cdp.Client
	// events is the event broker handed to loaded elements and child documents.
	events *events.EventBroker
	// input performs clicks, typing, mouse moves and scrolling.
	input *input.Manager
	// exec is the execution context used for wait tasks and property reads.
	exec *eval.ExecutionContext
	// frames holds this document's frame and the trees of its child frames.
	frames page.FrameTree
	// element is the root element most methods delegate to.
	element *HTMLElement
	// parent is the owning document; LoadRootHTMLDocument passes nil here.
	parent *HTMLDocument
	// children lazily resolves the child frame documents via loadChildren.
	children *common.LazyValue
}
func LoadRootHTMLDocument(
2018-09-18 22:42:38 +02:00
ctx context.Context,
logger *zerolog.Logger,
client *cdp.Client,
events *events.EventBroker,
mouse *input.Mouse,
keyboard *input.Keyboard,
) (*HTMLDocument, error) {
gdRepl, err := client.DOM.GetDocument(ctx, dom.NewGetDocumentArgs().SetDepth(1))
2018-09-18 22:42:38 +02:00
if err != nil {
return nil, err
2018-09-18 22:42:38 +02:00
}
ftRepl, err := client.Page.GetFrameTree(ctx)
if err != nil {
return nil, err
}
worldRepl, err := client.Page.CreateIsolatedWorld(ctx, page.NewCreateIsolatedWorldArgs(ftRepl.FrameTree.Frame.ID))
if err != nil {
return nil, err
}
return LoadHTMLDocument(
ctx,
logger,
client,
events,
mouse,
keyboard,
gdRepl.Root,
ftRepl.FrameTree,
worldRepl.ExecutionContextID,
nil,
)
}
func LoadHTMLDocument(
ctx context.Context,
logger *zerolog.Logger,
client *cdp.Client,
events *events.EventBroker,
mouse *input.Mouse,
keyboard *input.Keyboard,
node dom.Node,
tree page.FrameTree,
execID runtime.ExecutionContextID,
parent *HTMLDocument,
) (*HTMLDocument, error) {
exec := eval.NewExecutionContext(client, tree.Frame, execID)
inputManager := input.NewManager(client, exec, keyboard, mouse)
rootElement, err := LoadHTMLElement(
ctx,
logger,
client,
events,
inputManager,
exec,
node.NodeID,
)
2018-11-22 18:44:05 +02:00
if err != nil {
return nil, errors.Wrap(err, "failed to load root element")
}
return NewHTMLDocument(
logger,
2018-09-28 06:28:33 +02:00
client,
events,
inputManager,
exec,
rootElement,
tree,
parent,
2018-09-28 06:28:33 +02:00
), nil
}
func NewHTMLDocument(
2018-09-28 06:28:33 +02:00
logger *zerolog.Logger,
2018-09-25 23:58:57 +02:00
client *cdp.Client,
events *events.EventBroker,
input *input.Manager,
exec *eval.ExecutionContext,
rootElement *HTMLElement,
frames page.FrameTree,
parent *HTMLDocument,
) *HTMLDocument {
doc := new(HTMLDocument)
2018-09-28 06:28:33 +02:00
doc.logger = logger
2018-09-25 23:58:57 +02:00
doc.client = client
doc.events = events
doc.input = input
doc.exec = exec
doc.element = rootElement
doc.frames = frames
doc.parent = parent
doc.children = common.NewLazyValue(doc.loadChildren)
2018-09-25 23:58:57 +02:00
return doc
}
func (doc *HTMLDocument) MarshalJSON() ([]byte, error) {
2018-09-25 23:58:57 +02:00
return doc.element.MarshalJSON()
2018-09-18 22:42:38 +02:00
}
func (doc *HTMLDocument) Type() core.Type {
return drivers.HTMLDocumentType
2018-09-18 22:42:38 +02:00
}
func (doc *HTMLDocument) String() string {
return doc.frames.Frame.URL
2018-09-18 22:42:38 +02:00
}
func (doc *HTMLDocument) Unwrap() interface{} {
2018-09-25 23:58:57 +02:00
return doc.element
}
func (doc *HTMLDocument) Hash() uint64 {
2018-10-05 21:17:22 +02:00
h := fnv.New64a()
2018-09-25 23:58:57 +02:00
2018-10-05 21:17:22 +02:00
h.Write([]byte(doc.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(doc.frames.Frame.ID))
h.Write([]byte(doc.frames.Frame.URL))
2018-09-25 23:58:57 +02:00
2018-10-05 21:17:22 +02:00
return h.Sum64()
2018-09-25 23:58:57 +02:00
}
func (doc *HTMLDocument) Copy() core.Value {
2018-09-27 17:53:26 +02:00
return values.None
}
func (doc *HTMLDocument) Compare(other core.Value) int64 {
switch other.Type() {
case drivers.HTMLDocumentType:
other := other.(drivers.HTMLDocument)
2018-09-18 22:42:38 +02:00
return values.NewString(doc.frames.Frame.URL).Compare(other.GetURL())
default:
return drivers.Compare(doc.Type(), other.Type())
2018-09-18 22:42:38 +02:00
}
}
// Iterate returns the iterator produced by the root element.
func (doc *HTMLDocument) Iterate(ctx context.Context) (core.Iterator, error) {
	root := doc.element

	return root.Iterate(ctx)
}
// GetIn resolves a member path on the document through common.GetInDocument.
func (doc *HTMLDocument) GetIn(ctx context.Context, path []core.Value) (core.Value, error) {
	result, err := common.GetInDocument(ctx, doc, path)

	return result, err
}
func (doc *HTMLDocument) SetIn(ctx context.Context, path []core.Value, value core.Value) error {
return common.SetInDocument(ctx, doc, path, value)
2018-09-18 22:42:38 +02:00
}
2018-09-23 10:33:20 +02:00
func (doc *HTMLDocument) Close() error {
errs := make([]error, 0, 5)
2018-09-28 06:28:33 +02:00
if doc.children.Ready() {
val, err := doc.children.Read(context.Background())
2018-09-28 06:28:33 +02:00
if err == nil {
arr := val.(*values.Array)
2018-09-28 06:28:33 +02:00
arr.ForEach(func(value core.Value, _ int) bool {
doc := value.(drivers.HTMLDocument)
2018-09-28 06:28:33 +02:00
err := doc.Close()
2018-09-25 23:58:57 +02:00
if err != nil {
errs = append(errs, errors.Wrapf(err, "failed to close nested document: %s", doc.GetURL()))
}
return true
})
} else {
errs = append(errs, err)
}
2018-09-28 06:28:33 +02:00
}
err := doc.element.Close()
2018-09-28 06:28:33 +02:00
if err != nil {
errs = append(errs, err)
2018-09-28 06:28:33 +02:00
}
2018-09-25 23:58:57 +02:00
if len(errs) == 0 {
return nil
}
2018-09-25 23:58:57 +02:00
return core.Errors(errs...)
2018-09-25 23:58:57 +02:00
}
func (doc *HTMLDocument) IsDetached() values.Boolean {
return doc.element.IsDetached()
2018-09-25 23:58:57 +02:00
}
// GetNodeType returns the constant node type 9, which is the DOM
// DOCUMENT_NODE value.
func (doc *HTMLDocument) GetNodeType() values.Int {
	const documentNode values.Int = 9

	return documentNode
}
2018-09-25 23:58:57 +02:00
func (doc *HTMLDocument) GetNodeName() values.String {
return "#document"
2018-09-25 23:58:57 +02:00
}
func (doc *HTMLDocument) GetChildNodes(ctx context.Context) core.Value {
return doc.element.GetChildNodes(ctx)
2018-09-25 23:58:57 +02:00
}
func (doc *HTMLDocument) GetChildNode(ctx context.Context, idx values.Int) core.Value {
return doc.element.GetChildNode(ctx, idx)
2018-09-25 23:58:57 +02:00
}
func (doc *HTMLDocument) QuerySelector(ctx context.Context, selector values.String) core.Value {
return doc.element.QuerySelector(ctx, selector)
2018-09-25 23:58:57 +02:00
}
func (doc *HTMLDocument) QuerySelectorAll(ctx context.Context, selector values.String) core.Value {
return doc.element.QuerySelectorAll(ctx, selector)
2018-09-25 23:58:57 +02:00
}
func (doc *HTMLDocument) CountBySelector(ctx context.Context, selector values.String) values.Int {
return doc.element.CountBySelector(ctx, selector)
2018-09-28 04:03:35 +02:00
}
func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector values.String) values.Boolean {
return doc.element.ExistsBySelector(ctx, selector)
2018-09-28 04:03:35 +02:00
}
// GetTitle reads the "title" property of the root element's remote object.
//
// The method has no context parameter, so the read runs with
// context.Background(). On failure the error is logged and an empty string is
// returned.
func (doc *HTMLDocument) GetTitle() values.String {
	value, err := doc.exec.ReadProperty(context.Background(), doc.element.id.objectID, "title")

	if err != nil {
		// logError only builds the event; zerolog writes nothing until the
		// event is finalized, so Msg must be called for the failure to be logged.
		doc.logError(err).Msg("failed to read document title")

		return values.EmptyString
	}

	return values.NewString(value.String())
}
// GetName returns the frame's name, or an empty string when the frame has none.
func (doc *HTMLDocument) GetName() values.String {
	name := doc.frames.Frame.Name

	if name == nil {
		return values.EmptyString
	}

	return values.NewString(*name)
}
// GetParentDocument returns the document that owns this one, or nil when this
// document has no parent (LoadRootHTMLDocument creates it with a nil parent).
func (doc *HTMLDocument) GetParentDocument() drivers.HTMLDocument {
	// Returning a nil *HTMLDocument through the interface would produce a
	// non-nil interface value, so callers comparing the result with nil would
	// be misled. Return an untyped nil instead.
	if doc.parent == nil {
		return nil
	}

	return doc.parent
}
// GetChildDocuments returns a copy of the array of child frame documents,
// loading them on first use. When loading fails, an empty array is returned
// together with the wrapped error.
func (doc *HTMLDocument) GetChildDocuments(ctx context.Context) (*values.Array, error) {
	loaded, err := doc.children.Read(ctx)

	if err == nil {
		return loaded.Copy().(*values.Array), nil
	}

	return values.NewArray(0), errors.Wrap(err, "failed to load child documents")
}
// XPath evaluates the expression against the root element.
func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) {
	root := doc.element

	return root.XPath(ctx, expression)
}
func (doc *HTMLDocument) Length() values.Int {
return doc.element.Length()
2018-09-28 04:03:35 +02:00
}
// GetElement returns the document's root element.
func (doc *HTMLDocument) GetElement() drivers.HTMLElement {
	var root drivers.HTMLElement = doc.element

	return root
}
// GetURL returns the URL of the document's frame as a string value.
func (doc *HTMLDocument) GetURL() values.String {
	url := doc.frames.Frame.URL

	return values.NewString(url)
}
func (doc *HTMLDocument) ClickBySelector(ctx context.Context, selector values.String) (values.Boolean, error) {
if err := doc.input.ClickBySelector(ctx, doc.element.id.nodeID, selector); err != nil {
2018-09-28 04:03:35 +02:00
return values.False, err
}
return values.True, nil
2018-09-28 04:03:35 +02:00
}
func (doc *HTMLDocument) ClickBySelectorAll(ctx context.Context, selector values.String) (values.Boolean, error) {
if err := doc.input.ClickBySelectorAll(ctx, doc.element.id.nodeID, selector); err != nil {
2018-09-28 03:41:41 +02:00
return values.False, err
}
return values.True, nil
2018-09-28 03:41:41 +02:00
}
// InputBySelector types value via the input manager's TypeBySelector, passing
// the root element's node, the selector and the delay. It returns True on
// success and False with the error otherwise.
func (doc *HTMLDocument) InputBySelector(ctx context.Context, selector values.String, value core.Value, delay values.Int) (values.Boolean, error) {
	rootID := doc.element.id.nodeID
	err := doc.input.TypeBySelector(ctx, rootID, selector, value, delay)

	if err != nil {
		return values.False, err
	}

	return values.True, nil
}
func (doc *HTMLDocument) SelectBySelector(ctx context.Context, selector values.String, value *values.Array) (*values.Array, error) {
return doc.input.SelectBySelector(ctx, doc.element.id.nodeID, selector, value)
2018-11-12 21:53:36 +02:00
}
func (doc *HTMLDocument) MoveMouseBySelector(ctx context.Context, selector values.String) error {
return doc.input.MoveMouseBySelector(ctx, doc.element.id.nodeID, selector)
2018-11-15 21:33:53 +02:00
}
// MoveMouseByXY forwards the x/y mouse move to the input manager.
func (doc *HTMLDocument) MoveMouseByXY(ctx context.Context, x, y values.Float) error {
	manager := doc.input

	return manager.MoveMouseByXY(ctx, x, y)
}
func (doc *HTMLDocument) WaitForElement(ctx context.Context, selector values.String, when drivers.WaitEvent) error {
var operator string
if when == drivers.WaitEventPresence {
operator = "!="
} else {
operator = "=="
}
task := events.NewEvalWaitTask(
doc.exec,
fmt.Sprintf(
`
var el = document.querySelector(%s);
if (el %s null) {
return true;
}
// null means we need to repeat
return null;
`,
eval.ParamString(selector.String()),
operator,
),
2018-09-25 23:58:57 +02:00
events.DefaultPolling,
2018-09-23 10:33:20 +02:00
)
_, err := task.Run(ctx)
2018-09-23 10:33:20 +02:00
return err
}
// WaitForClassBySelector runs a polling wait task built from
// templates.WaitBySelector. The value read from the element is the result of
// searching its space-separated className for the given class.
func (doc *HTMLDocument) WaitForClassBySelector(ctx context.Context, selector, class values.String, when drivers.WaitEvent) error {
	lookup := fmt.Sprintf("el.className.split(' ').find(i => i === %s)", eval.ParamString(class.String()))
	script := templates.WaitBySelector(selector, when, class, lookup)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForClassBySelectorAll runs a polling wait task built from
// templates.WaitBySelectorAll. The value read from each element is the result
// of searching its space-separated className for the given class.
func (doc *HTMLDocument) WaitForClassBySelectorAll(ctx context.Context, selector, class values.String, when drivers.WaitEvent) error {
	lookup := fmt.Sprintf("el.className.split(' ').find(i => i === %s)", eval.ParamString(class.String()))
	script := templates.WaitBySelectorAll(selector, when, class, lookup)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForAttributeBySelector runs a polling wait task built from
// templates.WaitBySelector, reading the attribute called name via
// templates.AttributeRead and passing value and when to the template.
func (doc *HTMLDocument) WaitForAttributeBySelector(
	ctx context.Context,
	selector,
	name values.String,
	value core.Value,
	when drivers.WaitEvent,
) error {
	reader := templates.AttributeRead(name)
	script := templates.WaitBySelector(selector, when, value, reader)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForAttributeBySelectorAll runs a polling wait task built from
// templates.WaitBySelectorAll, reading the attribute called name via
// templates.AttributeRead and passing value and when to the template.
func (doc *HTMLDocument) WaitForAttributeBySelectorAll(
	ctx context.Context,
	selector,
	name values.String,
	value core.Value,
	when drivers.WaitEvent,
) error {
	reader := templates.AttributeRead(name)
	script := templates.WaitBySelectorAll(selector, when, value, reader)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForStyleBySelector runs a polling wait task built from
// templates.WaitBySelector, reading the style called name via
// templates.StyleRead and passing value and when to the template.
func (doc *HTMLDocument) WaitForStyleBySelector(ctx context.Context, selector, name values.String, value core.Value, when drivers.WaitEvent) error {
	reader := templates.StyleRead(name)
	script := templates.WaitBySelector(selector, when, value, reader)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForStyleBySelectorAll runs a polling wait task built from
// templates.WaitBySelectorAll, reading the style called name via
// templates.StyleRead and passing value and when to the template.
func (doc *HTMLDocument) WaitForStyleBySelectorAll(ctx context.Context, selector, name values.String, value core.Value, when drivers.WaitEvent) error {
	reader := templates.StyleRead(name)
	script := templates.WaitBySelectorAll(selector, when, value, reader)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// ScrollTop forwards the scroll-to-top request to the input manager.
func (doc *HTMLDocument) ScrollTop(ctx context.Context) error {
	manager := doc.input

	return manager.ScrollTop(ctx)
}
// ScrollBottom forwards the scroll-to-bottom request to the input manager.
func (doc *HTMLDocument) ScrollBottom(ctx context.Context) error {
	manager := doc.input

	return manager.ScrollBottom(ctx)
}
func (doc *HTMLDocument) ScrollBySelector(ctx context.Context, selector values.String) error {
return doc.input.ScrollIntoViewBySelector(ctx, selector)
2018-11-15 21:33:53 +02:00
}
// ScrollByXY forwards the x/y scroll request to the input manager.
func (doc *HTMLDocument) ScrollByXY(ctx context.Context, x, y values.Float) error {
	manager := doc.input

	return manager.ScrollByXY(ctx, x, y)
}
func (doc *HTMLDocument) loadChildren(ctx context.Context) (value core.Value, e error) {
children := values.NewArray(len(doc.frames.ChildFrames))
2018-10-08 02:15:41 +02:00
if len(doc.frames.ChildFrames) > 0 {
for _, cf := range doc.frames.ChildFrames {
cfNode, cfExecID, err := resolveFrame(ctx, doc.client, cf.Frame)
2018-10-08 02:15:41 +02:00
if err != nil {
return nil, errors.Wrap(err, "failed to resolve frame node")
}
2018-10-08 02:15:41 +02:00
cfDocument, err := LoadHTMLDocument(
ctx,
doc.logger,
doc.client,
doc.events,
doc.input.Mouse(),
doc.input.Keyboard(),
cfNode,
cf,
cfExecID,
doc,
)
if err != nil {
return nil, errors.Wrap(err, "failed to load frame document")
}
children.Push(cfDocument)
}
}
return children, nil
2018-10-08 02:15:41 +02:00
}
func (doc *HTMLDocument) logError(err error) *zerolog.Event {
return doc.logger.
Error().
2018-10-08 02:15:41 +02:00
Timestamp().
Str("url", string(doc.frames.Frame.URL)).
Str("securityOrigin", string(doc.frames.Frame.SecurityOrigin)).
Str("mimeType", string(doc.frames.Frame.MimeType)).
Str("frameID", string(doc.frames.Frame.ID)).
Err(err)
2018-09-26 01:04:07 +02:00
}