mirror of
https://github.com/MontFerret/ferret.git
synced 2024-12-31 01:53:36 +02:00
8e13cf9134
* Refactored input and select * WIP * Fixed serialization * Fixed scrolling * Fixed XPath result handling * Renamed some methods
564 lines
13 KiB
Go
564 lines
13 KiB
Go
package cdp
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"hash/fnv"
|
|
|
|
"github.com/mafredri/cdp"
|
|
"github.com/mafredri/cdp/protocol/dom"
|
|
"github.com/mafredri/cdp/protocol/page"
|
|
"github.com/mafredri/cdp/protocol/runtime"
|
|
"github.com/pkg/errors"
|
|
"github.com/rs/zerolog"
|
|
|
|
"github.com/MontFerret/ferret/pkg/drivers"
|
|
"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
|
|
"github.com/MontFerret/ferret/pkg/drivers/cdp/events"
|
|
"github.com/MontFerret/ferret/pkg/drivers/cdp/input"
|
|
"github.com/MontFerret/ferret/pkg/drivers/cdp/templates"
|
|
"github.com/MontFerret/ferret/pkg/drivers/common"
|
|
"github.com/MontFerret/ferret/pkg/runtime/core"
|
|
"github.com/MontFerret/ferret/pkg/runtime/values"
|
|
)
|
|
|
|
type HTMLDocument struct {
|
|
logger *zerolog.Logger
|
|
client *cdp.Client
|
|
events *events.EventBroker
|
|
input *input.Manager
|
|
exec *eval.ExecutionContext
|
|
frames page.FrameTree
|
|
element *HTMLElement
|
|
parent *HTMLDocument
|
|
children *common.LazyValue
|
|
}
|
|
|
|
func LoadRootHTMLDocument(
|
|
ctx context.Context,
|
|
logger *zerolog.Logger,
|
|
client *cdp.Client,
|
|
events *events.EventBroker,
|
|
mouse *input.Mouse,
|
|
keyboard *input.Keyboard,
|
|
) (*HTMLDocument, error) {
|
|
gdRepl, err := client.DOM.GetDocument(ctx, dom.NewGetDocumentArgs().SetDepth(1))
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ftRepl, err := client.Page.GetFrameTree(ctx)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
worldRepl, err := client.Page.CreateIsolatedWorld(ctx, page.NewCreateIsolatedWorldArgs(ftRepl.FrameTree.Frame.ID))
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return LoadHTMLDocument(
|
|
ctx,
|
|
logger,
|
|
client,
|
|
events,
|
|
mouse,
|
|
keyboard,
|
|
gdRepl.Root,
|
|
ftRepl.FrameTree,
|
|
worldRepl.ExecutionContextID,
|
|
nil,
|
|
)
|
|
}
|
|
|
|
func LoadHTMLDocument(
|
|
ctx context.Context,
|
|
logger *zerolog.Logger,
|
|
client *cdp.Client,
|
|
events *events.EventBroker,
|
|
mouse *input.Mouse,
|
|
keyboard *input.Keyboard,
|
|
node dom.Node,
|
|
tree page.FrameTree,
|
|
execID runtime.ExecutionContextID,
|
|
parent *HTMLDocument,
|
|
) (*HTMLDocument, error) {
|
|
exec := eval.NewExecutionContext(client, tree.Frame, execID)
|
|
inputManager := input.NewManager(client, exec, keyboard, mouse)
|
|
|
|
rootElement, err := LoadHTMLElement(
|
|
ctx,
|
|
logger,
|
|
client,
|
|
events,
|
|
inputManager,
|
|
exec,
|
|
node.NodeID,
|
|
)
|
|
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to load root element")
|
|
}
|
|
|
|
return NewHTMLDocument(
|
|
logger,
|
|
client,
|
|
events,
|
|
inputManager,
|
|
exec,
|
|
rootElement,
|
|
tree,
|
|
parent,
|
|
), nil
|
|
}
|
|
|
|
func NewHTMLDocument(
|
|
logger *zerolog.Logger,
|
|
client *cdp.Client,
|
|
events *events.EventBroker,
|
|
input *input.Manager,
|
|
exec *eval.ExecutionContext,
|
|
rootElement *HTMLElement,
|
|
frames page.FrameTree,
|
|
parent *HTMLDocument,
|
|
) *HTMLDocument {
|
|
doc := new(HTMLDocument)
|
|
doc.logger = logger
|
|
doc.client = client
|
|
doc.events = events
|
|
doc.input = input
|
|
doc.exec = exec
|
|
doc.element = rootElement
|
|
doc.frames = frames
|
|
doc.parent = parent
|
|
doc.children = common.NewLazyValue(doc.loadChildren)
|
|
|
|
return doc
|
|
}
|
|
|
|
func (doc *HTMLDocument) MarshalJSON() ([]byte, error) {
|
|
return doc.element.MarshalJSON()
|
|
}
|
|
|
|
func (doc *HTMLDocument) Type() core.Type {
|
|
return drivers.HTMLDocumentType
|
|
}
|
|
|
|
func (doc *HTMLDocument) String() string {
|
|
return doc.frames.Frame.URL
|
|
}
|
|
|
|
func (doc *HTMLDocument) Unwrap() interface{} {
|
|
return doc.element
|
|
}
|
|
|
|
func (doc *HTMLDocument) Hash() uint64 {
|
|
h := fnv.New64a()
|
|
|
|
h.Write([]byte(doc.Type().String()))
|
|
h.Write([]byte(":"))
|
|
h.Write([]byte(doc.frames.Frame.ID))
|
|
h.Write([]byte(doc.frames.Frame.URL))
|
|
|
|
return h.Sum64()
|
|
}
|
|
|
|
func (doc *HTMLDocument) Copy() core.Value {
|
|
return values.None
|
|
}
|
|
|
|
func (doc *HTMLDocument) Compare(other core.Value) int64 {
|
|
switch other.Type() {
|
|
case drivers.HTMLDocumentType:
|
|
other := other.(drivers.HTMLDocument)
|
|
|
|
return values.NewString(doc.frames.Frame.URL).Compare(other.GetURL())
|
|
default:
|
|
return drivers.Compare(doc.Type(), other.Type())
|
|
}
|
|
}
|
|
|
|
func (doc *HTMLDocument) Iterate(ctx context.Context) (core.Iterator, error) {
|
|
return doc.element.Iterate(ctx)
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetIn(ctx context.Context, path []core.Value) (core.Value, error) {
|
|
return common.GetInDocument(ctx, doc, path)
|
|
}
|
|
|
|
func (doc *HTMLDocument) SetIn(ctx context.Context, path []core.Value, value core.Value) error {
|
|
return common.SetInDocument(ctx, doc, path, value)
|
|
}
|
|
|
|
func (doc *HTMLDocument) Close() error {
|
|
errs := make([]error, 0, 5)
|
|
|
|
if doc.children.Ready() {
|
|
val, err := doc.children.Read(context.Background())
|
|
|
|
if err == nil {
|
|
arr := val.(*values.Array)
|
|
|
|
arr.ForEach(func(value core.Value, _ int) bool {
|
|
doc := value.(drivers.HTMLDocument)
|
|
|
|
err := doc.Close()
|
|
|
|
if err != nil {
|
|
errs = append(errs, errors.Wrapf(err, "failed to close nested document: %s", doc.GetURL()))
|
|
}
|
|
|
|
return true
|
|
})
|
|
} else {
|
|
errs = append(errs, err)
|
|
}
|
|
}
|
|
|
|
err := doc.element.Close()
|
|
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
|
|
if len(errs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
return core.Errors(errs...)
|
|
}
|
|
|
|
func (doc *HTMLDocument) IsDetached() values.Boolean {
|
|
return doc.element.IsDetached()
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetNodeType() values.Int {
|
|
return 9
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetNodeName() values.String {
|
|
return "#document"
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetChildNodes(ctx context.Context) core.Value {
|
|
return doc.element.GetChildNodes(ctx)
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetChildNode(ctx context.Context, idx values.Int) core.Value {
|
|
return doc.element.GetChildNode(ctx, idx)
|
|
}
|
|
|
|
func (doc *HTMLDocument) QuerySelector(ctx context.Context, selector values.String) core.Value {
|
|
return doc.element.QuerySelector(ctx, selector)
|
|
}
|
|
|
|
func (doc *HTMLDocument) QuerySelectorAll(ctx context.Context, selector values.String) core.Value {
|
|
return doc.element.QuerySelectorAll(ctx, selector)
|
|
}
|
|
|
|
func (doc *HTMLDocument) CountBySelector(ctx context.Context, selector values.String) values.Int {
|
|
return doc.element.CountBySelector(ctx, selector)
|
|
}
|
|
|
|
func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector values.String) values.Boolean {
|
|
return doc.element.ExistsBySelector(ctx, selector)
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetTitle() values.String {
|
|
value, err := doc.exec.ReadProperty(context.Background(), doc.element.id.objectID, "title")
|
|
|
|
if err != nil {
|
|
doc.logError(errors.Wrap(err, "failed to read document title"))
|
|
|
|
return values.EmptyString
|
|
}
|
|
|
|
return values.NewString(value.String())
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetName() values.String {
|
|
if doc.frames.Frame.Name != nil {
|
|
return values.NewString(*doc.frames.Frame.Name)
|
|
}
|
|
|
|
return values.EmptyString
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetParentDocument() drivers.HTMLDocument {
|
|
return doc.parent
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetChildDocuments(ctx context.Context) (*values.Array, error) {
|
|
children, err := doc.children.Read(ctx)
|
|
|
|
if err != nil {
|
|
return values.NewArray(0), errors.Wrap(err, "failed to load child documents")
|
|
}
|
|
|
|
return children.Copy().(*values.Array), nil
|
|
}
|
|
|
|
func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) {
|
|
return doc.element.XPath(ctx, expression)
|
|
}
|
|
|
|
func (doc *HTMLDocument) Length() values.Int {
|
|
return doc.element.Length()
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetElement() drivers.HTMLElement {
|
|
return doc.element
|
|
}
|
|
|
|
func (doc *HTMLDocument) GetURL() values.String {
|
|
return values.NewString(doc.frames.Frame.URL)
|
|
}
|
|
|
|
func (doc *HTMLDocument) ClickBySelector(ctx context.Context, selector values.String) (values.Boolean, error) {
|
|
if err := doc.input.ClickBySelector(ctx, doc.element.id.nodeID, selector); err != nil {
|
|
return values.False, err
|
|
}
|
|
|
|
return values.True, nil
|
|
}
|
|
|
|
func (doc *HTMLDocument) ClickBySelectorAll(ctx context.Context, selector values.String) (values.Boolean, error) {
|
|
if err := doc.input.ClickBySelectorAll(ctx, doc.element.id.nodeID, selector); err != nil {
|
|
return values.False, err
|
|
}
|
|
|
|
return values.True, nil
|
|
}
|
|
|
|
func (doc *HTMLDocument) InputBySelector(ctx context.Context, selector values.String, value core.Value, delay values.Int) (values.Boolean, error) {
|
|
if err := doc.input.TypeBySelector(ctx, doc.element.id.nodeID, selector, value, delay); err != nil {
|
|
return values.False, err
|
|
}
|
|
|
|
return values.True, nil
|
|
}
|
|
|
|
func (doc *HTMLDocument) SelectBySelector(ctx context.Context, selector values.String, value *values.Array) (*values.Array, error) {
|
|
return doc.input.SelectBySelector(ctx, doc.element.id.nodeID, selector, value)
|
|
}
|
|
|
|
func (doc *HTMLDocument) MoveMouseBySelector(ctx context.Context, selector values.String) error {
|
|
return doc.input.MoveMouseBySelector(ctx, doc.element.id.nodeID, selector)
|
|
}
|
|
|
|
func (doc *HTMLDocument) MoveMouseByXY(ctx context.Context, x, y values.Float) error {
|
|
return doc.input.MoveMouseByXY(ctx, x, y)
|
|
}
|
|
|
|
func (doc *HTMLDocument) WaitForElement(ctx context.Context, selector values.String, when drivers.WaitEvent) error {
|
|
var operator string
|
|
|
|
if when == drivers.WaitEventPresence {
|
|
operator = "!="
|
|
} else {
|
|
operator = "=="
|
|
}
|
|
|
|
task := events.NewEvalWaitTask(
|
|
doc.exec,
|
|
fmt.Sprintf(
|
|
`
|
|
var el = document.querySelector(%s);
|
|
|
|
if (el %s null) {
|
|
return true;
|
|
}
|
|
|
|
// null means we need to repeat
|
|
return null;
|
|
`,
|
|
eval.ParamString(selector.String()),
|
|
operator,
|
|
),
|
|
events.DefaultPolling,
|
|
)
|
|
|
|
_, err := task.Run(ctx)
|
|
|
|
return err
|
|
}
|
|
|
|
func (doc *HTMLDocument) WaitForClassBySelector(ctx context.Context, selector, class values.String, when drivers.WaitEvent) error {
|
|
task := events.NewEvalWaitTask(
|
|
doc.exec,
|
|
templates.WaitBySelector(
|
|
selector,
|
|
when,
|
|
class,
|
|
fmt.Sprintf("el.className.split(' ').find(i => i === %s)", eval.ParamString(class.String())),
|
|
),
|
|
events.DefaultPolling,
|
|
)
|
|
|
|
_, err := task.Run(ctx)
|
|
|
|
return err
|
|
}
|
|
|
|
func (doc *HTMLDocument) WaitForClassBySelectorAll(ctx context.Context, selector, class values.String, when drivers.WaitEvent) error {
|
|
task := events.NewEvalWaitTask(
|
|
doc.exec,
|
|
templates.WaitBySelectorAll(
|
|
selector,
|
|
when,
|
|
class,
|
|
fmt.Sprintf("el.className.split(' ').find(i => i === %s)", eval.ParamString(class.String())),
|
|
),
|
|
events.DefaultPolling,
|
|
)
|
|
|
|
_, err := task.Run(ctx)
|
|
|
|
return err
|
|
}
|
|
|
|
func (doc *HTMLDocument) WaitForAttributeBySelector(
|
|
ctx context.Context,
|
|
selector,
|
|
name values.String,
|
|
value core.Value,
|
|
when drivers.WaitEvent,
|
|
) error {
|
|
task := events.NewEvalWaitTask(
|
|
doc.exec,
|
|
templates.WaitBySelector(
|
|
selector,
|
|
when,
|
|
value,
|
|
templates.AttributeRead(name),
|
|
),
|
|
events.DefaultPolling,
|
|
)
|
|
|
|
_, err := task.Run(ctx)
|
|
|
|
return err
|
|
}
|
|
|
|
func (doc *HTMLDocument) WaitForAttributeBySelectorAll(
|
|
ctx context.Context,
|
|
selector,
|
|
name values.String,
|
|
value core.Value,
|
|
when drivers.WaitEvent,
|
|
) error {
|
|
task := events.NewEvalWaitTask(
|
|
doc.exec,
|
|
templates.WaitBySelectorAll(
|
|
selector,
|
|
when,
|
|
value,
|
|
templates.AttributeRead(name),
|
|
),
|
|
events.DefaultPolling,
|
|
)
|
|
|
|
_, err := task.Run(ctx)
|
|
|
|
return err
|
|
}
|
|
|
|
func (doc *HTMLDocument) WaitForStyleBySelector(ctx context.Context, selector, name values.String, value core.Value, when drivers.WaitEvent) error {
|
|
task := events.NewEvalWaitTask(
|
|
doc.exec,
|
|
templates.WaitBySelector(
|
|
selector,
|
|
when,
|
|
value,
|
|
templates.StyleRead(name),
|
|
),
|
|
events.DefaultPolling,
|
|
)
|
|
|
|
_, err := task.Run(ctx)
|
|
|
|
return err
|
|
}
|
|
|
|
func (doc *HTMLDocument) WaitForStyleBySelectorAll(ctx context.Context, selector, name values.String, value core.Value, when drivers.WaitEvent) error {
|
|
task := events.NewEvalWaitTask(
|
|
doc.exec,
|
|
templates.WaitBySelectorAll(
|
|
selector,
|
|
when,
|
|
value,
|
|
templates.StyleRead(name),
|
|
),
|
|
events.DefaultPolling,
|
|
)
|
|
|
|
_, err := task.Run(ctx)
|
|
|
|
return err
|
|
}
|
|
|
|
func (doc *HTMLDocument) ScrollTop(ctx context.Context) error {
|
|
return doc.input.ScrollTop(ctx)
|
|
}
|
|
|
|
func (doc *HTMLDocument) ScrollBottom(ctx context.Context) error {
|
|
return doc.input.ScrollBottom(ctx)
|
|
}
|
|
|
|
func (doc *HTMLDocument) ScrollBySelector(ctx context.Context, selector values.String) error {
|
|
return doc.input.ScrollIntoViewBySelector(ctx, selector)
|
|
}
|
|
|
|
func (doc *HTMLDocument) ScrollByXY(ctx context.Context, x, y values.Float) error {
|
|
return doc.input.ScrollByXY(ctx, x, y)
|
|
}
|
|
|
|
func (doc *HTMLDocument) loadChildren(ctx context.Context) (value core.Value, e error) {
|
|
children := values.NewArray(len(doc.frames.ChildFrames))
|
|
|
|
if len(doc.frames.ChildFrames) > 0 {
|
|
for _, cf := range doc.frames.ChildFrames {
|
|
cfNode, cfExecID, err := resolveFrame(ctx, doc.client, cf.Frame)
|
|
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to resolve frame node")
|
|
}
|
|
|
|
cfDocument, err := LoadHTMLDocument(
|
|
ctx,
|
|
doc.logger,
|
|
doc.client,
|
|
doc.events,
|
|
doc.input.Mouse(),
|
|
doc.input.Keyboard(),
|
|
cfNode,
|
|
cf,
|
|
cfExecID,
|
|
doc,
|
|
)
|
|
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to load frame document")
|
|
}
|
|
|
|
children.Push(cfDocument)
|
|
}
|
|
}
|
|
|
|
return children, nil
|
|
}
|
|
|
|
func (doc *HTMLDocument) logError(err error) *zerolog.Event {
|
|
return doc.logger.
|
|
Error().
|
|
Timestamp().
|
|
Str("url", string(doc.frames.Frame.URL)).
|
|
Str("securityOrigin", string(doc.frames.Frame.SecurityOrigin)).
|
|
Str("mimeType", string(doc.frames.Frame.MimeType)).
|
|
Str("frameID", string(doc.frames.Frame.ID)).
|
|
Err(err)
|
|
}
|