1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-07-03 00:46:51 +02:00
Files
ferret/pkg/drivers/cdp/dom/document.go

427 lines
10 KiB
Go
Raw Normal View History

package dom
2018-09-18 16:42:38 -04:00
import (
"context"
"hash/fnv"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
"github.com/MontFerret/ferret/pkg/drivers/cdp/events"
"github.com/MontFerret/ferret/pkg/drivers/cdp/input"
"github.com/MontFerret/ferret/pkg/drivers/cdp/templates"
"github.com/MontFerret/ferret/pkg/drivers/common"
2018-09-18 16:42:38 -04:00
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/logging"
2018-09-23 04:33:20 -04:00
"github.com/MontFerret/ferret/pkg/runtime/values"
2018-09-18 16:42:38 -04:00
)
// HTMLDocument represents a single frame's document as exposed through the
// CDP driver. Most DOM-related methods delegate to the root element, while
// frame metadata (URL, name, ID, parent/children) is read from frameTree.
type HTMLDocument struct {
// logger is the named logger used by logError.
logger zerolog.Logger
// client is the CDP client the document was loaded with.
client *cdp.Client
// dom resolves frame IDs to documents (parent and child documents).
dom *Manager
// input performs mouse movement and scrolling on behalf of the document.
input *input.Manager
// exec is the eval runtime used to read properties, evaluate expressions
// and drive the wait tasks.
exec *eval.Runtime
// frameTree holds the frame this document belongs to and its child frames.
frameTree page.FrameTree
// element is the root element; node and selector methods delegate to it.
element *HTMLElement
}
func LoadRootHTMLDocument(
2018-09-18 16:42:38 -04:00
ctx context.Context,
logger zerolog.Logger,
client *cdp.Client,
domManager *Manager,
mouse *input.Mouse,
keyboard *input.Keyboard,
) (*HTMLDocument, error) {
gdRepl, err := client.DOM.GetDocument(ctx, dom.NewGetDocumentArgs().SetDepth(1))
2018-09-18 16:42:38 -04:00
if err != nil {
return nil, err
2018-09-18 16:42:38 -04:00
}
ftRepl, err := client.Page.GetFrameTree(ctx)
if err != nil {
return nil, err
}
exec, err := eval.New(ctx, logger, client, ftRepl.FrameTree.Frame.ID)
if err != nil {
return nil, err
}
return LoadHTMLDocument(
ctx,
logger,
client,
domManager,
mouse,
keyboard,
gdRepl.Root,
ftRepl.FrameTree,
exec,
)
}
func LoadHTMLDocument(
ctx context.Context,
logger zerolog.Logger,
client *cdp.Client,
domManager *Manager,
mouse *input.Mouse,
keyboard *input.Keyboard,
node dom.Node,
frameTree page.FrameTree,
exec *eval.Runtime,
) (*HTMLDocument, error) {
inputManager := input.NewManager(logger, client, exec, keyboard, mouse)
rootElement, err := LoadHTMLElement(
ctx,
logger,
client,
domManager,
inputManager,
exec,
node.NodeID,
)
2018-11-22 11:44:05 -05:00
if err != nil {
return nil, errors.Wrap(err, "failed to load root element")
}
return NewHTMLDocument(
logger,
2018-09-28 00:28:33 -04:00
client,
domManager,
inputManager,
exec,
rootElement,
frameTree,
2018-09-28 00:28:33 -04:00
), nil
}
func NewHTMLDocument(
logger zerolog.Logger,
2018-09-25 17:58:57 -04:00
client *cdp.Client,
domManager *Manager,
input *input.Manager,
exec *eval.Runtime,
rootElement *HTMLElement,
frames page.FrameTree,
) *HTMLDocument {
doc := new(HTMLDocument)
doc.logger = logging.WithName(logger.With(), "html_document").Logger()
2018-09-25 17:58:57 -04:00
doc.client = client
doc.dom = domManager
doc.input = input
doc.exec = exec
doc.element = rootElement
doc.frameTree = frames
2018-09-25 17:58:57 -04:00
return doc
}
func (doc *HTMLDocument) MarshalJSON() ([]byte, error) {
2018-09-25 17:58:57 -04:00
return doc.element.MarshalJSON()
2018-09-18 16:42:38 -04:00
}
func (doc *HTMLDocument) Type() core.Type {
return drivers.HTMLDocumentType
2018-09-18 16:42:38 -04:00
}
func (doc *HTMLDocument) String() string {
return doc.frameTree.Frame.URL
2018-09-18 16:42:38 -04:00
}
func (doc *HTMLDocument) Unwrap() interface{} {
2018-09-25 17:58:57 -04:00
return doc.element
}
func (doc *HTMLDocument) Hash() uint64 {
2018-10-05 15:17:22 -04:00
h := fnv.New64a()
2018-09-25 17:58:57 -04:00
2018-10-05 15:17:22 -04:00
h.Write([]byte(doc.Type().String()))
h.Write([]byte(":"))
h.Write([]byte(doc.frameTree.Frame.ID))
h.Write([]byte(doc.frameTree.Frame.URL))
2018-09-25 17:58:57 -04:00
2018-10-05 15:17:22 -04:00
return h.Sum64()
2018-09-25 17:58:57 -04:00
}
func (doc *HTMLDocument) Copy() core.Value {
2018-09-27 11:53:26 -04:00
return values.None
}
func (doc *HTMLDocument) Compare(other core.Value) int64 {
switch other.Type() {
case drivers.HTMLDocumentType:
cdpDoc, ok := other.(*HTMLDocument)
if ok {
thisID := values.NewString(string(doc.Frame().Frame.ID))
otherID := values.NewString(string(cdpDoc.Frame().Frame.ID))
return thisID.Compare(otherID)
}
other := other.(drivers.HTMLDocument)
2018-09-18 16:42:38 -04:00
return values.NewString(doc.frameTree.Frame.URL).Compare(other.GetURL())
default:
return drivers.Compare(doc.Type(), other.Type())
2018-09-18 16:42:38 -04:00
}
}
// Iterate returns the iterator produced by the document's root element.
func (doc *HTMLDocument) Iterate(ctx context.Context) (core.Iterator, error) {
	root := doc.element

	return root.Iterate(ctx)
}
// GetIn resolves path against this document using the shared
// common.GetInDocument helper.
func (doc *HTMLDocument) GetIn(ctx context.Context, path []core.Value) (core.Value, core.PathError) {
	return common.GetInDocument(
		ctx,
		path,
		doc,
	)
}
func (doc *HTMLDocument) SetIn(ctx context.Context, path []core.Value, value core.Value) core.PathError {
return common.SetInDocument(ctx, path, doc, value)
2018-09-18 16:42:38 -04:00
}
2018-09-23 04:33:20 -04:00
// Close closes the document's root element and returns its error, if any.
func (doc *HTMLDocument) Close() error {
	root := doc.element

	return root.Close()
}
2018-09-25 17:58:57 -04:00
func (doc *HTMLDocument) Frame() page.FrameTree {
return doc.frameTree
2018-09-25 17:58:57 -04:00
}
// GetNodeType returns the node type of a document, which is always 9.
// The context is unused.
func (doc *HTMLDocument) GetNodeType(_ context.Context) (values.Int, error) {
	const documentNodeType = 9

	return documentNodeType, nil
}
2018-09-25 17:58:57 -04:00
func (doc *HTMLDocument) GetNodeName(_ context.Context) (values.String, error) {
return "#document", nil
2018-09-25 17:58:57 -04:00
}
func (doc *HTMLDocument) GetChildNodes(ctx context.Context) (*values.Array, error) {
return doc.element.GetChildNodes(ctx)
2018-09-25 17:58:57 -04:00
}
func (doc *HTMLDocument) GetChildNode(ctx context.Context, idx values.Int) (core.Value, error) {
return doc.element.GetChildNode(ctx, idx)
2018-09-25 17:58:57 -04:00
}
func (doc *HTMLDocument) QuerySelector(ctx context.Context, selector values.String) (core.Value, error) {
return doc.element.QuerySelector(ctx, selector)
2018-09-25 17:58:57 -04:00
}
func (doc *HTMLDocument) QuerySelectorAll(ctx context.Context, selector values.String) (*values.Array, error) {
return doc.element.QuerySelectorAll(ctx, selector)
2018-09-25 17:58:57 -04:00
}
func (doc *HTMLDocument) CountBySelector(ctx context.Context, selector values.String) (values.Int, error) {
return doc.element.CountBySelector(ctx, selector)
2018-09-27 22:03:35 -04:00
}
func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector values.String) (values.Boolean, error) {
return doc.element.ExistsBySelector(ctx, selector)
2018-09-27 22:03:35 -04:00
}
// GetTitle reads the "title" property of the document's root element through
// the eval runtime and returns it as a string value.
//
// The method has no context parameter, so the read is performed with
// context.Background(). If the property cannot be read, the failure is logged
// and an empty string is returned.
func (doc *HTMLDocument) GetTitle() values.String {
	value, err := doc.exec.ReadProperty(context.Background(), doc.element.id, "title")
	if err != nil {
		// A zerolog event is only written once it is finished with Msg;
		// previously the event returned by logError was discarded, so the
		// failure was never logged.
		doc.logError(err).Msg("failed to read document title")

		return values.EmptyString
	}

	return values.NewString(value.String())
}
// GetName returns the frame's name, or an empty string when the frame has no
// name set.
func (doc *HTMLDocument) GetName() values.String {
	name := doc.frameTree.Frame.Name

	if name == nil {
		return values.EmptyString
	}

	return values.NewString(*name)
}
// GetParentDocument looks up the document of the parent frame through the DOM
// manager. When the frame has no parent ID, it returns nil with a nil error.
func (doc *HTMLDocument) GetParentDocument(ctx context.Context) (drivers.HTMLDocument, error) {
	parentID := doc.frameTree.Frame.ParentID

	if parentID != nil {
		return doc.dom.GetFrameNode(ctx, *parentID)
	}

	return nil, nil
}
// GetChildDocuments resolves every child frame of this document through the
// DOM manager and collects the results into an array. Frames that resolve to
// nil are skipped; the first lookup error aborts the whole call.
func (doc *HTMLDocument) GetChildDocuments(ctx context.Context) (*values.Array, error) {
	children := doc.frameTree.ChildFrames
	result := values.NewArray(len(children))

	for i := range children {
		child, err := doc.dom.GetFrameNode(ctx, children[i].Frame.ID)
		if err != nil {
			return nil, err
		}

		if child == nil {
			continue
		}

		result.Push(child)
	}

	return result, nil
}
// XPath evaluates the XPath expression against the document's root element.
func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) {
	root := doc.element

	return root.XPath(ctx, expression)
}
func (doc *HTMLDocument) Length() values.Int {
return doc.element.Length()
2018-09-27 22:03:35 -04:00
}
// GetElement returns the document's root element.
func (doc *HTMLDocument) GetElement() drivers.HTMLElement {
	return doc.element
}
// GetURL returns the URL of the document's frame as a string value.
func (doc *HTMLDocument) GetURL() values.String {
	url := doc.frameTree.Frame.URL

	return values.NewString(url)
}
// MoveMouseByXY asks the input manager to move the mouse to the given x and y.
func (doc *HTMLDocument) MoveMouseByXY(ctx context.Context, x, y values.Float) error {
	inputs := doc.input

	return inputs.MoveMouseByXY(ctx, x, y)
}
func (doc *HTMLDocument) WaitForElement(ctx context.Context, selector values.String, when drivers.WaitEvent) error {
task := events.NewEvalWaitTask(
doc.exec,
templates.WaitForElement(doc.element.id, selector, when),
2018-09-25 17:58:57 -04:00
events.DefaultPolling,
2018-09-23 04:33:20 -04:00
)
_, err := task.Run(ctx)
2018-09-23 04:33:20 -04:00
return err
}
// WaitForClassBySelector runs an eval wait task, with the default polling
// setting, over the script built by templates.WaitForClassBySelector for the
// root element, selector, class and wait event. It returns the error from
// running the task.
func (doc *HTMLDocument) WaitForClassBySelector(ctx context.Context, selector, class values.String, when drivers.WaitEvent) error {
	script := templates.WaitForClassBySelector(doc.element.id, selector, class, when)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForClassBySelectorAll runs an eval wait task, with the default polling
// setting, over the script built by templates.WaitForClassBySelectorAll for
// the root element, selector, class and wait event. It returns the error from
// running the task.
func (doc *HTMLDocument) WaitForClassBySelectorAll(ctx context.Context, selector, class values.String, when drivers.WaitEvent) error {
	script := templates.WaitForClassBySelectorAll(doc.element.id, selector, class, when)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForAttributeBySelector runs an eval wait task, with the default polling
// setting, over the script built by templates.WaitForAttributeBySelector for
// the root element, selector, attribute name, attribute value and wait event.
// It returns the error from running the task.
func (doc *HTMLDocument) WaitForAttributeBySelector(
	ctx context.Context,
	selector,
	name,
	value values.String,
	when drivers.WaitEvent,
) error {
	script := templates.WaitForAttributeBySelector(doc.element.id, selector, name, value, when)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForAttributeBySelectorAll runs an eval wait task, with the default
// polling setting, over the script built by
// templates.WaitForAttributeBySelectorAll for the root element, selector,
// attribute name, attribute value and wait event. It returns the error from
// running the task.
func (doc *HTMLDocument) WaitForAttributeBySelectorAll(
	ctx context.Context,
	selector,
	name,
	value values.String,
	when drivers.WaitEvent,
) error {
	script := templates.WaitForAttributeBySelectorAll(doc.element.id, selector, name, value, when)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForStyleBySelector runs an eval wait task, with the default polling
// setting, over the script built by templates.WaitForStyleBySelector for the
// root element, selector, style name, style value and wait event. It returns
// the error from running the task.
func (doc *HTMLDocument) WaitForStyleBySelector(ctx context.Context, selector, name, value values.String, when drivers.WaitEvent) error {
	script := templates.WaitForStyleBySelector(doc.element.id, selector, name, value, when)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// WaitForStyleBySelectorAll runs an eval wait task, with the default polling
// setting, over the script built by templates.WaitForStyleBySelectorAll for
// the root element, selector, style name, style value and wait event. It
// returns the error from running the task.
func (doc *HTMLDocument) WaitForStyleBySelectorAll(ctx context.Context, selector, name, value values.String, when drivers.WaitEvent) error {
	script := templates.WaitForStyleBySelectorAll(doc.element.id, selector, name, value, when)
	waiter := events.NewEvalWaitTask(doc.exec, script, events.DefaultPolling)

	_, err := waiter.Run(ctx)

	return err
}
// ScrollTop forwards the scroll-to-top request and its options to the input
// manager.
func (doc *HTMLDocument) ScrollTop(ctx context.Context, options drivers.ScrollOptions) error {
	inputs := doc.input

	return inputs.ScrollTop(ctx, options)
}
// ScrollBottom forwards the scroll-to-bottom request and its options to the
// input manager.
func (doc *HTMLDocument) ScrollBottom(ctx context.Context, options drivers.ScrollOptions) error {
	inputs := doc.input

	return inputs.ScrollBottom(ctx, options)
}
func (doc *HTMLDocument) ScrollBySelector(ctx context.Context, selector values.String, options drivers.ScrollOptions) error {
return doc.input.ScrollIntoViewBySelector(ctx, doc.element.id, selector, options)
2018-11-15 14:33:53 -05:00
}
// Scroll forwards the scroll request and its options to the input manager's
// ScrollByXY.
func (doc *HTMLDocument) Scroll(ctx context.Context, options drivers.ScrollOptions) error {
	inputs := doc.input

	return inputs.ScrollByXY(ctx, options)
}
// Eval wraps expression with eval.F and evaluates it through the document's
// eval runtime, returning the resulting value.
func (doc *HTMLDocument) Eval(ctx context.Context, expression string) (core.Value, error) {
	fn := eval.F(expression)

	return doc.exec.EvalValue(ctx, fn)
}
func (doc *HTMLDocument) logError(err error) *zerolog.Event {
return doc.logger.
Error().
2018-10-07 20:15:41 -04:00
Timestamp().
Str("url", doc.frameTree.Frame.URL).
Str("securityOrigin", doc.frameTree.Frame.SecurityOrigin).
Str("mimeType", doc.frameTree.Frame.MimeType).
Str("frameID", string(doc.frameTree.Frame.ID)).
Err(err)
2018-09-25 19:04:07 -04:00
}