package cdp

import (
	"context"
	"encoding/json"
	"fmt"
	"hash/fnv"
	"sync"
	"time"

	"github.com/MontFerret/ferret/pkg/drivers"
	"github.com/MontFerret/ferret/pkg/drivers/cdp/eval"
	"github.com/MontFerret/ferret/pkg/drivers/cdp/events"
	"github.com/MontFerret/ferret/pkg/drivers/cdp/templates"
	"github.com/MontFerret/ferret/pkg/drivers/common"
	"github.com/MontFerret/ferret/pkg/runtime/core"
	"github.com/MontFerret/ferret/pkg/runtime/logging"
	"github.com/MontFerret/ferret/pkg/runtime/values"
	"github.com/MontFerret/ferret/pkg/runtime/values/types"
	"github.com/mafredri/cdp"
	"github.com/mafredri/cdp/protocol/dom"
	"github.com/mafredri/cdp/protocol/input"
	"github.com/mafredri/cdp/protocol/network"
	"github.com/mafredri/cdp/protocol/page"
	"github.com/mafredri/cdp/rpcc"
	"github.com/pkg/errors"
	"github.com/rs/zerolog"
)

// BlankPageURL is the URL of an empty page. LoadHTMLDocument does not wait
// for a load event when this URL is requested.
const BlankPageURL = "about:blank"

// HTMLDocument is a document driven through a CDP client and its underlying
// RPC connection. The embedded mutex is taken by the methods that read the
// url and element fields.
type HTMLDocument struct {
	sync.Mutex
	logger  *zerolog.Logger
	conn    *rpcc.Conn
	client  *cdp.Client
	events  *events.EventBroker
	url     values.String
	element *HTMLElement
}

// handleLoadError closes the page after a failed load. A failure to close the
// page is logged as a warning and otherwise ignored.
func handleLoadError(logger *zerolog.Logger, client *cdp.Client) {
	if err := client.Page.Close(context.Background()); err != nil {
		logger.Warn().Timestamp().Err(err).Msg("unable to close document on load error")
	}
}

// LoadHTMLDocument builds an HTMLDocument on top of an existing connection
// and client.
//
// It applies the cookies and extra HTTP headers from params (when any are
// given), waits for the load event unless params.URL is BlankPageURL, resolves
// the root element and creates the event broker. If waiting for the load
// event, resolving the root element, creating the broker or loading the root
// element fails, the page is closed before the error is returned.
//
// It returns core.ErrMissedArgument when conn, client or params.URL is
// missing.
func LoadHTMLDocument(
	ctx context.Context,
	conn *rpcc.Conn,
	client *cdp.Client,
	params drivers.LoadDocumentParams,
) (drivers.HTMLDocument, error) {
	logger := logging.FromContext(ctx)

	if conn == nil {
		return nil, core.Error(core.ErrMissedArgument, "connection")
	}

	// The client is dereferenced on every path below, so reject nil up front
	// instead of panicking later.
	if client == nil {
		return nil, core.Error(core.ErrMissedArgument, "client")
	}

	if params.URL == "" {
		return nil, core.Error(core.ErrMissedArgument, "url")
	}

	// Skip the protocol call entirely when there is nothing to set.
	if len(params.Cookies) > 0 {
		cookies := make([]network.CookieParam, 0, len(params.Cookies))

		for _, c := range params.Cookies {
			cookies = append(cookies, fromDriverCookie(params.URL, c))

			logger.
				Debug().
				Timestamp().
				Str("cookie", c.Name).
				Msg("set cookie")
		}

		err := client.Network.SetCookies(
			ctx,
			network.NewSetCookiesArgs(cookies),
		)

		if err != nil {
			return nil, err
		}
	}

	if len(params.Header) > 0 {
		// The headers are handed to the protocol call in their JSON form.
		j, err := json.Marshal(params.Header)

		if err != nil {
			return nil, err
		}

		for k := range params.Header {
			logger.
				Debug().
				Timestamp().
				Str("header", k).
				Msg("set header")
		}

		err = client.Network.SetExtraHTTPHeaders(
			ctx,
			network.NewSetExtraHTTPHeadersArgs(network.Headers(j)),
		)

		if err != nil {
			return nil, err
		}
	}

	if params.URL != BlankPageURL {
		if err := waitForLoadEvent(ctx, client); err != nil {
			handleLoadError(logger, client)

			return nil, err
		}
	}

	node, err := getRootElement(ctx, client)

	if err != nil {
		handleLoadError(logger, client)

		return nil, errors.Wrap(err, "failed to get root element")
	}

	broker, err := createEventBroker(client)

	if err != nil {
		handleLoadError(logger, client)

		return nil, errors.Wrap(err, "failed to create event events")
	}

	rootElement, err := LoadElement(
		ctx,
		logger,
		client,
		broker,
		node.Root.NodeID,
		node.Root.BackendNodeID,
	)

	if err != nil {
		// The broker already exists at this point, so shut it down before
		// closing the page.
		broker.Stop()
		broker.Close()
		handleLoadError(logger, client)

		return nil, errors.Wrap(err, "failed to load root element")
	}

	return NewHTMLDocument(
		logger,
		conn,
		client,
		broker,
		values.NewString(params.URL),
		rootElement,
	), nil
}

// NewHTMLDocument assembles an HTMLDocument from already initialized parts
// and subscribes the document to the broker's load and error events.
func NewHTMLDocument(
	logger *zerolog.Logger,
	conn *rpcc.Conn,
	client *cdp.Client,
	broker *events.EventBroker,
	url values.String,
	rootElement *HTMLElement,
) *HTMLDocument {
	doc := new(HTMLDocument)
	doc.logger = logger
	doc.conn = conn
	doc.client = client
	doc.events = broker
	doc.url = url
	doc.element = rootElement

	broker.AddEventListener(events.EventLoad, doc.handlePageLoad)
	broker.AddEventListener(events.EventError, doc.handleError)

	return doc
}

// MarshalJSON serializes the document by serializing its root element.
func (doc *HTMLDocument) MarshalJSON() ([]byte, error) {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.MarshalJSON()
}

// Type returns drivers.HTMLDocumentType.
func (doc *HTMLDocument) Type() core.Type {
	return drivers.HTMLDocumentType
}

// String returns the document URL as a string.
func (doc *HTMLDocument) String() string {
	doc.Lock()
	defer doc.Unlock()

	return doc.url.String()
}

// Unwrap returns the root element of the document.
func (doc *HTMLDocument) Unwrap() interface{} {
	doc.Lock()
	defer doc.Unlock()

	return doc.element
}

// Hash returns an FNV-1a hash of the type name, a ":" separator and the
// document URL.
func (doc *HTMLDocument) Hash() uint64 {
	doc.Lock()
	defer doc.Unlock()

	h := fnv.New64a()

	h.Write([]byte(doc.Type().String()))
	h.Write([]byte(":"))
	h.Write([]byte(doc.url))

	return h.Sum64()
}

// Copy always returns values.None.
func (doc *HTMLDocument) Copy() core.Value {
	return values.None
}

// Compare orders the document against other. Two HTML documents are compared
// by URL; any other value is compared by type.
func (doc *HTMLDocument) Compare(other core.Value) int64 {
	switch other.Type() {
	case drivers.HTMLDocumentType:
		// Snapshot the URL and release the lock before calling GetURL on the
		// other document. GetURL takes that document's mutex, so holding our
		// own lock here would deadlock when other is this same document
		// (sync.Mutex is not reentrant) or when two documents are compared
		// with each other concurrently.
		doc.Lock()
		current := doc.url
		doc.Unlock()

		return current.Compare(other.(drivers.HTMLDocument).GetURL())
	default:
		return drivers.Compare(doc.Type(), other.Type())
	}
}

// Iterate returns an iterator created by the root element.
func (doc *HTMLDocument) Iterate(ctx context.Context) (core.Iterator, error) {
	// Read the root element under the lock, like every other method that
	// delegates to it.
	doc.Lock()
	defer doc.Unlock()

	return doc.element.Iterate(ctx)
}

// GetIn resolves path against the document via common.GetInDocument.
func (doc *HTMLDocument) GetIn(ctx context.Context, path []core.Value) (core.Value, error) {
	return common.GetInDocument(ctx, doc, path)
}

// SetIn assigns value at path via common.SetInDocument.
func (doc *HTMLDocument) SetIn(ctx context.Context, path []core.Value, value core.Value) error {
	return common.SetInDocument(ctx, doc, path, value)
}

// Close stops and closes the event broker, closes the root element and the
// browser page, and finally closes the connection. Failures of the first four
// steps are logged as warnings; only the error from closing the connection is
// returned.
func (doc *HTMLDocument) Close() error {
	doc.Lock()
	defer doc.Unlock()

	err := doc.events.Stop()

	if err != nil {
		doc.logger.Warn().
			Timestamp().
			Str("url", doc.url.String()).
			Err(err).
			Msg("failed to stop event events")
	}

	err = doc.events.Close()

	if err != nil {
		doc.logger.Warn().
			Timestamp().
			Str("url", doc.url.String()).
			Err(err).
			Msg("failed to close event events")
	}

	err = doc.element.Close()

	if err != nil {
		doc.logger.Warn().
			Timestamp().
			Str("url", doc.url.String()).
			Err(err).
			Msg("failed to close root element")
	}

	err = doc.client.Page.Close(context.Background())

	if err != nil {
		doc.logger.Warn().
			Timestamp().
			Str("url", doc.url.String()).
			Err(err).
			Msg("failed to close browser page")
	}

	return doc.conn.Close()
}

// NodeType returns the node type of the root element.
func (doc *HTMLDocument) NodeType() values.Int {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.NodeType()
}

// NodeName returns the node name of the root element.
func (doc *HTMLDocument) NodeName() values.String {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.NodeName()
}

// Length returns the length reported by the root element.
func (doc *HTMLDocument) Length() values.Int {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.Length()
}

// GetChildNodes returns the child nodes of the root element.
func (doc *HTMLDocument) GetChildNodes(ctx context.Context) core.Value {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.GetChildNodes(ctx)
}

// GetChildNode returns the child node of the root element at index idx.
func (doc *HTMLDocument) GetChildNode(ctx context.Context, idx values.Int) core.Value {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.GetChildNode(ctx, idx)
}

// QuerySelector delegates the selector query to the root element.
func (doc *HTMLDocument) QuerySelector(ctx context.Context, selector values.String) core.Value {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.QuerySelector(ctx, selector)
}

// QuerySelectorAll delegates the selector query to the root element.
func (doc *HTMLDocument) QuerySelectorAll(ctx context.Context, selector values.String) core.Value {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.QuerySelectorAll(ctx, selector)
}

// DocumentElement returns the root element of the document.
func (doc *HTMLDocument) DocumentElement() drivers.HTMLElement {
	doc.Lock()
	defer doc.Unlock()

	return doc.element
}

// GetURL returns the document URL.
func (doc *HTMLDocument) GetURL() core.Value {
	doc.Lock()
	defer doc.Unlock()

	return doc.url
}

// GetCookies returns all cookies reported by the browser, converted to driver
// cookies. An empty array is returned together with the error when the
// request fails, and on its own when the reply carries no cookies.
func (doc *HTMLDocument) GetCookies(ctx context.Context) (*values.Array, error) {
	doc.Lock()
	defer doc.Unlock()

	repl, err := doc.client.Network.GetAllCookies(ctx)

	if err != nil {
		return values.NewArray(0), err
	}

	if repl.Cookies == nil {
		return values.NewArray(0), nil
	}

	cookies := values.NewArray(len(repl.Cookies))

	for _, c := range repl.Cookies {
		cookies.Push(toDriverCookie(c))
	}

	return cookies, nil
}

// SetCookies sets the given cookies, using the document URL when converting
// them. It is a no-op when no cookies are passed.
func (doc *HTMLDocument) SetCookies(ctx context.Context, cookies ...drivers.HTTPCookie) error {
	doc.Lock()
	defer doc.Unlock()

	if len(cookies) == 0 {
		return nil
	}

	params := make([]network.CookieParam, 0, len(cookies))

	for _, c := range cookies {
		params = append(params, fromDriverCookie(doc.url.String(), c))
	}

	return doc.client.Network.SetCookies(ctx, network.NewSetCookiesArgs(params))
}

// DeleteCookies deletes the given cookies one by one and stops at the first
// failure, returning that error. It is a no-op when no cookies are passed.
func (doc *HTMLDocument) DeleteCookies(ctx context.Context, cookies ...drivers.HTTPCookie) error {
	doc.Lock()
	defer doc.Unlock()

	for _, c := range cookies {
		err := doc.client.Network.DeleteCookies(ctx, fromDriverCookieDelete(doc.url.String(), c))

		if err != nil {
			return err
		}
	}

	return nil
}

// SetURL navigates the document to url.
func (doc *HTMLDocument) SetURL(ctx context.Context, url values.String) error {
	return doc.Navigate(ctx, url)
}

// CountBySelector delegates the count to the root element.
func (doc *HTMLDocument) CountBySelector(ctx context.Context, selector values.String) values.Int {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.CountBySelector(ctx, selector)
}

// ExistsBySelector delegates the existence check to the root element.
func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector values.String) values.Boolean {
	doc.Lock()
	defer doc.Unlock()

	return doc.element.ExistsBySelector(ctx, selector)
}

// ClickBySelector dispatches a bubbling, cancelable "click" MouseEvent on the
// first element matching selector. It returns false when no element matches
// or when the script result is not a boolean.
func (doc *HTMLDocument) ClickBySelector(ctx context.Context, selector values.String) (values.Boolean, error) {
	res, err := eval.Eval(
		ctx,
		doc.client,
		fmt.Sprintf(` var el = document.querySelector(%s); if (el == null) { return false; } var evt = new window.MouseEvent('click', { bubbles: true, cancelable: true }); el.dispatchEvent(evt); return true; `, eval.ParamString(selector.String())),
		true,
		false,
	)

	if err != nil {
		return values.False, err
	}

	if res.Type() == types.Boolean {
		return res.(values.Boolean), nil
	}

	return values.False, nil
}

// ClickBySelectorAll dispatches a bubbling, cancelable "click" MouseEvent on
// every element matching selector. It returns false when the script result is
// not a boolean.
//
// NOTE(review): querySelectorAll is specified to return a (possibly empty)
// NodeList rather than null, so the null guard in the script presumably never
// fires and the result is true even when nothing matched - confirm whether
// that is intended.
func (doc *HTMLDocument) ClickBySelectorAll(ctx context.Context, selector values.String) (values.Boolean, error) {
	res, err := eval.Eval(
		ctx,
		doc.client,
		fmt.Sprintf(` var elements = document.querySelectorAll(%s); if (elements == null) { return false; } elements.forEach((el) => { var evt = new window.MouseEvent('click', { bubbles: true, cancelable: true }); el.dispatchEvent(evt); }); return true; `, eval.ParamString(selector.String())),
		true,
		false,
	)

	if err != nil {
		return values.False, err
	}

	if res.Type() == types.Boolean {
		return res.(values.Boolean), nil
	}

	return values.False, nil
}

// InputBySelector focuses the first element matching selector and types the
// string form of value into it, one keyDown/keyUp pair per character.
//
// delay is in milliseconds: it is waited once in full after focusing, and
// randomDuration(delay) milliseconds are waited after each character. It
// returns false without an error when no element matches. When ctx is done
// during a wait, typing stops and ctx.Err() is returned.
func (doc *HTMLDocument) InputBySelector(ctx context.Context, selector values.String, value core.Value, delay values.Int) (values.Boolean, error) {
	valStr := value.String()

	res, err := eval.Eval(
		ctx,
		doc.client,
		fmt.Sprintf(` var el = document.querySelector(%s); if (el == null) { return false; } el.focus(); return true; `, eval.ParamString(selector.String())),
		true,
		false,
	)

	if err != nil {
		return values.False, err
	}

	if res.Type() == types.Boolean && res.(values.Boolean) == values.False {
		return values.False, nil
	}

	// pause waits for d but gives up as soon as ctx is done, so a cancelled
	// operation does not keep sleeping between key strokes.
	pause := func(d time.Duration) error {
		timer := time.NewTimer(d)
		defer timer.Stop()

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	// Initial delay after focusing but before typing
	if err := pause(time.Duration(delay) * time.Millisecond); err != nil {
		return values.False, err
	}

	for _, ch := range valStr {
		for _, ev := range []string{"keyDown", "keyUp"} {
			ke := input.NewDispatchKeyEventArgs(ev).SetText(string(ch))

			if err := doc.client.Input.DispatchKeyEvent(ctx, ke); err != nil {
				return values.False, err
			}
		}

		if err := pause(randomDuration(delay) * time.Millisecond); err != nil {
			return values.False, err
		}
	}

	return values.True, nil
}

func (doc *HTMLDocument) SelectBySelector(ctx context.Context, selector values.String, value *values.Array) (*values.Array, error) {
	res, err := eval.Eval(
		ctx,
		doc.client,
		fmt.Sprintf(` var element = document.querySelector(%s); if (element == null) { return []; } var values = %s; if (element.nodeName.toLowerCase() !== 'select') { throw new Error('Element is not a