package cdp import ( "context" "hash/fnv" "io" "regexp" "sync" "github.com/mafredri/cdp" "github.com/mafredri/cdp/protocol/page" "github.com/mafredri/cdp/rpcc" "github.com/pkg/errors" "github.com/rs/zerolog" "github.com/MontFerret/ferret/pkg/drivers" "github.com/MontFerret/ferret/pkg/drivers/cdp/dom" "github.com/MontFerret/ferret/pkg/drivers/cdp/input" net "github.com/MontFerret/ferret/pkg/drivers/cdp/network" "github.com/MontFerret/ferret/pkg/drivers/cdp/templates" "github.com/MontFerret/ferret/pkg/drivers/common" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/events" "github.com/MontFerret/ferret/pkg/runtime/logging" "github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values/types" ) type ( HTMLPageEvent string HTMLPage struct { mu sync.Mutex closed values.Boolean logger *zerolog.Logger conn *rpcc.Conn client *cdp.Client network *net.Manager dom *dom.Manager mouse *input.Mouse keyboard *input.Keyboard } ) const ( HTMLPageEventNavigation HTMLPageEvent = "navigation" ) func LoadHTMLPage( ctx context.Context, conn *rpcc.Conn, params drivers.Params, ) (p *HTMLPage, err error) { logger := logging.FromContext(ctx) if conn == nil { return nil, core.Error(core.ErrMissedArgument, "connection") } client := cdp.NewClient(conn) if err := enableFeatures(ctx, client, params); err != nil { return nil, err } closers := make([]io.Closer, 0, 4) defer func() { if err != nil { if err := client.Page.Close(context.Background()); err != nil { logger.Error().Err(err) } if err := conn.Close(); err != nil { logger.Error().Err(err) } common.CloseAll(logger, closers, "failed to close a Page resource") } }() netOpts := net.Options{ Headers: params.Headers, } if params.Cookies != nil && params.Cookies.Length() > 0 { netOpts.Cookies = make(map[string]*drivers.HTTPCookies) netOpts.Cookies[params.URL] = params.Cookies } if params.Ignore != nil && len(params.Ignore.Resources) > 0 { netOpts.Filter = &net.Filter{ Patterns: params.Ignore.Resources, } } netManager, err := net.New(ctx, logger, client, netOpts) if err != nil { return nil, err } mouse := input.NewMouse(client) keyboard := input.NewKeyboard(client) domManager, err := dom.New( logger, client, mouse, keyboard, ) if err != nil { return nil, err } p = NewHTMLPage( logger, conn, client, netManager, domManager, mouse, keyboard, ) if params.URL != BlankPageURL && params.URL != "" { err = p.Navigate(ctx, values.NewString(params.URL)) } else { err = p.loadMainFrame(ctx) } if err != nil { return p, err } return p, nil } func LoadHTMLPageWithContent( ctx context.Context, conn *rpcc.Conn, params drivers.Params, content []byte, ) (p *HTMLPage, err error) { logger := logging.FromContext(ctx) p, err = LoadHTMLPage(ctx, conn, params) if err != nil { return nil, err } defer func() { if err != nil { if e := p.Close(); e != nil { logger.Error().Err(e).Msg("failed to close page") } } }() frameID := p.getCurrentDocument().Frame().Frame.ID err = p.client.Page.SetDocumentContent(ctx, page.NewSetDocumentContentArgs(frameID, string(content))) if err != nil { return nil, errors.Wrap(err, "set document content") } // Remove prev frames (from a blank page) prev := p.dom.GetMainFrame() err = p.dom.RemoveFrameRecursively(prev.Frame().Frame.ID) if err != nil { return nil, err } err = p.loadMainFrame(ctx) if err != nil { return nil, err } return p, nil } func NewHTMLPage( logger *zerolog.Logger, conn *rpcc.Conn, client *cdp.Client, netManager *net.Manager, domManager *dom.Manager, mouse *input.Mouse, keyboard *input.Keyboard, ) *HTMLPage { p := new(HTMLPage) p.closed = values.False p.logger = logger p.conn = conn p.client = client p.network = netManager p.dom = domManager p.mouse = mouse p.keyboard = keyboard return p } func (p *HTMLPage) MarshalJSON() ([]byte, error) { return p.getCurrentDocument().MarshalJSON() } func (p *HTMLPage) Type() core.Type { return drivers.HTMLPageType } func (p *HTMLPage) String() string { return p.getCurrentDocument().GetURL().String() } func (p *HTMLPage) Compare(other core.Value) int64 { tc := drivers.Compare(p.Type(), other.Type()) if tc != 0 { return tc } cdpPage, ok := other.(*HTMLPage) if !ok { return 1 } return p.getCurrentDocument().GetURL().Compare(cdpPage.GetURL()) } func (p *HTMLPage) Unwrap() interface{} { p.mu.Lock() defer p.mu.Unlock() return p } func (p *HTMLPage) Hash() uint64 { h := fnv.New64a() h.Write([]byte("CDP")) h.Write([]byte(p.Type().String())) h.Write([]byte(":")) h.Write([]byte(p.getCurrentDocument().GetURL())) return h.Sum64() } func (p *HTMLPage) Copy() core.Value { return values.None } func (p *HTMLPage) GetIn(ctx context.Context, path []core.Value) (core.Value, error) { return common.GetInPage(ctx, p, path) } func (p *HTMLPage) SetIn(ctx context.Context, path []core.Value, value core.Value) error { return common.SetInPage(ctx, p, path, value) } func (p *HTMLPage) Iterate(ctx context.Context) (core.Iterator, error) { return p.getCurrentDocument().Iterate(ctx) } func (p *HTMLPage) Length() values.Int { return p.getCurrentDocument().Length() } func (p *HTMLPage) Close() error { p.mu.Lock() defer p.mu.Unlock() url := p.GetURL().String() p.closed = values.True err := p.dom.Close() if err != nil { p.logger.Warn(). Timestamp(). Str("url", url). Err(err). Msg("failed to close dom manager") } err = p.network.Close() if err != nil { p.logger.Warn(). Timestamp(). Str("url", url). Err(err). Msg("failed to close network manager") } err = p.client.Page.Close(context.Background()) if err != nil { p.logger.Warn(). Timestamp(). Str("url", url). Err(err). Msg("failed to close browser page") } err = p.conn.Close() if err != nil { p.logger.Warn(). Timestamp(). Str("url", url). Err(err). Msg("failed to close connection") } return err } func (p *HTMLPage) IsClosed() values.Boolean { p.mu.Lock() defer p.mu.Unlock() return p.closed } func (p *HTMLPage) GetURL() values.String { res, err := p.getCurrentDocument().Eval(context.Background(), templates.GetURL()) if err == nil { return values.ToString(res) } p.logger.Warn(). Timestamp(). Err(err). Msg("failed to retrieve URL") return p.getCurrentDocument().GetURL() } func (p *HTMLPage) GetMainFrame() drivers.HTMLDocument { return p.getCurrentDocument() } func (p *HTMLPage) GetFrames(ctx context.Context) (*values.Array, error) { p.mu.Lock() defer p.mu.Unlock() return p.dom.GetFrameNodes(ctx) } func (p *HTMLPage) GetFrame(ctx context.Context, idx values.Int) (core.Value, error) { p.mu.Lock() defer p.mu.Unlock() frames, err := p.dom.GetFrameNodes(ctx) if err != nil { return values.None, err } return frames.Get(idx), nil } func (p *HTMLPage) GetCookies(ctx context.Context) (*drivers.HTTPCookies, error) { p.mu.Lock() defer p.mu.Unlock() return p.network.GetCookies(ctx) } func (p *HTMLPage) SetCookies(ctx context.Context, cookies *drivers.HTTPCookies) error { p.mu.Lock() defer p.mu.Unlock() return p.network.SetCookies(ctx, p.getCurrentDocument().GetURL().String(), cookies) } func (p *HTMLPage) DeleteCookies(ctx context.Context, cookies *drivers.HTTPCookies) error { p.mu.Lock() defer p.mu.Unlock() return p.network.DeleteCookies(ctx, p.getCurrentDocument().GetURL().String(), cookies) } func (p *HTMLPage) GetResponse(ctx context.Context) (drivers.HTTPResponse, error) { doc := p.getCurrentDocument() if doc == nil { return drivers.HTTPResponse{}, nil } return p.network.GetResponse(ctx, doc.Frame().Frame.ID) } func (p *HTMLPage) PrintToPDF(ctx context.Context, params drivers.PDFParams) (values.Binary, error) { p.mu.Lock() defer p.mu.Unlock() args := page.NewPrintToPDFArgs() args. SetLandscape(bool(params.Landscape)). SetDisplayHeaderFooter(bool(params.DisplayHeaderFooter)). SetPrintBackground(bool(params.PrintBackground)). SetIgnoreInvalidPageRanges(bool(params.IgnoreInvalidPageRanges)). SetPreferCSSPageSize(bool(params.PreferCSSPageSize)) if params.Scale > 0 { args.SetScale(float64(params.Scale)) } if params.PaperWidth > 0 { args.SetPaperWidth(float64(params.PaperWidth)) } if params.PaperHeight > 0 { args.SetPaperHeight(float64(params.PaperHeight)) } if params.MarginTop > 0 { args.SetMarginTop(float64(params.MarginTop)) } if params.MarginBottom > 0 { args.SetMarginBottom(float64(params.MarginBottom)) } if params.MarginRight > 0 { args.SetMarginRight(float64(params.MarginRight)) } if params.MarginLeft > 0 { args.SetMarginLeft(float64(params.MarginLeft)) } if params.PageRanges != values.EmptyString { args.SetPageRanges(string(params.PageRanges)) } if params.HeaderTemplate != values.EmptyString { args.SetHeaderTemplate(string(params.HeaderTemplate)) } if params.FooterTemplate != values.EmptyString { args.SetFooterTemplate(string(params.FooterTemplate)) } reply, err := p.client.Page.PrintToPDF(ctx, args) if err != nil { return values.NewBinary([]byte{}), err } return values.NewBinary(reply.Data), nil } func (p *HTMLPage) CaptureScreenshot(ctx context.Context, params drivers.ScreenshotParams) (values.Binary, error) { p.mu.Lock() defer p.mu.Unlock() metrics, err := p.client.Page.GetLayoutMetrics(ctx) if err != nil { return values.NewBinary(nil), err } if params.Format == drivers.ScreenshotFormatJPEG && params.Quality < 0 && params.Quality > 100 { params.Quality = 100 } if params.X < 0 { params.X = 0 } if params.Y < 0 { params.Y = 0 } if params.Width <= 0 { params.Width = values.Float(metrics.CSSLayoutViewport.ClientWidth) - params.X } if params.Height <= 0 { params.Height = values.Float(metrics.CSSLayoutViewport.ClientHeight) - params.Y } clip := page.Viewport{ X: float64(params.X), Y: float64(params.Y), Width: float64(params.Width), Height: float64(params.Height), Scale: 1.0, } format := string(params.Format) quality := int(params.Quality) args := page.CaptureScreenshotArgs{ Format: &format, Quality: &quality, Clip: &clip, } reply, err := p.client.Page.CaptureScreenshot(ctx, &args) if err != nil { return values.NewBinary([]byte{}), err } return values.NewBinary(reply.Data), nil } func (p *HTMLPage) Navigate(ctx context.Context, url values.String) error { p.mu.Lock() defer p.mu.Unlock() if err := p.network.Navigate(ctx, url); err != nil { return err } return p.reloadMainFrame(ctx) } func (p *HTMLPage) NavigateBack(ctx context.Context, skip values.Int) (values.Boolean, error) { p.mu.Lock() defer p.mu.Unlock() ret, err := p.network.NavigateBack(ctx, skip) if err != nil { return values.False, err } return ret, p.reloadMainFrame(ctx) } func (p *HTMLPage) NavigateForward(ctx context.Context, skip values.Int) (values.Boolean, error) { p.mu.Lock() defer p.mu.Unlock() ret, err := p.network.NavigateForward(ctx, skip) if err != nil { return values.False, err } return ret, p.reloadMainFrame(ctx) } func (p *HTMLPage) WaitForNavigation(ctx context.Context, targetURL values.String) error { p.mu.Lock() defer p.mu.Unlock() pattern, err := p.urlToRegexp(targetURL) if err != nil { return err } if err := p.network.WaitForNavigation(ctx, pattern); err != nil { return err } return p.reloadMainFrame(ctx) } func (p *HTMLPage) WaitForFrameNavigation(ctx context.Context, frame drivers.HTMLDocument, targetURL values.String) error { p.mu.Lock() defer p.mu.Unlock() current := p.dom.GetMainFrame() doc, ok := frame.(*dom.HTMLDocument) if !ok { return errors.New("invalid frame type") } pattern, err := p.urlToRegexp(targetURL) if err != nil { return err } frameID := doc.Frame().Frame.ID isMain := current.Frame().Frame.ID == frameID // if it's the current document if isMain { err = p.network.WaitForNavigation(ctx, pattern) } else { err = p.network.WaitForFrameNavigation(ctx, frameID, pattern) } if err != nil { return err } return p.reloadMainFrame(ctx) } func (p *HTMLPage) Subscribe(ctx context.Context, eventName string, options *values.Object) <-chan events.Event { ch := make(chan events.Event) go func() { var err error var data core.Value if eventName == drivers.EventPageNavigation { data, err = p.subscribeToNavigation(ctx, options) } else { err = core.Errorf(core.ErrInvalidOperation, "unknown event name: %s", eventName) } ch <- events.Event{ Data: data, Err: err, } }() return ch } func (p *HTMLPage) subscribeToNavigation(ctx context.Context, options *values.Object) (values.String, error) { var frame drivers.HTMLDocument var targetURL values.String if options != nil { if options.Has("frame") { frameOpt, _ := options.Get("frame") doc, ok := frameOpt.(drivers.HTMLDocument) if ok { frame = doc } else { return values.EmptyString, errors.Wrap(core.TypeError(frameOpt.Type(), drivers.HTMLDocumentType), "invalid frame") } } if options.Has("target") { targetURLOpt, _ := options.Get("target") url, ok := targetURLOpt.(values.String) if ok { targetURL = url } else { return values.EmptyString, errors.Wrap(core.TypeError(targetURLOpt.Type(), types.String), "invalid target") } } } if frame == nil { if err := p.WaitForNavigation(ctx, targetURL); err != nil { return values.EmptyString, err } return p.GetURL(), nil } if err := p.WaitForFrameNavigation(ctx, frame, targetURL); err != nil { return values.EmptyString, err } return frame.GetURL(), nil } func (p *HTMLPage) urlToRegexp(targetURL values.String) (*regexp.Regexp, error) { if targetURL == "" { return nil, nil } r, err := regexp.Compile(targetURL.String()) if err != nil { return nil, errors.Wrap(err, "invalid URL pattern") } return r, nil } func (p *HTMLPage) reloadMainFrame(ctx context.Context) error { prev := p.dom.GetMainFrame() if prev != nil { if err := p.dom.RemoveFrameRecursively(prev.Frame().Frame.ID); err != nil { p.logger.Error().Err(err).Msg("failed to remove main frame") } } next, err := dom.LoadRootHTMLDocument( ctx, p.logger, p.client, p.dom, p.mouse, p.keyboard, ) if err != nil { p.logger.Error().Err(err).Msg("failed to load a new root document") return err } p.dom.SetMainFrame(next) return nil } func (p *HTMLPage) loadMainFrame(ctx context.Context) error { next, err := dom.LoadRootHTMLDocument( ctx, p.logger, p.client, p.dom, p.mouse, p.keyboard, ) if err != nil { return err } p.dom.SetMainFrame(next) return nil } func (p *HTMLPage) getCurrentDocument() *dom.HTMLDocument { return p.dom.GetMainFrame() }