mirror of
				https://github.com/MontFerret/ferret.git
				synced 2025-10-30 23:37:40 +02:00 
			
		
		
		
	| @@ -56,6 +56,7 @@ func Exec(query string, opts Options) { | ||||
| 		runtime.WithLog(l), | ||||
| 		runtime.WithLogLevel(logging.DebugLevel), | ||||
| 		runtime.WithParams(opts.Params), | ||||
| 		runtime.WithProxy(opts.Proxy), | ||||
| 	) | ||||
|  | ||||
| 	if err != nil { | ||||
|   | ||||
| @@ -3,4 +3,5 @@ package cli | ||||
| // Options carries the settings handed to the CLI entry points Exec and Repl. | ||||
| type Options struct { | ||||
| 	// Cdp is filled from the CDP connection value in main. | ||||
| 	// NOTE(review): presumably the Chrome DevTools address - confirm. | ||||
| 	Cdp    string | ||||
| 	// Params is forwarded to the runtime via runtime.WithParams. | ||||
| 	Params map[string]interface{} | ||||
| 	// Proxy is forwarded to the runtime via runtime.WithProxy. | ||||
| 	Proxy  string | ||||
| } | ||||
|   | ||||
| @@ -98,6 +98,7 @@ func Repl(version string, opts Options) { | ||||
| 			runtime.WithLog(l), | ||||
| 			runtime.WithLogLevel(logging.DebugLevel), | ||||
| 			runtime.WithParams(opts.Params), | ||||
| 			runtime.WithProxy(opts.Proxy), | ||||
| 		) | ||||
|  | ||||
| 		timer.Stop() | ||||
|   | ||||
| @@ -7,7 +7,7 @@ LET links = ( | ||||
| ) | ||||
| FOR link IN links | ||||
|     // The Verge has pretty heavy pages, so let's increase the navigation wait time | ||||
|     NAVIGATE(doc, link, 10000) | ||||
|     NAVIGATE(doc, link, 20000) | ||||
|     WAIT_ELEMENT(doc, '.c-entry-content', 5000) | ||||
|     LET texter = ELEMENT(doc, '.c-entry-content') | ||||
|     RETURN texter.innerText | ||||
| @@ -9,6 +9,6 @@ FOR track IN tracks | ||||
|     LET title = ELEMENT(track, '.chartTrack__title') | ||||
|  | ||||
|     RETURN { | ||||
|         artist: username.innerText, | ||||
|         track: title.innerText | ||||
|         artist: TRIM(username.innerText), | ||||
|         track: TRIM(title.innerText) | ||||
|     } | ||||
|   | ||||
							
								
								
									
										7
									
								
								main.go
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								main.go
									
									
									
									
									
								
							| @@ -76,6 +76,12 @@ var ( | ||||
| 		false, | ||||
| 		"launch Chrome", | ||||
| 	) | ||||
|  | ||||
| 	proxyAddress = flag.String( | ||||
| 		"proxy", | ||||
| 		"", | ||||
| 		"address of proxy server to use (only applicable for static pages)", | ||||
| 	) | ||||
| ) | ||||
|  | ||||
| func main() { | ||||
| @@ -137,6 +143,7 @@ func main() { | ||||
| 	opts := cli.Options{ | ||||
| 		Cdp:    cdpConn, | ||||
| 		Params: p, | ||||
| 		Proxy:  *proxyAddress, | ||||
| 	} | ||||
|  | ||||
| 	stat, _ := os.Stdin.Stat() | ||||
|   | ||||
							
								
								
									
										28
									
								
								pkg/runtime/env/env.go
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								pkg/runtime/env/env.go
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| package env | ||||
|  | ||||
| import "context" | ||||
|  | ||||
| type ( | ||||
| 	// ctxKey is the unexported key type under which an Environment is | ||||
| 	// stored in a context.Context by WithContext. | ||||
| 	ctxKey struct{} | ||||
|  | ||||
| 	// Environment groups the addresses that are carried through the | ||||
| 	// execution context. | ||||
| 	Environment struct { | ||||
| 		CDPAddress   string | ||||
| 		ProxyAddress string | ||||
| 	} | ||||
| ) | ||||
|  | ||||
| // WithContext returns a child of ctx that carries e under the package's | ||||
| // private context key. | ||||
| func WithContext(ctx context.Context, e Environment) context.Context { | ||||
| 	key := ctxKey{} | ||||
|  | ||||
| 	return context.WithValue(ctx, key, e) | ||||
| } | ||||
|  | ||||
| // FromContext returns the Environment stored in ctx by WithContext. | ||||
| // When ctx holds no Environment, the zero Environment is returned. | ||||
| func FromContext(ctx context.Context) Environment { | ||||
| 	if stored, found := ctx.Value(ctxKey{}).(Environment); found { | ||||
| 		return stored | ||||
| 	} | ||||
|  | ||||
| 	return Environment{} | ||||
| } | ||||
| @@ -3,6 +3,7 @@ package runtime | ||||
| import ( | ||||
| 	"context" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/core" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/env" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/logging" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"io" | ||||
| @@ -53,7 +54,6 @@ func WithBrowser(address string) Option { | ||||
|  | ||||
| func WithProxy(address string) Option { | ||||
| 	return func(options *Options) { | ||||
| 		// TODO: add implementation | ||||
| 		options.proxy = address | ||||
| 	} | ||||
| } | ||||
| @@ -73,6 +73,10 @@ func WithLogLevel(lvl logging.Level) Option { | ||||
| // withContext derives a context from parent that carries, in this order, | ||||
| // the query params, the logging options and the runtime environment | ||||
| // (CDP and proxy addresses) held by opts. | ||||
| func (opts *Options) withContext(parent context.Context) context.Context { | ||||
| 	environment := env.Environment{ | ||||
| 		CDPAddress:   opts.cdp, | ||||
| 		ProxyAddress: opts.proxy, | ||||
| 	} | ||||
|  | ||||
| 	withParams := core.ParamsWith(parent, opts.params) | ||||
| 	withLogging := logging.WithContext(withParams, opts.logging) | ||||
|  | ||||
| 	return env.WithContext(withLogging, environment) | ||||
| } | ||||
|   | ||||
| @@ -40,7 +40,8 @@ func (p *Program) Run(ctx context.Context, setters ...Option) ([]byte, error) { | ||||
| 	} | ||||
|  | ||||
| 	ctx = opts.withContext(ctx) | ||||
| 	ctx = driver.WithDynamicDriver(ctx, opts.cdp) | ||||
| 	// TODO: Decouple from STDLIB | ||||
| 	ctx = driver.WithDynamicDriver(ctx) | ||||
| 	ctx = driver.WithStaticDriver(ctx) | ||||
|  | ||||
| 	out, err := p.body.Exec(ctx, scope) | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/core" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/env" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic" | ||||
| 	"github.com/MontFerret/ferret/pkg/stdlib/html/driver/static" | ||||
| @@ -37,10 +38,27 @@ func FromContext(ctx context.Context, name Name) (Driver, error) { | ||||
| 	return nil, core.Error(core.ErrNotFound, fmt.Sprintf("%s driver", name)) | ||||
| } | ||||
|  | ||||
| func WithDynamicDriver(ctx context.Context, addr string) context.Context { | ||||
| 	return context.WithValue(ctx, Dynamic, dynamic.NewDriver(addr)) | ||||
| // WithDynamicDriver stores a new dynamic driver in ctx under the Dynamic | ||||
| // key. The driver is built from the CDP and proxy addresses found in the | ||||
| // Environment carried by ctx. | ||||
| func WithDynamicDriver(ctx context.Context) context.Context { | ||||
| 	environment := env.FromContext(ctx) | ||||
| 	proxyOption := dynamic.WithProxy(environment.ProxyAddress) | ||||
| 	drv := dynamic.NewDriver(environment.CDPAddress, proxyOption) | ||||
|  | ||||
| 	return context.WithValue(ctx, Dynamic, drv) | ||||
| } | ||||
|  | ||||
| func WithStaticDriver(ctx context.Context, opts ...static.Option) context.Context { | ||||
| 	return context.WithValue(ctx, Static, static.NewDriver(opts...)) | ||||
| // WithStaticDriver stores a new static driver in ctx under the Static key. | ||||
| // The driver is configured with the proxy address found in the Environment | ||||
| // carried by ctx. | ||||
| func WithStaticDriver(ctx context.Context) context.Context { | ||||
| 	environment := env.FromContext(ctx) | ||||
| 	drv := static.NewDriver(static.WithProxy(environment.ProxyAddress)) | ||||
|  | ||||
| 	return context.WithValue(ctx, Static, drv) | ||||
| } | ||||
|   | ||||
| @@ -12,10 +12,8 @@ import ( | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval" | ||||
| 	"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events" | ||||
| 	"github.com/corpix/uarand" | ||||
| 	"github.com/mafredri/cdp" | ||||
| 	"github.com/mafredri/cdp/protocol/dom" | ||||
| 	"github.com/mafredri/cdp/protocol/emulation" | ||||
| 	"github.com/mafredri/cdp/protocol/page" | ||||
| 	"github.com/mafredri/cdp/rpcc" | ||||
| 	"github.com/pkg/errors" | ||||
| @@ -24,7 +22,18 @@ import ( | ||||
|  | ||||
| const BlankPageURL = "about:blank" | ||||
|  | ||||
| type HTMLDocument struct { | ||||
| type ( | ||||
| 	ScreenshotFormat string | ||||
| 	ScreenshotArgs   struct { | ||||
| 		X       float64 | ||||
| 		Y       float64 | ||||
| 		Width   float64 | ||||
| 		Height  float64 | ||||
| 		Format  ScreenshotFormat | ||||
| 		Quality int | ||||
| 	} | ||||
|  | ||||
| 	HTMLDocument struct { | ||||
| 		sync.Mutex | ||||
| 		logger  *zerolog.Logger | ||||
| 		conn    *rpcc.Conn | ||||
| @@ -32,11 +41,24 @@ type HTMLDocument struct { | ||||
| 		events  *events.EventBroker | ||||
| 		url     values.String | ||||
| 		element *HTMLElement | ||||
| 	} | ||||
| ) | ||||
|  | ||||
| // Screenshot formats accepted by IsScreenshotFormatValid. | ||||
| const ( | ||||
| 	ScreenshotFormatPNG  ScreenshotFormat = "png" | ||||
| 	ScreenshotFormatJPEG ScreenshotFormat = "jpeg" | ||||
| ) | ||||
|  | ||||
| // IsScreenshotFormatValid reports whether format names one of the | ||||
| // supported screenshot formats (PNG or JPEG). | ||||
| func IsScreenshotFormatValid(format string) bool { | ||||
| 	switch ScreenshotFormat(format) { | ||||
| 	case ScreenshotFormatPNG, ScreenshotFormatJPEG: | ||||
| 		return true | ||||
| 	default: | ||||
| 		return false | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func LoadHTMLDocument( | ||||
| 	ctx context.Context, | ||||
| 	conn *rpcc.Conn, | ||||
| 	client *cdp.Client, | ||||
| 	url string, | ||||
| ) (*HTMLDocument, error) { | ||||
| 	if conn == nil { | ||||
| @@ -47,39 +69,7 @@ func LoadHTMLDocument( | ||||
| 		return nil, core.Error(core.ErrMissedArgument, "url") | ||||
| 	} | ||||
|  | ||||
| 	client := cdp.NewClient(conn) | ||||
|  | ||||
| 	err := runBatch( | ||||
| 		func() error { | ||||
| 			return client.Page.Enable(ctx) | ||||
| 		}, | ||||
|  | ||||
| 		func() error { | ||||
| 			return client.Page.SetLifecycleEventsEnabled( | ||||
| 				ctx, | ||||
| 				page.NewSetLifecycleEventsEnabledArgs(true), | ||||
| 			) | ||||
| 		}, | ||||
|  | ||||
| 		func() error { | ||||
| 			return client.DOM.Enable(ctx) | ||||
| 		}, | ||||
|  | ||||
| 		func() error { | ||||
| 			return client.Runtime.Enable(ctx) | ||||
| 		}, | ||||
|  | ||||
| 		func() error { | ||||
| 			return client.Emulation.SetUserAgentOverride( | ||||
| 				ctx, | ||||
| 				emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()), | ||||
| 			) | ||||
| 		}, | ||||
| 	) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	var err error | ||||
|  | ||||
| 	if url != BlankPageURL { | ||||
| 		err = waitForLoadEvent(ctx, client) | ||||
| @@ -111,26 +101,6 @@ func LoadHTMLDocument( | ||||
| 	), nil | ||||
| } | ||||
|  | ||||
| func getRootElement(client *cdp.Client) (dom.Node, values.String, error) { | ||||
| 	args := dom.NewGetDocumentArgs() | ||||
| 	args.Depth = pointerInt(1) // lets load the entire document | ||||
| 	ctx := context.Background() | ||||
|  | ||||
| 	d, err := client.DOM.GetDocument(ctx, args) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return dom.Node{}, values.EmptyString, err | ||||
| 	} | ||||
|  | ||||
| 	innerHTML, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID)) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return dom.Node{}, values.EmptyString, err | ||||
| 	} | ||||
|  | ||||
| 	return d.Root, values.NewString(innerHTML.OuterHTML), nil | ||||
| } | ||||
|  | ||||
| func NewHTMLDocument( | ||||
| 	logger *zerolog.Logger, | ||||
| 	conn *rpcc.Conn, | ||||
| @@ -731,6 +701,54 @@ func (doc *HTMLDocument) Navigate(url values.String, timeout values.Int) error { | ||||
| 	return doc.WaitForNavigation(timeout) | ||||
| } | ||||
|  | ||||
| func (doc *HTMLDocument) CaptureScreenshot(params *ScreenshotArgs) (core.Value, error) { | ||||
| 	ctx := context.Background() | ||||
| 	metrics, err := doc.client.Page.GetLayoutMetrics(ctx) | ||||
|  | ||||
| 	if params.Format == ScreenshotFormatJPEG && params.Quality < 0 && params.Quality > 100 { | ||||
| 		params.Quality = 100 | ||||
| 	} | ||||
|  | ||||
| 	if params.X < 0 { | ||||
| 		params.X = 0 | ||||
| 	} | ||||
|  | ||||
| 	if params.Y < 0 { | ||||
| 		params.Y = 0 | ||||
| 	} | ||||
|  | ||||
| 	if params.Width <= 0 { | ||||
| 		params.Width = float64(metrics.LayoutViewport.ClientWidth) - params.X | ||||
| 	} | ||||
|  | ||||
| 	if params.Height <= 0 { | ||||
| 		params.Height = float64(metrics.LayoutViewport.ClientHeight) - params.Y | ||||
| 	} | ||||
|  | ||||
| 	clip := page.Viewport{ | ||||
| 		X:      params.X, | ||||
| 		Y:      params.Y, | ||||
| 		Width:  params.Width, | ||||
| 		Height: params.Height, | ||||
| 		Scale:  1.0, | ||||
| 	} | ||||
|  | ||||
| 	format := string(params.Format) | ||||
| 	screenshotArgs := page.CaptureScreenshotArgs{ | ||||
| 		Format:  &format, | ||||
| 		Quality: ¶ms.Quality, | ||||
| 		Clip:    &clip, | ||||
| 	} | ||||
|  | ||||
| 	reply, err := doc.client.Page.CaptureScreenshot(ctx, &screenshotArgs) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	return values.NewBinary(reply.Data), nil | ||||
| } | ||||
|  | ||||
| func (doc *HTMLDocument) onLoad(_ interface{}) { | ||||
| 	doc.Lock() | ||||
| 	defer doc.Unlock() | ||||
| @@ -777,66 +795,3 @@ func (doc *HTMLDocument) onError(val interface{}) { | ||||
| 		Err(err). | ||||
| 		Msg("unexpected error") | ||||
| } | ||||
|  | ||||
| type ScreenshotFormat string | ||||
|  | ||||
| const ( | ||||
| 	ScreenshotFormatPNG  ScreenshotFormat = "png" | ||||
| 	ScreenshotFormatJPEG ScreenshotFormat = "jpeg" | ||||
| ) | ||||
|  | ||||
| func IsScreenshotFormatValid(format string) bool { | ||||
| 	value := ScreenshotFormat(format) | ||||
| 	return value == ScreenshotFormatPNG || value == ScreenshotFormatJPEG | ||||
| } | ||||
|  | ||||
| type ScreenshotArgs struct { | ||||
| 	X       float64 | ||||
| 	Y       float64 | ||||
| 	Width   float64 | ||||
| 	Height  float64 | ||||
| 	Format  ScreenshotFormat | ||||
| 	Quality int | ||||
| } | ||||
|  | ||||
| func (doc *HTMLDocument) CaptureScreenshot(params *ScreenshotArgs) (core.Value, error) { | ||||
| 	ctx := context.Background() | ||||
| 	metrics, err := doc.client.Page.GetLayoutMetrics(ctx) | ||||
|  | ||||
| 	if params.Format == ScreenshotFormatJPEG && params.Quality < 0 && params.Quality > 100 { | ||||
| 		params.Quality = 100 | ||||
| 	} | ||||
| 	if params.X < 0 { | ||||
| 		params.X = 0 | ||||
| 	} | ||||
| 	if params.Y < 0 { | ||||
| 		params.Y = 0 | ||||
| 	} | ||||
| 	if params.Width <= 0 { | ||||
| 		params.Width = float64(metrics.LayoutViewport.ClientWidth) - params.X | ||||
| 	} | ||||
| 	if params.Height <= 0 { | ||||
| 		params.Height = float64(metrics.LayoutViewport.ClientHeight) - params.Y | ||||
| 	} | ||||
| 	clip := page.Viewport{ | ||||
| 		X:      params.X, | ||||
| 		Y:      params.Y, | ||||
| 		Width:  params.Width, | ||||
| 		Height: params.Height, | ||||
| 		Scale:  1.0, | ||||
| 	} | ||||
|  | ||||
| 	format := string(params.Format) | ||||
| 	screenshotArgs := page.CaptureScreenshotArgs{ | ||||
| 		Format:  &format, | ||||
| 		Quality: ¶ms.Quality, | ||||
| 		Clip:    &clip, | ||||
| 	} | ||||
|  | ||||
| 	reply, err := doc.client.Page.CaptureScreenshot(ctx, &screenshotArgs) | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	return values.NewBinary(reply.Data), nil | ||||
| } | ||||
|   | ||||
| @@ -3,8 +3,11 @@ package dynamic | ||||
| import ( | ||||
| 	"context" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"github.com/corpix/uarand" | ||||
| 	"github.com/mafredri/cdp" | ||||
| 	"github.com/mafredri/cdp/devtool" | ||||
| 	"github.com/mafredri/cdp/protocol/emulation" | ||||
| 	"github.com/mafredri/cdp/protocol/page" | ||||
| 	"github.com/mafredri/cdp/protocol/target" | ||||
| 	"github.com/mafredri/cdp/rpcc" | ||||
| 	"github.com/mafredri/cdp/session" | ||||
| @@ -19,11 +22,17 @@ type Driver struct { | ||||
| 	client    *cdp.Client | ||||
| 	session   *session.Manager | ||||
| 	contextID target.BrowserContextID | ||||
| 	opts      *Options | ||||
| } | ||||
| 
 | ||||
| func NewDriver(address string) *Driver { | ||||
| func NewDriver(address string, opts ...Option) *Driver { | ||||
| 	drv := new(Driver) | ||||
| 	drv.dev = devtool.New(address) | ||||
| 	drv.opts = new(Options) | ||||
| 
 | ||||
| 	for _, opt := range opts { | ||||
| 		opt(drv.opts) | ||||
| 	} | ||||
| 
 | ||||
| 	return drv | ||||
| } | ||||
| @@ -60,7 +69,37 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	return LoadHTMLDocument(ctx, conn, url) | ||||
| 	client := cdp.NewClient(conn) | ||||
| 
 | ||||
| 	err = runBatch( | ||||
| 		func() error { | ||||
| 			return client.Page.Enable(ctx) | ||||
| 		}, | ||||
| 
 | ||||
| 		func() error { | ||||
| 			return client.Page.SetLifecycleEventsEnabled( | ||||
| 				ctx, | ||||
| 				page.NewSetLifecycleEventsEnabledArgs(true), | ||||
| 			) | ||||
| 		}, | ||||
| 
 | ||||
| 		func() error { | ||||
| 			return client.DOM.Enable(ctx) | ||||
| 		}, | ||||
| 
 | ||||
| 		func() error { | ||||
| 			return client.Runtime.Enable(ctx) | ||||
| 		}, | ||||
| 
 | ||||
| 		func() error { | ||||
| 			return client.Emulation.SetUserAgentOverride( | ||||
| 				ctx, | ||||
| 				emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()), | ||||
| 			) | ||||
| 		}, | ||||
| 	) | ||||
| 
 | ||||
| 	return LoadHTMLDocument(ctx, conn, client, url) | ||||
| } | ||||
| 
 | ||||
| func (drv *Driver) Close() error { | ||||
| @@ -29,6 +29,26 @@ func runBatch(funcs ...batchFunc) error { | ||||
| 	return eg.Wait() | ||||
| } | ||||
|  | ||||
| // getRootElement fetches the document root node through the DOM domain and | ||||
| // returns it together with the root's outer HTML. | ||||
| // On failure it returns an empty node, an empty string and the error. | ||||
| func getRootElement(client *cdp.Client) (dom.Node, values.String, error) { | ||||
| 	ctx := context.Background() | ||||
|  | ||||
| 	// NOTE(review): the previous comment claimed depth 1 loads the entire | ||||
| 	// document; the DevTools protocol documents -1 for the whole subtree. | ||||
| 	// Confirm which depth is intended. | ||||
| 	docArgs := dom.NewGetDocumentArgs() | ||||
| 	docArgs.Depth = pointerInt(1) | ||||
|  | ||||
| 	docReply, err := client.DOM.GetDocument(ctx, docArgs) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return dom.Node{}, values.EmptyString, err | ||||
| 	} | ||||
|  | ||||
| 	htmlArgs := dom.NewGetOuterHTMLArgs().SetNodeID(docReply.Root.NodeID) | ||||
| 	htmlReply, err := client.DOM.GetOuterHTML(ctx, htmlArgs) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return dom.Node{}, values.EmptyString, err | ||||
| 	} | ||||
|  | ||||
| 	return docReply.Root, values.NewString(htmlReply.OuterHTML), nil | ||||
| } | ||||
|  | ||||
| func parseAttrs(attrs []string) *values.Object { | ||||
| 	var attr values.String | ||||
|  | ||||
|   | ||||
							
								
								
									
										15
									
								
								pkg/stdlib/html/driver/dynamic/options.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								pkg/stdlib/html/driver/dynamic/options.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| package dynamic | ||||
|  | ||||
| type ( | ||||
| 	Options struct { | ||||
| 		proxy string | ||||
| 	} | ||||
|  | ||||
| 	Option func(opts *Options) | ||||
| ) | ||||
|  | ||||
| // WithProxy returns an Option that records address as the proxy address. | ||||
| func WithProxy(address string) Option { | ||||
| 	setProxy := func(target *Options) { | ||||
| 		target.proxy = address | ||||
| 	} | ||||
|  | ||||
| 	return setProxy | ||||
| } | ||||
| @@ -8,29 +8,62 @@ import ( | ||||
| 	"github.com/corpix/uarand" | ||||
| 	"github.com/pkg/errors" | ||||
| 	"github.com/sethgrid/pester" | ||||
| 	httpx "net/http" | ||||
| 	"net/http" | ||||
| 	"net/url" | ||||
| ) | ||||
| 
 | ||||
| // Driver fetches documents over HTTP through a pester client. | ||||
| type Driver struct { | ||||
| 	// client performs the requests issued by GetDocument. | ||||
| 	client  *pester.Client | ||||
| 	// options holds the settings the driver was created with. | ||||
| 	options *Options | ||||
| } | ||||
| 
 | ||||
| func NewDriver(setters ...Option) *Driver { | ||||
| 	client := pester.New() | ||||
| 	client.Concurrency = 3 | ||||
| 	client.MaxRetries = 5 | ||||
| 	client.Backoff = pester.ExponentialBackoff | ||||
| 
 | ||||
| 	for _, setter := range setters { | ||||
| 		setter(client) | ||||
| func NewDriver(opts ...Option) *Driver { | ||||
| 	drv := new(Driver) | ||||
| 	drv.options = &Options{ | ||||
| 		concurrency: 3, | ||||
| 		maxRetries:  5, | ||||
| 		backoff:     pester.ExponentialBackoff, | ||||
| 	} | ||||
| 
 | ||||
| 	return &Driver{client} | ||||
| 	for _, opt := range opts { | ||||
| 		opt(drv.options) | ||||
| 	} | ||||
| 
 | ||||
| 	if drv.options.proxy == "" { | ||||
| 		drv.client = pester.New() | ||||
| 	} else { | ||||
| 		client, err := newClientWithProxy(drv.options) | ||||
| 
 | ||||
| 		if err != nil { | ||||
| 			drv.client = pester.New() | ||||
| 		} else { | ||||
| 			drv.client = pester.NewExtendedClient(client) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	drv.client.Concurrency = drv.options.concurrency | ||||
| 	drv.client.MaxRetries = drv.options.maxRetries | ||||
| 	drv.client.Backoff = drv.options.backoff | ||||
| 
 | ||||
| 	return drv | ||||
| } | ||||
| 
 | ||||
| // newClientWithProxy builds an *http.Client whose transport routes every | ||||
| // request through the proxy address held in options. | ||||
| // It returns an error when the proxy address cannot be parsed as a URL. | ||||
| func newClientWithProxy(options *Options) (*http.Client, error) { | ||||
| 	parsed, err := url.Parse(options.proxy) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	transport := &http.Transport{ | ||||
| 		Proxy: http.ProxyURL(parsed), | ||||
| 	} | ||||
| 	return &http.Client{Transport: transport}, nil | ||||
| } | ||||
| 
 | ||||
| func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) { | ||||
| 	url := targetURL.String() | ||||
| 	req, err := httpx.NewRequest(httpx.MethodGet, url, nil) | ||||
| 	u := targetURL.String() | ||||
| 	req, err := http.NewRequest(http.MethodGet, u, nil) | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| @@ -45,7 +78,7 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values | ||||
| 	resp, err := d.client.Do(req) | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		return nil, errors.Wrapf(err, "failed to retrieve a document %s", url) | ||||
| 		return nil, errors.Wrapf(err, "failed to retrieve a document %s", u) | ||||
| 	} | ||||
| 
 | ||||
| 	defer resp.Body.Close() | ||||
| @@ -53,10 +86,10 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values | ||||
| 	doc, err := goquery.NewDocumentFromReader(resp.Body) | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		return nil, errors.Wrapf(err, "failed to parse a document %s", url) | ||||
| 		return nil, errors.Wrapf(err, "failed to parse a document %s", u) | ||||
| 	} | ||||
| 
 | ||||
| 	return NewHTMLDocument(url, doc) | ||||
| 	return NewHTMLDocument(u, doc) | ||||
| } | ||||
| 
 | ||||
| func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) { | ||||
| @@ -1,37 +1,51 @@ | ||||
| package static | ||||
|  | ||||
| import "github.com/sethgrid/pester" | ||||
| import ( | ||||
| 	"github.com/sethgrid/pester" | ||||
| ) | ||||
|  | ||||
| type ( | ||||
| 	Option func(opts *pester.Client) | ||||
| 	Option  func(opts *Options) | ||||
| 	Options struct { | ||||
| 		backoff     pester.BackoffStrategy | ||||
| 		maxRetries  int | ||||
| 		concurrency int | ||||
| 		proxy       string | ||||
| 	} | ||||
| ) | ||||
|  | ||||
| func WithDefaultBackoff() Option { | ||||
| 	return func(opts *pester.Client) { | ||||
| 		opts.Backoff = pester.DefaultBackoff | ||||
| 	return func(opts *Options) { | ||||
| 		opts.backoff = pester.DefaultBackoff | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithLinearBackoff() Option { | ||||
| 	return func(opts *pester.Client) { | ||||
| 		opts.Backoff = pester.LinearBackoff | ||||
| 	return func(opts *Options) { | ||||
| 		opts.backoff = pester.LinearBackoff | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithExponentialBackoff() Option { | ||||
| 	return func(opts *pester.Client) { | ||||
| 		opts.Backoff = pester.ExponentialBackoff | ||||
| 	return func(opts *Options) { | ||||
| 		opts.backoff = pester.ExponentialBackoff | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithMaxRetries(value int) Option { | ||||
| 	return func(opts *pester.Client) { | ||||
| 		opts.MaxRetries = value | ||||
| 	return func(opts *Options) { | ||||
| 		opts.maxRetries = value | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithConcurrency(value int) Option { | ||||
| 	return func(opts *pester.Client) { | ||||
| 		opts.Concurrency = value | ||||
| 	return func(opts *Options) { | ||||
| 		opts.concurrency = value | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithProxy(address string) Option { | ||||
| 	return func(opts *Options) { | ||||
| 		opts.proxy = address | ||||
| 	} | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user