mirror of
https://github.com/MontFerret/ferret.git
synced 2025-01-12 03:19:45 +02:00
parent
8f7edaedee
commit
957490efec
@ -56,6 +56,7 @@ func Exec(query string, opts Options) {
|
||||
runtime.WithLog(l),
|
||||
runtime.WithLogLevel(logging.DebugLevel),
|
||||
runtime.WithParams(opts.Params),
|
||||
runtime.WithProxy(opts.Proxy),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
@ -3,4 +3,5 @@ package cli
|
||||
// Options carries the settings that main hands to the CLI entry points
// (Exec and Repl).
type Options struct {
	// Cdp is populated from the CDP connection value resolved in main.
	// NOTE(review): presumably a Chrome DevTools Protocol address - confirm.
	Cdp string

	// Params holds the named query parameters forwarded to runtime.WithParams.
	Params map[string]interface{}

	// Proxy is the proxy server address forwarded to runtime.WithProxy.
	Proxy string
}
|
||||
|
@ -98,6 +98,7 @@ func Repl(version string, opts Options) {
|
||||
runtime.WithLog(l),
|
||||
runtime.WithLogLevel(logging.DebugLevel),
|
||||
runtime.WithParams(opts.Params),
|
||||
runtime.WithProxy(opts.Proxy),
|
||||
)
|
||||
|
||||
timer.Stop()
|
||||
|
@ -7,7 +7,7 @@ LET links = (
|
||||
)
|
||||
FOR link IN links
|
||||
// The Verge has pretty heavy pages, so let's increase the navigation wait time
|
||||
NAVIGATE(doc, link, 10000)
|
||||
NAVIGATE(doc, link, 20000)
|
||||
WAIT_ELEMENT(doc, '.c-entry-content', 5000)
|
||||
LET texter = ELEMENT(doc, '.c-entry-content')
|
||||
RETURN texter.innerText
|
@ -9,6 +9,6 @@ FOR track IN tracks
|
||||
LET title = ELEMENT(track, '.chartTrack__title')
|
||||
|
||||
RETURN {
|
||||
artist: username.innerText,
|
||||
track: title.innerText
|
||||
artist: TRIM(username.innerText),
|
||||
track: TRIM(title.innerText)
|
||||
}
|
||||
|
7
main.go
7
main.go
@ -76,6 +76,12 @@ var (
|
||||
false,
|
||||
"launch Chrome",
|
||||
)
|
||||
|
||||
proxyAddress = flag.String(
|
||||
"proxy",
|
||||
"",
|
||||
"address of proxy server to use (only applicable for static pages)",
|
||||
)
|
||||
)
|
||||
|
||||
func main() {
|
||||
@ -137,6 +143,7 @@ func main() {
|
||||
opts := cli.Options{
|
||||
Cdp: cdpConn,
|
||||
Params: p,
|
||||
Proxy: *proxyAddress,
|
||||
}
|
||||
|
||||
stat, _ := os.Stdin.Stat()
|
||||
|
28
pkg/runtime/env/env.go
vendored
Normal file
28
pkg/runtime/env/env.go
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
package env
|
||||
|
||||
import "context"
|
||||
|
||||
// ctxKey is the unexported key type under which an Environment is stored in
// a context.Context, so no other package can collide with it.
type ctxKey struct{}

// Environment groups the runtime settings that travel with the context.
type Environment struct {
	// CDPAddress is the address handed to the dynamic driver.
	CDPAddress string

	// ProxyAddress is the proxy address handed to the drivers.
	ProxyAddress string
}
|
||||
|
||||
func WithContext(ctx context.Context, e Environment) context.Context {
|
||||
return context.WithValue(ctx, ctxKey{}, e)
|
||||
}
|
||||
|
||||
func FromContext(ctx context.Context) Environment {
|
||||
res := ctx.Value(ctxKey{})
|
||||
|
||||
val, ok := res.(Environment)
|
||||
|
||||
if !ok {
|
||||
return Environment{}
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
@ -3,6 +3,7 @@ package runtime
|
||||
import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/env"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/logging"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"io"
|
||||
@ -53,7 +54,6 @@ func WithBrowser(address string) Option {
|
||||
|
||||
func WithProxy(address string) Option {
|
||||
return func(options *Options) {
|
||||
// TODO: add implementation
|
||||
options.proxy = address
|
||||
}
|
||||
}
|
||||
@ -73,6 +73,10 @@ func WithLogLevel(lvl logging.Level) Option {
|
||||
func (opts *Options) withContext(parent context.Context) context.Context {
|
||||
ctx := core.ParamsWith(parent, opts.params)
|
||||
ctx = logging.WithContext(ctx, opts.logging)
|
||||
ctx = env.WithContext(ctx, env.Environment{
|
||||
CDPAddress: opts.cdp,
|
||||
ProxyAddress: opts.proxy,
|
||||
})
|
||||
|
||||
return ctx
|
||||
}
|
||||
|
@ -40,7 +40,8 @@ func (p *Program) Run(ctx context.Context, setters ...Option) ([]byte, error) {
|
||||
}
|
||||
|
||||
ctx = opts.withContext(ctx)
|
||||
ctx = driver.WithDynamicDriver(ctx, opts.cdp)
|
||||
// TODO: Decouple from STDLIB
|
||||
ctx = driver.WithDynamicDriver(ctx)
|
||||
ctx = driver.WithStaticDriver(ctx)
|
||||
|
||||
out, err := p.body.Exec(ctx, scope)
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/env"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/static"
|
||||
@ -37,10 +38,27 @@ func FromContext(ctx context.Context, name Name) (Driver, error) {
|
||||
return nil, core.Error(core.ErrNotFound, fmt.Sprintf("%s driver", name))
|
||||
}
|
||||
|
||||
func WithDynamicDriver(ctx context.Context, addr string) context.Context {
|
||||
return context.WithValue(ctx, Dynamic, dynamic.NewDriver(addr))
|
||||
func WithDynamicDriver(ctx context.Context) context.Context {
|
||||
e := env.FromContext(ctx)
|
||||
|
||||
return context.WithValue(
|
||||
ctx,
|
||||
Dynamic,
|
||||
dynamic.NewDriver(
|
||||
e.CDPAddress,
|
||||
dynamic.WithProxy(e.ProxyAddress),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
func WithStaticDriver(ctx context.Context, opts ...static.Option) context.Context {
|
||||
return context.WithValue(ctx, Static, static.NewDriver(opts...))
|
||||
func WithStaticDriver(ctx context.Context) context.Context {
|
||||
e := env.FromContext(ctx)
|
||||
|
||||
return context.WithValue(
|
||||
ctx,
|
||||
Static,
|
||||
static.NewDriver(
|
||||
static.WithProxy(e.ProxyAddress),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
@ -6,16 +6,14 @@ import (
|
||||
"hash/fnv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/logging"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
|
||||
"github.com/corpix/uarand"
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/protocol/dom"
|
||||
"github.com/mafredri/cdp/protocol/emulation"
|
||||
"github.com/mafredri/cdp/protocol/page"
|
||||
"github.com/mafredri/cdp/rpcc"
|
||||
"github.com/pkg/errors"
|
||||
@ -24,19 +22,43 @@ import (
|
||||
|
||||
const BlankPageURL = "about:blank"
|
||||
|
||||
type HTMLDocument struct {
|
||||
sync.Mutex
|
||||
logger *zerolog.Logger
|
||||
conn *rpcc.Conn
|
||||
client *cdp.Client
|
||||
events *events.EventBroker
|
||||
url values.String
|
||||
element *HTMLElement
|
||||
type (
|
||||
ScreenshotFormat string
|
||||
ScreenshotArgs struct {
|
||||
X float64
|
||||
Y float64
|
||||
Width float64
|
||||
Height float64
|
||||
Format ScreenshotFormat
|
||||
Quality int
|
||||
}
|
||||
|
||||
HTMLDocument struct {
|
||||
sync.Mutex
|
||||
logger *zerolog.Logger
|
||||
conn *rpcc.Conn
|
||||
client *cdp.Client
|
||||
events *events.EventBroker
|
||||
url values.String
|
||||
element *HTMLElement
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
ScreenshotFormatPNG ScreenshotFormat = "png"
|
||||
ScreenshotFormatJPEG ScreenshotFormat = "jpeg"
|
||||
)
|
||||
|
||||
func IsScreenshotFormatValid(format string) bool {
|
||||
value := ScreenshotFormat(format)
|
||||
|
||||
return value == ScreenshotFormatPNG || value == ScreenshotFormatJPEG
|
||||
}
|
||||
|
||||
func LoadHTMLDocument(
|
||||
ctx context.Context,
|
||||
conn *rpcc.Conn,
|
||||
client *cdp.Client,
|
||||
url string,
|
||||
) (*HTMLDocument, error) {
|
||||
if conn == nil {
|
||||
@ -47,39 +69,7 @@ func LoadHTMLDocument(
|
||||
return nil, core.Error(core.ErrMissedArgument, "url")
|
||||
}
|
||||
|
||||
client := cdp.NewClient(conn)
|
||||
|
||||
err := runBatch(
|
||||
func() error {
|
||||
return client.Page.Enable(ctx)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.Page.SetLifecycleEventsEnabled(
|
||||
ctx,
|
||||
page.NewSetLifecycleEventsEnabledArgs(true),
|
||||
)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.DOM.Enable(ctx)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.Runtime.Enable(ctx)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.Emulation.SetUserAgentOverride(
|
||||
ctx,
|
||||
emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()),
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var err error
|
||||
|
||||
if url != BlankPageURL {
|
||||
err = waitForLoadEvent(ctx, client)
|
||||
@ -111,26 +101,6 @@ func LoadHTMLDocument(
|
||||
), nil
|
||||
}
|
||||
|
||||
func getRootElement(client *cdp.Client) (dom.Node, values.String, error) {
|
||||
args := dom.NewGetDocumentArgs()
|
||||
args.Depth = pointerInt(1) // lets load the entire document
|
||||
ctx := context.Background()
|
||||
|
||||
d, err := client.DOM.GetDocument(ctx, args)
|
||||
|
||||
if err != nil {
|
||||
return dom.Node{}, values.EmptyString, err
|
||||
}
|
||||
|
||||
innerHTML, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID))
|
||||
|
||||
if err != nil {
|
||||
return dom.Node{}, values.EmptyString, err
|
||||
}
|
||||
|
||||
return d.Root, values.NewString(innerHTML.OuterHTML), nil
|
||||
}
|
||||
|
||||
func NewHTMLDocument(
|
||||
logger *zerolog.Logger,
|
||||
conn *rpcc.Conn,
|
||||
@ -731,6 +701,54 @@ func (doc *HTMLDocument) Navigate(url values.String, timeout values.Int) error {
|
||||
return doc.WaitForNavigation(timeout)
|
||||
}
|
||||
|
||||
func (doc *HTMLDocument) CaptureScreenshot(params *ScreenshotArgs) (core.Value, error) {
|
||||
ctx := context.Background()
|
||||
metrics, err := doc.client.Page.GetLayoutMetrics(ctx)
|
||||
|
||||
if params.Format == ScreenshotFormatJPEG && params.Quality < 0 && params.Quality > 100 {
|
||||
params.Quality = 100
|
||||
}
|
||||
|
||||
if params.X < 0 {
|
||||
params.X = 0
|
||||
}
|
||||
|
||||
if params.Y < 0 {
|
||||
params.Y = 0
|
||||
}
|
||||
|
||||
if params.Width <= 0 {
|
||||
params.Width = float64(metrics.LayoutViewport.ClientWidth) - params.X
|
||||
}
|
||||
|
||||
if params.Height <= 0 {
|
||||
params.Height = float64(metrics.LayoutViewport.ClientHeight) - params.Y
|
||||
}
|
||||
|
||||
clip := page.Viewport{
|
||||
X: params.X,
|
||||
Y: params.Y,
|
||||
Width: params.Width,
|
||||
Height: params.Height,
|
||||
Scale: 1.0,
|
||||
}
|
||||
|
||||
format := string(params.Format)
|
||||
screenshotArgs := page.CaptureScreenshotArgs{
|
||||
Format: &format,
|
||||
Quality: ¶ms.Quality,
|
||||
Clip: &clip,
|
||||
}
|
||||
|
||||
reply, err := doc.client.Page.CaptureScreenshot(ctx, &screenshotArgs)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
return values.NewBinary(reply.Data), nil
|
||||
}
|
||||
|
||||
func (doc *HTMLDocument) onLoad(_ interface{}) {
|
||||
doc.Lock()
|
||||
defer doc.Unlock()
|
||||
@ -777,66 +795,3 @@ func (doc *HTMLDocument) onError(val interface{}) {
|
||||
Err(err).
|
||||
Msg("unexpected error")
|
||||
}
|
||||
|
||||
type ScreenshotFormat string
|
||||
|
||||
const (
|
||||
ScreenshotFormatPNG ScreenshotFormat = "png"
|
||||
ScreenshotFormatJPEG ScreenshotFormat = "jpeg"
|
||||
)
|
||||
|
||||
func IsScreenshotFormatValid(format string) bool {
|
||||
value := ScreenshotFormat(format)
|
||||
return value == ScreenshotFormatPNG || value == ScreenshotFormatJPEG
|
||||
}
|
||||
|
||||
type ScreenshotArgs struct {
|
||||
X float64
|
||||
Y float64
|
||||
Width float64
|
||||
Height float64
|
||||
Format ScreenshotFormat
|
||||
Quality int
|
||||
}
|
||||
|
||||
func (doc *HTMLDocument) CaptureScreenshot(params *ScreenshotArgs) (core.Value, error) {
|
||||
ctx := context.Background()
|
||||
metrics, err := doc.client.Page.GetLayoutMetrics(ctx)
|
||||
|
||||
if params.Format == ScreenshotFormatJPEG && params.Quality < 0 && params.Quality > 100 {
|
||||
params.Quality = 100
|
||||
}
|
||||
if params.X < 0 {
|
||||
params.X = 0
|
||||
}
|
||||
if params.Y < 0 {
|
||||
params.Y = 0
|
||||
}
|
||||
if params.Width <= 0 {
|
||||
params.Width = float64(metrics.LayoutViewport.ClientWidth) - params.X
|
||||
}
|
||||
if params.Height <= 0 {
|
||||
params.Height = float64(metrics.LayoutViewport.ClientHeight) - params.Y
|
||||
}
|
||||
clip := page.Viewport{
|
||||
X: params.X,
|
||||
Y: params.Y,
|
||||
Width: params.Width,
|
||||
Height: params.Height,
|
||||
Scale: 1.0,
|
||||
}
|
||||
|
||||
format := string(params.Format)
|
||||
screenshotArgs := page.CaptureScreenshotArgs{
|
||||
Format: &format,
|
||||
Quality: ¶ms.Quality,
|
||||
Clip: &clip,
|
||||
}
|
||||
|
||||
reply, err := doc.client.Page.CaptureScreenshot(ctx, &screenshotArgs)
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
return values.NewBinary(reply.Data), nil
|
||||
}
|
||||
|
@ -3,8 +3,11 @@ package dynamic
|
||||
import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/corpix/uarand"
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/devtool"
|
||||
"github.com/mafredri/cdp/protocol/emulation"
|
||||
"github.com/mafredri/cdp/protocol/page"
|
||||
"github.com/mafredri/cdp/protocol/target"
|
||||
"github.com/mafredri/cdp/rpcc"
|
||||
"github.com/mafredri/cdp/session"
|
||||
@ -19,11 +22,17 @@ type Driver struct {
|
||||
client *cdp.Client
|
||||
session *session.Manager
|
||||
contextID target.BrowserContextID
|
||||
opts *Options
|
||||
}
|
||||
|
||||
func NewDriver(address string) *Driver {
|
||||
func NewDriver(address string, opts ...Option) *Driver {
|
||||
drv := new(Driver)
|
||||
drv.dev = devtool.New(address)
|
||||
drv.opts = new(Options)
|
||||
|
||||
for _, opt := range opts {
|
||||
opt(drv.opts)
|
||||
}
|
||||
|
||||
return drv
|
||||
}
|
||||
@ -60,7 +69,37 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return LoadHTMLDocument(ctx, conn, url)
|
||||
client := cdp.NewClient(conn)
|
||||
|
||||
err = runBatch(
|
||||
func() error {
|
||||
return client.Page.Enable(ctx)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.Page.SetLifecycleEventsEnabled(
|
||||
ctx,
|
||||
page.NewSetLifecycleEventsEnabledArgs(true),
|
||||
)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.DOM.Enable(ctx)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.Runtime.Enable(ctx)
|
||||
},
|
||||
|
||||
func() error {
|
||||
return client.Emulation.SetUserAgentOverride(
|
||||
ctx,
|
||||
emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()),
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
return LoadHTMLDocument(ctx, conn, client, url)
|
||||
}
|
||||
|
||||
func (drv *Driver) Close() error {
|
@ -29,6 +29,26 @@ func runBatch(funcs ...batchFunc) error {
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
func getRootElement(client *cdp.Client) (dom.Node, values.String, error) {
|
||||
args := dom.NewGetDocumentArgs()
|
||||
args.Depth = pointerInt(1) // lets load the entire document
|
||||
ctx := context.Background()
|
||||
|
||||
d, err := client.DOM.GetDocument(ctx, args)
|
||||
|
||||
if err != nil {
|
||||
return dom.Node{}, values.EmptyString, err
|
||||
}
|
||||
|
||||
innerHTML, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID))
|
||||
|
||||
if err != nil {
|
||||
return dom.Node{}, values.EmptyString, err
|
||||
}
|
||||
|
||||
return d.Root, values.NewString(innerHTML.OuterHTML), nil
|
||||
}
|
||||
|
||||
func parseAttrs(attrs []string) *values.Object {
|
||||
var attr values.String
|
||||
|
||||
|
15
pkg/stdlib/html/driver/dynamic/options.go
Normal file
15
pkg/stdlib/html/driver/dynamic/options.go
Normal file
@ -0,0 +1,15 @@
|
||||
package dynamic
|
||||
|
||||
// Options holds the settings of the dynamic driver.
type Options struct {
	// proxy is the proxy address set through WithProxy.
	proxy string
}

// Option mutates an Options value; NewDriver applies options in order.
type Option func(opts *Options)
|
||||
|
||||
func WithProxy(address string) Option {
|
||||
return func(opts *Options) {
|
||||
opts.proxy = address
|
||||
}
|
||||
}
|
@ -8,29 +8,62 @@ import (
|
||||
"github.com/corpix/uarand"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sethgrid/pester"
|
||||
httpx "net/http"
|
||||
"net/http"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
type Driver struct {
|
||||
client *pester.Client
|
||||
client *pester.Client
|
||||
options *Options
|
||||
}
|
||||
|
||||
func NewDriver(setters ...Option) *Driver {
|
||||
client := pester.New()
|
||||
client.Concurrency = 3
|
||||
client.MaxRetries = 5
|
||||
client.Backoff = pester.ExponentialBackoff
|
||||
|
||||
for _, setter := range setters {
|
||||
setter(client)
|
||||
func NewDriver(opts ...Option) *Driver {
|
||||
drv := new(Driver)
|
||||
drv.options = &Options{
|
||||
concurrency: 3,
|
||||
maxRetries: 5,
|
||||
backoff: pester.ExponentialBackoff,
|
||||
}
|
||||
|
||||
return &Driver{client}
|
||||
for _, opt := range opts {
|
||||
opt(drv.options)
|
||||
}
|
||||
|
||||
if drv.options.proxy == "" {
|
||||
drv.client = pester.New()
|
||||
} else {
|
||||
client, err := newClientWithProxy(drv.options)
|
||||
|
||||
if err != nil {
|
||||
drv.client = pester.New()
|
||||
} else {
|
||||
drv.client = pester.NewExtendedClient(client)
|
||||
}
|
||||
}
|
||||
|
||||
drv.client.Concurrency = drv.options.concurrency
|
||||
drv.client.MaxRetries = drv.options.maxRetries
|
||||
drv.client.Backoff = drv.options.backoff
|
||||
|
||||
return drv
|
||||
}
|
||||
|
||||
func newClientWithProxy(options *Options) (*http.Client, error) {
|
||||
proxyURL, err := url.Parse(options.proxy)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
proxy := http.ProxyURL(proxyURL)
|
||||
tr := &http.Transport{Proxy: proxy}
|
||||
|
||||
return &http.Client{Transport: tr}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) {
|
||||
url := targetURL.String()
|
||||
req, err := httpx.NewRequest(httpx.MethodGet, url, nil)
|
||||
u := targetURL.String()
|
||||
req, err := http.NewRequest(http.MethodGet, u, nil)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -45,7 +78,7 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
|
||||
resp, err := d.client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to retrieve a document %s", url)
|
||||
return nil, errors.Wrapf(err, "failed to retrieve a document %s", u)
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
@ -53,10 +86,10 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to parse a document %s", url)
|
||||
return nil, errors.Wrapf(err, "failed to parse a document %s", u)
|
||||
}
|
||||
|
||||
return NewHTMLDocument(url, doc)
|
||||
return NewHTMLDocument(u, doc)
|
||||
}
|
||||
|
||||
func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) {
|
@ -1,37 +1,51 @@
|
||||
package static
|
||||
|
||||
import "github.com/sethgrid/pester"
|
||||
import (
|
||||
"github.com/sethgrid/pester"
|
||||
)
|
||||
|
||||
type (
|
||||
Option func(opts *pester.Client)
|
||||
Option func(opts *Options)
|
||||
Options struct {
|
||||
backoff pester.BackoffStrategy
|
||||
maxRetries int
|
||||
concurrency int
|
||||
proxy string
|
||||
}
|
||||
)
|
||||
|
||||
func WithDefaultBackoff() Option {
|
||||
return func(opts *pester.Client) {
|
||||
opts.Backoff = pester.DefaultBackoff
|
||||
return func(opts *Options) {
|
||||
opts.backoff = pester.DefaultBackoff
|
||||
}
|
||||
}
|
||||
|
||||
func WithLinearBackoff() Option {
|
||||
return func(opts *pester.Client) {
|
||||
opts.Backoff = pester.LinearBackoff
|
||||
return func(opts *Options) {
|
||||
opts.backoff = pester.LinearBackoff
|
||||
}
|
||||
}
|
||||
|
||||
func WithExponentialBackoff() Option {
|
||||
return func(opts *pester.Client) {
|
||||
opts.Backoff = pester.ExponentialBackoff
|
||||
return func(opts *Options) {
|
||||
opts.backoff = pester.ExponentialBackoff
|
||||
}
|
||||
}
|
||||
|
||||
func WithMaxRetries(value int) Option {
|
||||
return func(opts *pester.Client) {
|
||||
opts.MaxRetries = value
|
||||
return func(opts *Options) {
|
||||
opts.maxRetries = value
|
||||
}
|
||||
}
|
||||
|
||||
func WithConcurrency(value int) Option {
|
||||
return func(opts *pester.Client) {
|
||||
opts.Concurrency = value
|
||||
return func(opts *Options) {
|
||||
opts.concurrency = value
|
||||
}
|
||||
}
|
||||
|
||||
func WithProxy(address string) Option {
|
||||
return func(opts *Options) {
|
||||
opts.proxy = address
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user