diff --git a/cli/exec.go b/cli/exec.go index 142c310d..855ba9a2 100644 --- a/cli/exec.go +++ b/cli/exec.go @@ -57,6 +57,7 @@ func Exec(query string, opts Options) { runtime.WithLogLevel(logging.DebugLevel), runtime.WithParams(opts.Params), runtime.WithProxy(opts.Proxy), + runtime.WithUserAgent(opts.UserAgent), ) if err != nil { diff --git a/cli/options.go b/cli/options.go index 58bc3a9d..791a05de 100644 --- a/cli/options.go +++ b/cli/options.go @@ -1,7 +1,8 @@ package cli type Options struct { - Cdp string - Params map[string]interface{} - Proxy string + Cdp string + Params map[string]interface{} + Proxy string + UserAgent string /* passed to runtime.WithUserAgent in the Exec and Repl hunks of this patch */ } diff --git a/cli/repl.go b/cli/repl.go index e97e7918..29f9e5f1 100644 --- a/cli/repl.go +++ b/cli/repl.go @@ -99,6 +99,7 @@ func Repl(version string, opts Options) { runtime.WithLogLevel(logging.DebugLevel), runtime.WithParams(opts.Params), runtime.WithProxy(opts.Proxy), + runtime.WithUserAgent(opts.UserAgent), ) timer.Stop() diff --git a/main.go b/main.go index 0a5d3965..f7f6935a 100644 --- a/main.go +++ b/main.go @@ -82,6 +82,12 @@ var ( "", "address of proxy server to use (only applicable for static pages)", ) + + userAgent = flag.String( + "user-agent", + "", + "set custom user agent. 
'*' triggers UA generation", + ) ) func main() { @@ -141,9 +147,10 @@ func main() { } opts := cli.Options{ - Cdp: cdpConn, - Params: p, - Proxy: *proxyAddress, + Cdp: cdpConn, + Params: p, + Proxy: *proxyAddress, + UserAgent: *userAgent, } stat, _ := os.Stdin.Stat() diff --git a/pkg/html/common/ua.go b/pkg/html/common/ua.go new file mode 100644 index 00000000..6e0bd82c --- /dev/null +++ b/pkg/html/common/ua.go @@ -0,0 +1,19 @@ +package common + +import ( + "github.com/MontFerret/ferret/pkg/runtime/env" + "github.com/corpix/uarand" +) + +/* GetUserAgent returns val unchanged (including the empty string) unless val equals env.RandomUserAgent, in which case it returns the result of uarand.GetRandom(). */ func GetUserAgent(val string) string { + if val == "" { + return val + } + + if val != env.RandomUserAgent { + return val + } + + // TODO: Change the implementation + return uarand.GetRandom() +} diff --git a/pkg/html/driver.go b/pkg/html/driver.go index 870c7726..b6d1c055 100644 --- a/pkg/html/driver.go +++ b/pkg/html/driver.go @@ -10,11 +10,15 @@ import ( "github.com/MontFerret/ferret/pkg/runtime/values" ) -type Name string +type ( + DriverName string + dynamicCtxKey struct{} /* context key type used for the Dynamic driver */ + staticCtxKey struct{} /* context key type used for the Static driver */ +) const ( - Dynamic Name = "dynamic" - Static Name = "static" + Dynamic DriverName = "dynamic" + Static DriverName = "static" ) type Driver interface { @@ -22,12 +26,34 @@ type Driver interface { Close() error } -func ToContext(ctx context.Context, name Name, drv Driver) context.Context { - return context.WithValue(ctx, name, drv) +/* ToContext stores drv in ctx under dynamicCtxKey{} or staticCtxKey{} depending on name; for any other name it returns ctx unchanged. */ func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context { + var key interface{} + + switch name { + case Dynamic: + key = dynamicCtxKey{} + case Static: + key = staticCtxKey{} + default: + return ctx + } + + return context.WithValue(ctx, key, drv) } -func FromContext(ctx context.Context, name Name) (Driver, error) { - val := ctx.Value(name) +func FromContext(ctx context.Context, name DriverName) (Driver, error) { + var key interface{} + + switch name { + case Dynamic: + key = dynamicCtxKey{} + case Static: + key = staticCtxKey{} + default: + return nil, 
core.Error(core.ErrInvalidArgument, fmt.Sprintf("%s driver", name)) + } + + val := ctx.Value(key) drv, ok := val.(Driver) @@ -43,10 +69,11 @@ func WithDynamicDriver(ctx context.Context) context.Context { return context.WithValue( ctx, - Dynamic, + dynamicCtxKey{}, /* same key value ToContext and FromContext select for Dynamic */ dynamic.NewDriver( e.CDPAddress, dynamic.WithProxy(e.ProxyAddress), + dynamic.WithUserAgent(e.UserAgent), ), ) } @@ -56,9 +83,10 @@ func WithStaticDriver(ctx context.Context) context.Context { return context.WithValue( ctx, - Static, + staticCtxKey{}, /* same key value ToContext and FromContext select for Static */ static.NewDriver( static.WithProxy(e.ProxyAddress), + static.WithUserAgent(e.UserAgent), ), ) } diff --git a/pkg/html/dynamic/driver.go b/pkg/html/dynamic/driver.go index 758c254e..765cdd24 100644 --- a/pkg/html/dynamic/driver.go +++ b/pkg/html/dynamic/driver.go @@ -2,8 +2,9 @@ package dynamic import ( "context" + "github.com/MontFerret/ferret/pkg/html/common" + "github.com/MontFerret/ferret/pkg/runtime/logging" "github.com/MontFerret/ferret/pkg/runtime/values" - "github.com/corpix/uarand" "github.com/mafredri/cdp" "github.com/mafredri/cdp/devtool" "github.com/mafredri/cdp/protocol/emulation" @@ -22,25 +23,33 @@ type Driver struct { client *cdp.Client session *session.Manager contextID target.BrowserContextID - opts *Options + options *Options } func NewDriver(address string, opts ...Option) *Driver { drv := new(Driver) drv.dev = devtool.New(address) - drv.opts = new(Options) + drv.options = new(Options) for _, opt := range opts { - opt(drv.opts) + opt(drv.options) /* each Option mutates the driver's Options in place */ } return drv } func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (values.HTMLNode, error) { + logger := logging.FromContext(ctx) + err := drv.init(ctx) if err != nil { + logger. + Error(). + Err(err). + Str("driver", "dynamic"). 
+ Msg("failed to initialize the driver") + return nil, err } @@ -59,6 +68,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va createTarget, err := drv.client.Target.CreateTarget(ctx, createTargetArgs) if err != nil { + logger. + Error(). + Err(err). + Str("driver", "dynamic"). + Msg("failed to create a browser target") + return nil, err } @@ -66,6 +81,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va conn, err := drv.session.Dial(ctx, createTarget.TargetID) if err != nil { + logger. + Error(). + Err(err). + Str("driver", "dynamic"). + Msg("failed to establish a connection") + return nil, err } @@ -92,9 +113,21 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va }, func() error { + ua := common.GetUserAgent(drv.options.userAgent) + + logger. + Debug(). + Str("user-agent", ua). + Msg("using User-Agent") + + // no user agent configured: skip the override entirely + if ua == "" { + return nil + } + return client.Emulation.SetUserAgentOverride( ctx, - emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()), + emulation.NewSetUserAgentOverrideArgs(ua), ) }, ) diff --git a/pkg/html/dynamic/options.go b/pkg/html/dynamic/options.go index 67b61945..b0f3b5a1 100644 --- a/pkg/html/dynamic/options.go +++ b/pkg/html/dynamic/options.go @@ -2,7 +2,8 @@ package dynamic type ( Options struct { - proxy string + proxy string + userAgent string } Option func(opts *Options) @@ -13,3 +14,9 @@ func WithProxy(address string) Option { opts.proxy = address } } + +/* WithUserAgent returns an Option that stores value in Options.userAgent. */ func WithUserAgent(value string) Option { + return func(opts *Options) { + opts.userAgent = value + } +} diff --git a/pkg/html/static/driver.go b/pkg/html/static/driver.go index ecf0fe12..1c04cf79 100644 --- a/pkg/html/static/driver.go +++ b/pkg/html/static/driver.go @@ -3,6 +3,7 @@ package static import ( "bytes" "context" + "github.com/MontFerret/ferret/pkg/html/common" "github.com/MontFerret/ferret/pkg/runtime/values" 
"github.com/PuerkitoBio/goquery" "github.com/corpix/uarand" @@ -61,7 +62,7 @@ func newClientWithProxy(options *Options) (*http.Client, error) { return &http.Client{Transport: tr}, nil } -func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) { +func (drv *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) { u := targetURL.String() req, err := http.NewRequest(http.MethodGet, u, nil) @@ -73,9 +74,15 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values req.Header.Set("Accept-Language", "en-US,en;q=0.9,ru;q=0.8") req.Header.Set("Cache-Control", "no-cache") req.Header.Set("Pragma", "no-cache") - req.Header.Set("User-Agent", uarand.GetRandom()) - resp, err := d.client.Do(req) + ua := common.GetUserAgent(drv.options.userAgent) + + // send the resolved user agent (the configured value, or a generated one for the random sentinel); when it is empty this code sets no User-Agent header + if ua != "" { + req.Header.Set("User-Agent", ua) /* NOTE(review): this was the only uarand reference visible in these hunks; if nothing else in static/driver.go uses it, the uarand import has to be dropped to keep the file building - confirm */ + } + + resp, err := drv.client.Do(req) if err != nil { return nil, errors.Wrapf(err, "failed to retrieve a document %s", u) @@ -92,7 +99,7 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values return NewHTMLDocument(u, doc) } -func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) { +func (drv *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) { buf := bytes.NewBuffer([]byte(str)) doc, err := goquery.NewDocumentFromReader(buf) @@ -104,8 +111,8 @@ func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTM return NewHTMLDocument("#string", doc) } -func (d *Driver) Close() error { - d.client = nil +func (drv *Driver) Close() error { + drv.client = nil return nil } diff --git a/pkg/html/static/options.go b/pkg/html/static/options.go index 7423367f..7559437b 100644 --- a/pkg/html/static/options.go +++ b/pkg/html/static/options.go @@ -11,6 +11,7 @@ type ( maxRetries int concurrency int proxy string + userAgent string } ) @@ 
-49,3 +50,9 @@ func WithProxy(address string) Option { opts.proxy = address } } + +/* WithUserAgent returns an Option that stores value in Options.userAgent. */ func WithUserAgent(value string) Option { + return func(opts *Options) { + opts.userAgent = value + } +} diff --git a/pkg/runtime/env/env.go b/pkg/runtime/env/env.go index c60f8af4..37a22886 100644 --- a/pkg/runtime/env/env.go +++ b/pkg/runtime/env/env.go @@ -8,9 +8,12 @@ type ( Environment struct { CDPAddress string ProxyAddress string + UserAgent string /* filled from Options.userAgent in runtime's withContext */ } ) +/* RandomUserAgent is the sentinel value that common.GetUserAgent replaces with the result of uarand.GetRandom(). */ const RandomUserAgent = "*" + func WithContext(ctx context.Context, e Environment) context.Context { return context.WithValue(ctx, ctxKey{}, e) } diff --git a/pkg/runtime/options.go b/pkg/runtime/options.go index 492fd6f4..3b443eaf 100644 --- a/pkg/runtime/options.go +++ b/pkg/runtime/options.go @@ -12,10 +12,11 @@ import ( type ( Options struct { - proxy string - cdp string - params map[string]core.Value - logging *logging.Options + proxy string + cdp string + params map[string]core.Value + logging *logging.Options + userAgent string } Option func(*Options) @@ -58,6 +59,18 @@ func WithProxy(address string) Option { } } +/* WithUserAgent returns an Option that sets Options.userAgent to value. */ func WithUserAgent(value string) Option { + return func(options *Options) { + options.userAgent = value + } +} + +/* WithRandomUserAgent returns an Option that sets Options.userAgent to env.RandomUserAgent. */ func WithRandomUserAgent() Option { + return func(options *Options) { + options.userAgent = env.RandomUserAgent + } +} + func WithLog(writer io.Writer) Option { return func(options *Options) { options.logging.Writer = writer @@ -76,6 +89,7 @@ func (opts *Options) withContext(parent context.Context) context.Context { ctx = env.WithContext(ctx, env.Environment{ CDPAddress: opts.cdp, ProxyAddress: opts.proxy, + UserAgent: opts.userAgent, }) return ctx