1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-08-13 19:52:52 +02:00

Bug/#31 ua (#73)

* Made custom and random UA optional

* Added private context keys
This commit is contained in:
Tim Voronov
2018-10-07 22:18:57 -04:00
committed by GitHub
parent eaf60b6846
commit a3d3fe727d
12 changed files with 159 additions and 31 deletions

View File

@@ -57,6 +57,7 @@ func Exec(query string, opts Options) {
runtime.WithLogLevel(logging.DebugLevel), runtime.WithLogLevel(logging.DebugLevel),
runtime.WithParams(opts.Params), runtime.WithParams(opts.Params),
runtime.WithProxy(opts.Proxy), runtime.WithProxy(opts.Proxy),
runtime.WithUserAgent(opts.UserAgent),
) )
if err != nil { if err != nil {

View File

@@ -1,7 +1,8 @@
package cli package cli
type Options struct { type Options struct {
Cdp string Cdp string
Params map[string]interface{} Params map[string]interface{}
Proxy string Proxy string
UserAgent string
} }

View File

@@ -99,6 +99,7 @@ func Repl(version string, opts Options) {
runtime.WithLogLevel(logging.DebugLevel), runtime.WithLogLevel(logging.DebugLevel),
runtime.WithParams(opts.Params), runtime.WithParams(opts.Params),
runtime.WithProxy(opts.Proxy), runtime.WithProxy(opts.Proxy),
runtime.WithUserAgent(opts.UserAgent),
) )
timer.Stop() timer.Stop()

13
main.go
View File

@@ -82,6 +82,12 @@ var (
"", "",
"address of proxy server to use (only applicable for static pages)", "address of proxy server to use (only applicable for static pages)",
) )
userAgent = flag.String(
"user-agent",
"",
"set custom user agent. '*' triggers UA generation",
)
) )
func main() { func main() {
@@ -141,9 +147,10 @@ func main() {
} }
opts := cli.Options{ opts := cli.Options{
Cdp: cdpConn, Cdp: cdpConn,
Params: p, Params: p,
Proxy: *proxyAddress, Proxy: *proxyAddress,
UserAgent: *userAgent,
} }
stat, _ := os.Stdin.Stat() stat, _ := os.Stdin.Stat()

19
pkg/html/common/ua.go Normal file
View File

@@ -0,0 +1,19 @@
package common
import (
"github.com/MontFerret/ferret/pkg/runtime/env"
"github.com/corpix/uarand"
)
// GetUserAgent resolves a configured User-Agent setting into a concrete value.
//
// The empty string and any value other than env.RandomUserAgent are returned
// unchanged. When val equals env.RandomUserAgent, the result comes from
// uarand.GetRandom instead.
func GetUserAgent(val string) string {
	// Only the random marker needs resolving; everything else passes through.
	if val == "" || val != env.RandomUserAgent {
		return val
	}

	// TODO: Change the implementation
	generated := uarand.GetRandom()

	return generated
}

View File

@@ -10,11 +10,15 @@ import (
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
) )
type Name string type (
DriverName string
dynamicCtxKey struct{}
staticCtxKey struct{}
)
const ( const (
Dynamic Name = "dynamic" Dynamic DriverName = "dynamic"
Static Name = "static" Static DriverName = "static"
) )
type Driver interface { type Driver interface {
@@ -22,12 +26,34 @@ type Driver interface {
Close() error Close() error
} }
func ToContext(ctx context.Context, name Name, drv Driver) context.Context { func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context {
return context.WithValue(ctx, name, drv) var key interface{}
switch name {
case Dynamic:
key = dynamicCtxKey{}
case Static:
key = staticCtxKey{}
default:
return ctx
}
return context.WithValue(ctx, key, drv)
} }
func FromContext(ctx context.Context, name Name) (Driver, error) { func FromContext(ctx context.Context, name DriverName) (Driver, error) {
val := ctx.Value(name) var key interface{}
switch name {
case Dynamic:
key = dynamicCtxKey{}
case Static:
key = staticCtxKey{}
default:
return nil, core.Error(core.ErrInvalidArgument, fmt.Sprintf("%s driver", name))
}
val := ctx.Value(key)
drv, ok := val.(Driver) drv, ok := val.(Driver)
@@ -43,10 +69,11 @@ func WithDynamicDriver(ctx context.Context) context.Context {
return context.WithValue( return context.WithValue(
ctx, ctx,
Dynamic, dynamicCtxKey{},
dynamic.NewDriver( dynamic.NewDriver(
e.CDPAddress, e.CDPAddress,
dynamic.WithProxy(e.ProxyAddress), dynamic.WithProxy(e.ProxyAddress),
dynamic.WithUserAgent(e.UserAgent),
), ),
) )
} }
@@ -56,9 +83,10 @@ func WithStaticDriver(ctx context.Context) context.Context {
return context.WithValue( return context.WithValue(
ctx, ctx,
Static, staticCtxKey{},
static.NewDriver( static.NewDriver(
static.WithProxy(e.ProxyAddress), static.WithProxy(e.ProxyAddress),
static.WithUserAgent(e.UserAgent),
), ),
) )
} }

View File

@@ -2,8 +2,9 @@ package dynamic
import ( import (
"context" "context"
"github.com/MontFerret/ferret/pkg/html/common"
"github.com/MontFerret/ferret/pkg/runtime/logging"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/corpix/uarand"
"github.com/mafredri/cdp" "github.com/mafredri/cdp"
"github.com/mafredri/cdp/devtool" "github.com/mafredri/cdp/devtool"
"github.com/mafredri/cdp/protocol/emulation" "github.com/mafredri/cdp/protocol/emulation"
@@ -22,25 +23,33 @@ type Driver struct {
client *cdp.Client client *cdp.Client
session *session.Manager session *session.Manager
contextID target.BrowserContextID contextID target.BrowserContextID
opts *Options options *Options
} }
func NewDriver(address string, opts ...Option) *Driver { func NewDriver(address string, opts ...Option) *Driver {
drv := new(Driver) drv := new(Driver)
drv.dev = devtool.New(address) drv.dev = devtool.New(address)
drv.opts = new(Options) drv.options = new(Options)
for _, opt := range opts { for _, opt := range opts {
opt(drv.opts) opt(drv.options)
} }
return drv return drv
} }
func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (values.HTMLNode, error) { func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (values.HTMLNode, error) {
logger := logging.FromContext(ctx)
err := drv.init(ctx) err := drv.init(ctx)
if err != nil { if err != nil {
logger.
Error().
Err(err).
Str("driver", "dynamic").
Msg("failed to initialize the driver")
return nil, err return nil, err
} }
@@ -59,6 +68,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
createTarget, err := drv.client.Target.CreateTarget(ctx, createTargetArgs) createTarget, err := drv.client.Target.CreateTarget(ctx, createTargetArgs)
if err != nil { if err != nil {
logger.
Error().
Err(err).
Str("driver", "dynamic").
Msg("failed to create a browser target")
return nil, err return nil, err
} }
@@ -66,6 +81,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
conn, err := drv.session.Dial(ctx, createTarget.TargetID) conn, err := drv.session.Dial(ctx, createTarget.TargetID)
if err != nil { if err != nil {
logger.
Error().
Err(err).
Str("driver", "dynamic").
Msg("failed to establish a connection")
return nil, err return nil, err
} }
@@ -92,9 +113,21 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
}, },
func() error { func() error {
ua := common.GetUserAgent(drv.options.userAgent)
logger.
Debug().
Str("user-agent", ua).
Msg("using User-Agent")
// do not use custom user agent
if ua == "" {
return nil
}
return client.Emulation.SetUserAgentOverride( return client.Emulation.SetUserAgentOverride(
ctx, ctx,
emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()), emulation.NewSetUserAgentOverrideArgs(ua),
) )
}, },
) )

View File

@@ -2,7 +2,8 @@ package dynamic
type ( type (
Options struct { Options struct {
proxy string proxy string
userAgent string
} }
Option func(opts *Options) Option func(opts *Options)
@@ -13,3 +14,9 @@ func WithProxy(address string) Option {
opts.proxy = address opts.proxy = address
} }
} }
// WithUserAgent returns an Option that stores value in the userAgent field of
// the Options it is applied to.
func WithUserAgent(value string) Option {
	apply := func(target *Options) {
		target.userAgent = value
	}

	return apply
}

View File

@@ -3,6 +3,7 @@ package static
import ( import (
"bytes" "bytes"
"context" "context"
"github.com/MontFerret/ferret/pkg/html/common"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/corpix/uarand" "github.com/corpix/uarand"
@@ -61,7 +62,7 @@ func newClientWithProxy(options *Options) (*http.Client, error) {
return &http.Client{Transport: tr}, nil return &http.Client{Transport: tr}, nil
} }
func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) { func (drv *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) {
u := targetURL.String() u := targetURL.String()
req, err := http.NewRequest(http.MethodGet, u, nil) req, err := http.NewRequest(http.MethodGet, u, nil)
@@ -73,9 +74,15 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
req.Header.Set("Accept-Language", "en-US,en;q=0.9,ru;q=0.8") req.Header.Set("Accept-Language", "en-US,en;q=0.9,ru;q=0.8")
req.Header.Set("Cache-Control", "no-cache") req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("Pragma", "no-cache") req.Header.Set("Pragma", "no-cache")
req.Header.Set("User-Agent", uarand.GetRandom())
resp, err := d.client.Do(req) ua := common.GetUserAgent(drv.options.userAgent)
// Send the resolved User-Agent: either the explicitly configured value or the
// one generated by common.GetUserAgent. Previously a fresh random value was
// sent here, which discarded an explicitly configured User-Agent.
// An empty ua means no override was requested, so the header is left unset.
// NOTE(review): if this was the file's last use of uarand, its import must be
// removed as well, otherwise the package will not compile.
if ua != "" {
	req.Header.Set("User-Agent", ua)
}
resp, err := drv.client.Do(req)
if err != nil { if err != nil {
return nil, errors.Wrapf(err, "failed to retrieve a document %s", u) return nil, errors.Wrapf(err, "failed to retrieve a document %s", u)
@@ -92,7 +99,7 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
return NewHTMLDocument(u, doc) return NewHTMLDocument(u, doc)
} }
func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) { func (drv *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) {
buf := bytes.NewBuffer([]byte(str)) buf := bytes.NewBuffer([]byte(str))
doc, err := goquery.NewDocumentFromReader(buf) doc, err := goquery.NewDocumentFromReader(buf)
@@ -104,8 +111,8 @@ func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTM
return NewHTMLDocument("#string", doc) return NewHTMLDocument("#string", doc)
} }
func (d *Driver) Close() error { func (drv *Driver) Close() error {
d.client = nil drv.client = nil
return nil return nil
} }

View File

@@ -11,6 +11,7 @@ type (
maxRetries int maxRetries int
concurrency int concurrency int
proxy string proxy string
userAgent string
} }
) )
@@ -49,3 +50,9 @@ func WithProxy(address string) Option {
opts.proxy = address opts.proxy = address
} }
} }
// WithUserAgent returns an Option that stores value in the userAgent field of
// the Options it is applied to.
func WithUserAgent(value string) Option {
	assign := func(cfg *Options) {
		cfg.userAgent = value
	}

	return assign
}

View File

@@ -8,9 +8,12 @@ type (
Environment struct { Environment struct {
CDPAddress string CDPAddress string
ProxyAddress string ProxyAddress string
UserAgent string
} }
) )
const RandomUserAgent = "*"
func WithContext(ctx context.Context, e Environment) context.Context { func WithContext(ctx context.Context, e Environment) context.Context {
return context.WithValue(ctx, ctxKey{}, e) return context.WithValue(ctx, ctxKey{}, e)
} }

View File

@@ -12,10 +12,11 @@ import (
type ( type (
Options struct { Options struct {
proxy string proxy string
cdp string cdp string
params map[string]core.Value params map[string]core.Value
logging *logging.Options logging *logging.Options
userAgent string
} }
Option func(*Options) Option func(*Options)
@@ -58,6 +59,18 @@ func WithProxy(address string) Option {
} }
} }
// WithUserAgent returns an Option that records value as the userAgent option.
func WithUserAgent(value string) Option {
	setter := func(o *Options) {
		o.userAgent = value
	}

	return setter
}
// WithRandomUserAgent returns an Option that sets the userAgent option to
// env.RandomUserAgent.
func WithRandomUserAgent() Option {
	setter := func(o *Options) {
		o.userAgent = env.RandomUserAgent
	}

	return setter
}
func WithLog(writer io.Writer) Option { func WithLog(writer io.Writer) Option {
return func(options *Options) { return func(options *Options) {
options.logging.Writer = writer options.logging.Writer = writer
@@ -76,6 +89,7 @@ func (opts *Options) withContext(parent context.Context) context.Context {
ctx = env.WithContext(ctx, env.Environment{ ctx = env.WithContext(ctx, env.Environment{
CDPAddress: opts.cdp, CDPAddress: opts.cdp,
ProxyAddress: opts.proxy, ProxyAddress: opts.proxy,
UserAgent: opts.userAgent,
}) })
return ctx return ctx