1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-08-13 19:52:52 +02:00

Bug/#31 ua (#73)

* Made custom and random UA optional

* Added private context keys
This commit is contained in:
Tim Voronov
2018-10-07 22:18:57 -04:00
committed by GitHub
parent eaf60b6846
commit a3d3fe727d
12 changed files with 159 additions and 31 deletions

View File

@@ -57,6 +57,7 @@ func Exec(query string, opts Options) {
runtime.WithLogLevel(logging.DebugLevel),
runtime.WithParams(opts.Params),
runtime.WithProxy(opts.Proxy),
runtime.WithUserAgent(opts.UserAgent),
)
if err != nil {

View File

@@ -1,7 +1,8 @@
package cli
type Options struct {
Cdp string
Params map[string]interface{}
Proxy string
Cdp string
Params map[string]interface{}
Proxy string
UserAgent string
}

View File

@@ -99,6 +99,7 @@ func Repl(version string, opts Options) {
runtime.WithLogLevel(logging.DebugLevel),
runtime.WithParams(opts.Params),
runtime.WithProxy(opts.Proxy),
runtime.WithUserAgent(opts.UserAgent),
)
timer.Stop()

13
main.go
View File

@@ -82,6 +82,12 @@ var (
"",
"address of proxy server to use (only applicable for static pages)",
)
userAgent = flag.String(
"user-agent",
"",
"set custom user agent. '*' triggers UA generation",
)
)
func main() {
@@ -141,9 +147,10 @@ func main() {
}
opts := cli.Options{
Cdp: cdpConn,
Params: p,
Proxy: *proxyAddress,
Cdp: cdpConn,
Params: p,
Proxy: *proxyAddress,
UserAgent: *userAgent,
}
stat, _ := os.Stdin.Stat()

19
pkg/html/common/ua.go Normal file
View File

@@ -0,0 +1,19 @@
package common
import (
"github.com/MontFerret/ferret/pkg/runtime/env"
"github.com/corpix/uarand"
)
// GetUserAgent resolves a configured user-agent setting into the value a
// driver should actually send.
//
//   - An empty val yields an empty string.
//   - env.RandomUserAgent yields a randomly chosen User-Agent string.
//   - Any other val is returned unchanged.
func GetUserAgent(val string) string {
	switch val {
	case "":
		return ""
	case env.RandomUserAgent:
		// TODO: Change the implementation
		return uarand.GetRandom()
	default:
		return val
	}
}

View File

@@ -10,11 +10,15 @@ import (
"github.com/MontFerret/ferret/pkg/runtime/values"
)
type Name string
type (
// DriverName identifies an HTML driver; the values declared in this file
// are Dynamic and Static.
DriverName string
// dynamicCtxKey is the unexported context key under which the dynamic
// driver is stored. Being an unexported type, it cannot be constructed
// outside this package.
dynamicCtxKey struct{}
// staticCtxKey is the unexported context key under which the static
// driver is stored.
staticCtxKey struct{}
)
const (
Dynamic Name = "dynamic"
Static Name = "static"
Dynamic DriverName = "dynamic"
Static DriverName = "static"
)
type Driver interface {
@@ -22,12 +26,34 @@ type Driver interface {
Close() error
}
func ToContext(ctx context.Context, name Name, drv Driver) context.Context {
return context.WithValue(ctx, name, drv)
// ToContext returns a copy of ctx that carries drv under the private context
// key matching name. When name is neither Dynamic nor Static, ctx is returned
// unchanged and drv is not stored.
func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context {
	if name == Dynamic {
		return context.WithValue(ctx, dynamicCtxKey{}, drv)
	}

	if name == Static {
		return context.WithValue(ctx, staticCtxKey{}, drv)
	}

	// Unknown driver name: nothing to attach.
	return ctx
}
func FromContext(ctx context.Context, name Name) (Driver, error) {
val := ctx.Value(name)
func FromContext(ctx context.Context, name DriverName) (Driver, error) {
var key interface{}
switch name {
case Dynamic:
key = dynamicCtxKey{}
case Static:
key = staticCtxKey{}
default:
return nil, core.Error(core.ErrInvalidArgument, fmt.Sprintf("%s driver", name))
}
val := ctx.Value(key)
drv, ok := val.(Driver)
@@ -43,10 +69,11 @@ func WithDynamicDriver(ctx context.Context) context.Context {
return context.WithValue(
ctx,
Dynamic,
dynamicCtxKey{},
dynamic.NewDriver(
e.CDPAddress,
dynamic.WithProxy(e.ProxyAddress),
dynamic.WithUserAgent(e.UserAgent),
),
)
}
@@ -56,9 +83,10 @@ func WithStaticDriver(ctx context.Context) context.Context {
return context.WithValue(
ctx,
Static,
staticCtxKey{},
static.NewDriver(
static.WithProxy(e.ProxyAddress),
static.WithUserAgent(e.UserAgent),
),
)
}

View File

@@ -2,8 +2,9 @@ package dynamic
import (
"context"
"github.com/MontFerret/ferret/pkg/html/common"
"github.com/MontFerret/ferret/pkg/runtime/logging"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/corpix/uarand"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/devtool"
"github.com/mafredri/cdp/protocol/emulation"
@@ -22,25 +23,33 @@ type Driver struct {
client *cdp.Client
session *session.Manager
contextID target.BrowserContextID
opts *Options
options *Options
}
func NewDriver(address string, opts ...Option) *Driver {
drv := new(Driver)
drv.dev = devtool.New(address)
drv.opts = new(Options)
drv.options = new(Options)
for _, opt := range opts {
opt(drv.opts)
opt(drv.options)
}
return drv
}
func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (values.HTMLNode, error) {
logger := logging.FromContext(ctx)
err := drv.init(ctx)
if err != nil {
logger.
Error().
Err(err).
Str("driver", "dynamic").
Msg("failed to initialize the driver")
return nil, err
}
@@ -59,6 +68,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
createTarget, err := drv.client.Target.CreateTarget(ctx, createTargetArgs)
if err != nil {
logger.
Error().
Err(err).
Str("driver", "dynamic").
Msg("failed to create a browser target")
return nil, err
}
@@ -66,6 +81,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
conn, err := drv.session.Dial(ctx, createTarget.TargetID)
if err != nil {
logger.
Error().
Err(err).
Str("driver", "dynamic").
Msg("failed to establish a connection")
return nil, err
}
@@ -92,9 +113,21 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
},
func() error {
ua := common.GetUserAgent(drv.options.userAgent)
logger.
Debug().
Str("user-agent", ua).
Msg("using User-Agent")
// do not use custom user agent
if ua == "" {
return nil
}
return client.Emulation.SetUserAgentOverride(
ctx,
emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()),
emulation.NewSetUserAgentOverrideArgs(ua),
)
},
)

View File

@@ -2,7 +2,8 @@ package dynamic
type (
Options struct {
proxy string
proxy string
userAgent string
}
Option func(opts *Options)
@@ -13,3 +14,9 @@ func WithProxy(address string) Option {
opts.proxy = address
}
}
// WithUserAgent returns an Option that records value as the user-agent
// setting on the dynamic driver's Options.
func WithUserAgent(value string) Option {
	return func(o *Options) {
		o.userAgent = value
	}
}

View File

@@ -3,6 +3,7 @@ package static
import (
"bytes"
"context"
"github.com/MontFerret/ferret/pkg/html/common"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery"
"github.com/corpix/uarand"
@@ -61,7 +62,7 @@ func newClientWithProxy(options *Options) (*http.Client, error) {
return &http.Client{Transport: tr}, nil
}
func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) {
func (drv *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) {
u := targetURL.String()
req, err := http.NewRequest(http.MethodGet, u, nil)
@@ -73,9 +74,15 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
req.Header.Set("Accept-Language", "en-US,en;q=0.9,ru;q=0.8")
req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("Pragma", "no-cache")
req.Header.Set("User-Agent", uarand.GetRandom())
resp, err := d.client.Do(req)
// Resolve the configured user agent: empty means "leave the header alone",
// the random marker is expanded by common.GetUserAgent, anything else is
// taken literally.
ua := common.GetUserAgent(drv.options.userAgent)

// Send the resolved value. Previously a fresh uarand.GetRandom() was sent
// here, which discarded any explicitly configured user agent.
// NOTE(review): if this was the last use of uarand in this file, its import
// must be dropped as well or the package will not compile.
if ua != "" {
	req.Header.Set("User-Agent", ua)
}
resp, err := drv.client.Do(req)
if err != nil {
return nil, errors.Wrapf(err, "failed to retrieve a document %s", u)
@@ -92,7 +99,7 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
return NewHTMLDocument(u, doc)
}
func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) {
func (drv *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) {
buf := bytes.NewBuffer([]byte(str))
doc, err := goquery.NewDocumentFromReader(buf)
@@ -104,8 +111,8 @@ func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTM
return NewHTMLDocument("#string", doc)
}
func (d *Driver) Close() error {
d.client = nil
// Close drops the driver's reference to its client and always returns nil.
func (drv *Driver) Close() error {
drv.client = nil
return nil
}

View File

@@ -11,6 +11,7 @@ type (
maxRetries int
concurrency int
proxy string
userAgent string
}
)
@@ -49,3 +50,9 @@ func WithProxy(address string) Option {
opts.proxy = address
}
}
// WithUserAgent returns an Option that records value as the user-agent
// setting on the static driver's Options.
func WithUserAgent(value string) Option {
	return func(o *Options) {
		o.userAgent = value
	}
}

View File

@@ -8,9 +8,12 @@ type (
Environment struct {
CDPAddress string
ProxyAddress string
UserAgent string
}
)
// RandomUserAgent is the special user-agent setting that asks for a generated
// User-Agent string instead of a literal one.
const RandomUserAgent = "*"
// WithContext returns a copy of ctx that carries the Environment e under the
// package's ctxKey.
func WithContext(ctx context.Context, e Environment) context.Context {
	key := ctxKey{}

	return context.WithValue(ctx, key, e)
}

View File

@@ -12,10 +12,11 @@ import (
type (
Options struct {
proxy string
cdp string
params map[string]core.Value
logging *logging.Options
proxy string
cdp string
params map[string]core.Value
logging *logging.Options
userAgent string
}
Option func(*Options)
@@ -58,6 +59,18 @@ func WithProxy(address string) Option {
}
}
// WithUserAgent returns an Option that sets the runtime's user-agent setting
// to value.
func WithUserAgent(value string) Option {
	return func(o *Options) {
		o.userAgent = value
	}
}
// WithRandomUserAgent returns an Option that sets the runtime's user-agent
// setting to env.RandomUserAgent.
func WithRandomUserAgent() Option {
	return func(o *Options) {
		o.userAgent = env.RandomUserAgent
	}
}
func WithLog(writer io.Writer) Option {
return func(options *Options) {
options.logging.Writer = writer
@@ -76,6 +89,7 @@ func (opts *Options) withContext(parent context.Context) context.Context {
ctx = env.WithContext(ctx, env.Environment{
CDPAddress: opts.cdp,
ProxyAddress: opts.proxy,
UserAgent: opts.userAgent,
})
return ctx