mirror of
https://github.com/MontFerret/ferret.git
synced 2025-08-13 19:52:52 +02:00
Bug/#31 ua (#73)
* Made custom and random UA optional * Added private context keys
This commit is contained in:
@@ -57,6 +57,7 @@ func Exec(query string, opts Options) {
|
||||
runtime.WithLogLevel(logging.DebugLevel),
|
||||
runtime.WithParams(opts.Params),
|
||||
runtime.WithProxy(opts.Proxy),
|
||||
runtime.WithUserAgent(opts.UserAgent),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
@@ -1,7 +1,8 @@
|
||||
package cli
|
||||
|
||||
type Options struct {
|
||||
Cdp string
|
||||
Params map[string]interface{}
|
||||
Proxy string
|
||||
Cdp string
|
||||
Params map[string]interface{}
|
||||
Proxy string
|
||||
UserAgent string
|
||||
}
|
||||
|
@@ -99,6 +99,7 @@ func Repl(version string, opts Options) {
|
||||
runtime.WithLogLevel(logging.DebugLevel),
|
||||
runtime.WithParams(opts.Params),
|
||||
runtime.WithProxy(opts.Proxy),
|
||||
runtime.WithUserAgent(opts.UserAgent),
|
||||
)
|
||||
|
||||
timer.Stop()
|
||||
|
13
main.go
13
main.go
@@ -82,6 +82,12 @@ var (
|
||||
"",
|
||||
"address of proxy server to use (only applicable for static pages)",
|
||||
)
|
||||
|
||||
userAgent = flag.String(
|
||||
"user-agent",
|
||||
"",
|
||||
"set custom user agent. '*' triggers UA generation",
|
||||
)
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -141,9 +147,10 @@ func main() {
|
||||
}
|
||||
|
||||
opts := cli.Options{
|
||||
Cdp: cdpConn,
|
||||
Params: p,
|
||||
Proxy: *proxyAddress,
|
||||
Cdp: cdpConn,
|
||||
Params: p,
|
||||
Proxy: *proxyAddress,
|
||||
UserAgent: *userAgent,
|
||||
}
|
||||
|
||||
stat, _ := os.Stdin.Stat()
|
||||
|
19
pkg/html/common/ua.go
Normal file
19
pkg/html/common/ua.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/env"
|
||||
"github.com/corpix/uarand"
|
||||
)
|
||||
|
||||
func GetUserAgent(val string) string {
|
||||
if val == "" {
|
||||
return val
|
||||
}
|
||||
|
||||
if val != env.RandomUserAgent {
|
||||
return val
|
||||
}
|
||||
|
||||
// TODO: Change the implementation
|
||||
return uarand.GetRandom()
|
||||
}
|
@@ -10,11 +10,15 @@ import (
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
)
|
||||
|
||||
type Name string
|
||||
// DriverName identifies a driver kind; Dynamic and Static are the declared
// values.
type DriverName string

// dynamicCtxKey is the unexported context key type under which the dynamic
// driver is stored.
type dynamicCtxKey struct{}

// staticCtxKey is the unexported context key type under which the static
// driver is stored.
type staticCtxKey struct{}
|
||||
|
||||
const (
|
||||
Dynamic Name = "dynamic"
|
||||
Static Name = "static"
|
||||
Dynamic DriverName = "dynamic"
|
||||
Static DriverName = "static"
|
||||
)
|
||||
|
||||
type Driver interface {
|
||||
@@ -22,12 +26,34 @@ type Driver interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
func ToContext(ctx context.Context, name Name, drv Driver) context.Context {
|
||||
return context.WithValue(ctx, name, drv)
|
||||
func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context {
|
||||
var key interface{}
|
||||
|
||||
switch name {
|
||||
case Dynamic:
|
||||
key = dynamicCtxKey{}
|
||||
case Static:
|
||||
key = staticCtxKey{}
|
||||
default:
|
||||
return ctx
|
||||
}
|
||||
|
||||
return context.WithValue(ctx, key, drv)
|
||||
}
|
||||
|
||||
func FromContext(ctx context.Context, name Name) (Driver, error) {
|
||||
val := ctx.Value(name)
|
||||
func FromContext(ctx context.Context, name DriverName) (Driver, error) {
|
||||
var key interface{}
|
||||
|
||||
switch name {
|
||||
case Dynamic:
|
||||
key = dynamicCtxKey{}
|
||||
case Static:
|
||||
key = staticCtxKey{}
|
||||
default:
|
||||
return nil, core.Error(core.ErrInvalidArgument, fmt.Sprintf("%s driver", name))
|
||||
}
|
||||
|
||||
val := ctx.Value(key)
|
||||
|
||||
drv, ok := val.(Driver)
|
||||
|
||||
@@ -43,10 +69,11 @@ func WithDynamicDriver(ctx context.Context) context.Context {
|
||||
|
||||
return context.WithValue(
|
||||
ctx,
|
||||
Dynamic,
|
||||
dynamicCtxKey{},
|
||||
dynamic.NewDriver(
|
||||
e.CDPAddress,
|
||||
dynamic.WithProxy(e.ProxyAddress),
|
||||
dynamic.WithUserAgent(e.UserAgent),
|
||||
),
|
||||
)
|
||||
}
|
||||
@@ -56,9 +83,10 @@ func WithStaticDriver(ctx context.Context) context.Context {
|
||||
|
||||
return context.WithValue(
|
||||
ctx,
|
||||
Static,
|
||||
staticCtxKey{},
|
||||
static.NewDriver(
|
||||
static.WithProxy(e.ProxyAddress),
|
||||
static.WithUserAgent(e.UserAgent),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
@@ -2,8 +2,9 @@ package dynamic
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/html/common"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/logging"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/corpix/uarand"
|
||||
"github.com/mafredri/cdp"
|
||||
"github.com/mafredri/cdp/devtool"
|
||||
"github.com/mafredri/cdp/protocol/emulation"
|
||||
@@ -22,25 +23,33 @@ type Driver struct {
|
||||
client *cdp.Client
|
||||
session *session.Manager
|
||||
contextID target.BrowserContextID
|
||||
opts *Options
|
||||
options *Options
|
||||
}
|
||||
|
||||
func NewDriver(address string, opts ...Option) *Driver {
|
||||
drv := new(Driver)
|
||||
drv.dev = devtool.New(address)
|
||||
drv.opts = new(Options)
|
||||
drv.options = new(Options)
|
||||
|
||||
for _, opt := range opts {
|
||||
opt(drv.opts)
|
||||
opt(drv.options)
|
||||
}
|
||||
|
||||
return drv
|
||||
}
|
||||
|
||||
func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (values.HTMLNode, error) {
|
||||
logger := logging.FromContext(ctx)
|
||||
|
||||
err := drv.init(ctx)
|
||||
|
||||
if err != nil {
|
||||
logger.
|
||||
Error().
|
||||
Err(err).
|
||||
Str("driver", "dynamic").
|
||||
Msg("failed to initialize the driver")
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -59,6 +68,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
|
||||
createTarget, err := drv.client.Target.CreateTarget(ctx, createTargetArgs)
|
||||
|
||||
if err != nil {
|
||||
logger.
|
||||
Error().
|
||||
Err(err).
|
||||
Str("driver", "dynamic").
|
||||
Msg("failed to create a browser target")
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -66,6 +81,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
|
||||
conn, err := drv.session.Dial(ctx, createTarget.TargetID)
|
||||
|
||||
if err != nil {
|
||||
logger.
|
||||
Error().
|
||||
Err(err).
|
||||
Str("driver", "dynamic").
|
||||
Msg("failed to establish a connection")
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -92,9 +113,21 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va
|
||||
},
|
||||
|
||||
func() error {
|
||||
ua := common.GetUserAgent(drv.options.userAgent)
|
||||
|
||||
logger.
|
||||
Debug().
|
||||
Str("user-agent", ua).
|
||||
Msg("using User-Agent")
|
||||
|
||||
// do not use custom user agent
|
||||
if ua == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return client.Emulation.SetUserAgentOverride(
|
||||
ctx,
|
||||
emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()),
|
||||
emulation.NewSetUserAgentOverrideArgs(ua),
|
||||
)
|
||||
},
|
||||
)
|
||||
|
@@ -2,7 +2,8 @@ package dynamic
|
||||
|
||||
type (
|
||||
Options struct {
|
||||
proxy string
|
||||
proxy string
|
||||
userAgent string
|
||||
}
|
||||
|
||||
Option func(opts *Options)
|
||||
@@ -13,3 +14,9 @@ func WithProxy(address string) Option {
|
||||
opts.proxy = address
|
||||
}
|
||||
}
|
||||
|
||||
func WithUserAgent(value string) Option {
|
||||
return func(opts *Options) {
|
||||
opts.userAgent = value
|
||||
}
|
||||
}
|
||||
|
@@ -3,6 +3,7 @@ package static
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/html/common"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/corpix/uarand"
|
||||
@@ -61,7 +62,7 @@ func newClientWithProxy(options *Options) (*http.Client, error) {
|
||||
return &http.Client{Transport: tr}, nil
|
||||
}
|
||||
|
||||
func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) {
|
||||
func (drv *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) {
|
||||
u := targetURL.String()
|
||||
req, err := http.NewRequest(http.MethodGet, u, nil)
|
||||
|
||||
@@ -73,9 +74,15 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9,ru;q=0.8")
|
||||
req.Header.Set("Cache-Control", "no-cache")
|
||||
req.Header.Set("Pragma", "no-cache")
|
||||
req.Header.Set("User-Agent", uarand.GetRandom())
|
||||
|
||||
resp, err := d.client.Do(req)
|
||||
ua := common.GetUserAgent(drv.options.userAgent)
|
||||
|
||||
// use custom user agent
|
||||
if ua != "" {
|
||||
req.Header.Set("User-Agent", uarand.GetRandom())
|
||||
}
|
||||
|
||||
resp, err := drv.client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to retrieve a document %s", u)
|
||||
@@ -92,7 +99,7 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values
|
||||
return NewHTMLDocument(u, doc)
|
||||
}
|
||||
|
||||
func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) {
|
||||
func (drv *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) {
|
||||
buf := bytes.NewBuffer([]byte(str))
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(buf)
|
||||
@@ -104,8 +111,8 @@ func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTM
|
||||
return NewHTMLDocument("#string", doc)
|
||||
}
|
||||
|
||||
func (d *Driver) Close() error {
|
||||
d.client = nil
|
||||
func (drv *Driver) Close() error {
|
||||
drv.client = nil
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@@ -11,6 +11,7 @@ type (
|
||||
maxRetries int
|
||||
concurrency int
|
||||
proxy string
|
||||
userAgent string
|
||||
}
|
||||
)
|
||||
|
||||
@@ -49,3 +50,9 @@ func WithProxy(address string) Option {
|
||||
opts.proxy = address
|
||||
}
|
||||
}
|
||||
|
||||
func WithUserAgent(value string) Option {
|
||||
return func(opts *Options) {
|
||||
opts.userAgent = value
|
||||
}
|
||||
}
|
||||
|
3
pkg/runtime/env/env.go
vendored
3
pkg/runtime/env/env.go
vendored
@@ -8,9 +8,12 @@ type (
|
||||
Environment struct {
|
||||
CDPAddress string
|
||||
ProxyAddress string
|
||||
UserAgent string
|
||||
}
|
||||
)
|
||||
|
||||
// RandomUserAgent is the sentinel user-agent value that requests a generated
// User-Agent instead of a literal one.
const RandomUserAgent = "*"
|
||||
|
||||
func WithContext(ctx context.Context, e Environment) context.Context {
|
||||
return context.WithValue(ctx, ctxKey{}, e)
|
||||
}
|
||||
|
@@ -12,10 +12,11 @@ import (
|
||||
|
||||
type (
|
||||
Options struct {
|
||||
proxy string
|
||||
cdp string
|
||||
params map[string]core.Value
|
||||
logging *logging.Options
|
||||
proxy string
|
||||
cdp string
|
||||
params map[string]core.Value
|
||||
logging *logging.Options
|
||||
userAgent string
|
||||
}
|
||||
|
||||
Option func(*Options)
|
||||
@@ -58,6 +59,18 @@ func WithProxy(address string) Option {
|
||||
}
|
||||
}
|
||||
|
||||
func WithUserAgent(value string) Option {
|
||||
return func(options *Options) {
|
||||
options.userAgent = value
|
||||
}
|
||||
}
|
||||
|
||||
func WithRandomUserAgent() Option {
|
||||
return func(options *Options) {
|
||||
options.userAgent = env.RandomUserAgent
|
||||
}
|
||||
}
|
||||
|
||||
func WithLog(writer io.Writer) Option {
|
||||
return func(options *Options) {
|
||||
options.logging.Writer = writer
|
||||
@@ -76,6 +89,7 @@ func (opts *Options) withContext(parent context.Context) context.Context {
|
||||
ctx = env.WithContext(ctx, env.Environment{
|
||||
CDPAddress: opts.cdp,
|
||||
ProxyAddress: opts.proxy,
|
||||
UserAgent: opts.userAgent,
|
||||
})
|
||||
|
||||
return ctx
|
||||
|
Reference in New Issue
Block a user