mirror of
				https://github.com/MontFerret/ferret.git
				synced 2025-10-30 23:37:40 +02:00 
			
		
		
		
	Bug/#31 ua (#73)
* Made custom and random UA optional * Added private context keys
This commit is contained in:
		| @@ -57,6 +57,7 @@ func Exec(query string, opts Options) { | ||||
| 		runtime.WithLogLevel(logging.DebugLevel), | ||||
| 		runtime.WithParams(opts.Params), | ||||
| 		runtime.WithProxy(opts.Proxy), | ||||
| 		runtime.WithUserAgent(opts.UserAgent), | ||||
| 	) | ||||
|  | ||||
| 	if err != nil { | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| package cli | ||||
|  | ||||
| type Options struct { | ||||
| 	Cdp    string | ||||
| 	Params map[string]interface{} | ||||
| 	Proxy  string | ||||
| 	Cdp       string | ||||
| 	Params    map[string]interface{} | ||||
| 	Proxy     string | ||||
| 	UserAgent string | ||||
| } | ||||
|   | ||||
| @@ -99,6 +99,7 @@ func Repl(version string, opts Options) { | ||||
| 			runtime.WithLogLevel(logging.DebugLevel), | ||||
| 			runtime.WithParams(opts.Params), | ||||
| 			runtime.WithProxy(opts.Proxy), | ||||
| 			runtime.WithUserAgent(opts.UserAgent), | ||||
| 		) | ||||
|  | ||||
| 		timer.Stop() | ||||
|   | ||||
							
								
								
									
										13
									
								
								main.go
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								main.go
									
									
									
									
									
								
							| @@ -82,6 +82,12 @@ var ( | ||||
| 		"", | ||||
| 		"address of proxy server to use (only applicable for static pages)", | ||||
| 	) | ||||
|  | ||||
| 	userAgent = flag.String( | ||||
| 		"user-agent", | ||||
| 		"", | ||||
| 		"set custom user agent. '*' triggers UA generation", | ||||
| 	) | ||||
| ) | ||||
|  | ||||
| func main() { | ||||
| @@ -141,9 +147,10 @@ func main() { | ||||
| 	} | ||||
|  | ||||
| 	opts := cli.Options{ | ||||
| 		Cdp:    cdpConn, | ||||
| 		Params: p, | ||||
| 		Proxy:  *proxyAddress, | ||||
| 		Cdp:       cdpConn, | ||||
| 		Params:    p, | ||||
| 		Proxy:     *proxyAddress, | ||||
| 		UserAgent: *userAgent, | ||||
| 	} | ||||
|  | ||||
| 	stat, _ := os.Stdin.Stat() | ||||
|   | ||||
							
								
								
									
										19
									
								
								pkg/html/common/ua.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								pkg/html/common/ua.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| package common | ||||
|  | ||||
| import ( | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/env" | ||||
| 	"github.com/corpix/uarand" | ||||
| ) | ||||
|  | ||||
| func GetUserAgent(val string) string { | ||||
| 	if val == "" { | ||||
| 		return val | ||||
| 	} | ||||
|  | ||||
| 	if val != env.RandomUserAgent { | ||||
| 		return val | ||||
| 	} | ||||
|  | ||||
| 	// TODO: Change the implementation | ||||
| 	return uarand.GetRandom() | ||||
| } | ||||
| @@ -10,11 +10,15 @@ import ( | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| ) | ||||
|  | ||||
| type Name string | ||||
| type ( | ||||
| 	DriverName    string | ||||
| 	dynamicCtxKey struct{} | ||||
| 	staticCtxKey  struct{} | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	Dynamic Name = "dynamic" | ||||
| 	Static  Name = "static" | ||||
| 	Dynamic DriverName = "dynamic" | ||||
| 	Static  DriverName = "static" | ||||
| ) | ||||
|  | ||||
| type Driver interface { | ||||
| @@ -22,12 +26,34 @@ type Driver interface { | ||||
| 	Close() error | ||||
| } | ||||
|  | ||||
| func ToContext(ctx context.Context, name Name, drv Driver) context.Context { | ||||
| 	return context.WithValue(ctx, name, drv) | ||||
| func ToContext(ctx context.Context, name DriverName, drv Driver) context.Context { | ||||
| 	var key interface{} | ||||
|  | ||||
| 	switch name { | ||||
| 	case Dynamic: | ||||
| 		key = dynamicCtxKey{} | ||||
| 	case Static: | ||||
| 		key = staticCtxKey{} | ||||
| 	default: | ||||
| 		return ctx | ||||
| 	} | ||||
|  | ||||
| 	return context.WithValue(ctx, key, drv) | ||||
| } | ||||
|  | ||||
| func FromContext(ctx context.Context, name Name) (Driver, error) { | ||||
| 	val := ctx.Value(name) | ||||
| func FromContext(ctx context.Context, name DriverName) (Driver, error) { | ||||
| 	var key interface{} | ||||
|  | ||||
| 	switch name { | ||||
| 	case Dynamic: | ||||
| 		key = dynamicCtxKey{} | ||||
| 	case Static: | ||||
| 		key = staticCtxKey{} | ||||
| 	default: | ||||
| 		return nil, core.Error(core.ErrInvalidArgument, fmt.Sprintf("%s driver", name)) | ||||
| 	} | ||||
|  | ||||
| 	val := ctx.Value(key) | ||||
|  | ||||
| 	drv, ok := val.(Driver) | ||||
|  | ||||
| @@ -43,10 +69,11 @@ func WithDynamicDriver(ctx context.Context) context.Context { | ||||
|  | ||||
| 	return context.WithValue( | ||||
| 		ctx, | ||||
| 		Dynamic, | ||||
| 		dynamicCtxKey{}, | ||||
| 		dynamic.NewDriver( | ||||
| 			e.CDPAddress, | ||||
| 			dynamic.WithProxy(e.ProxyAddress), | ||||
| 			dynamic.WithUserAgent(e.UserAgent), | ||||
| 		), | ||||
| 	) | ||||
| } | ||||
| @@ -56,9 +83,10 @@ func WithStaticDriver(ctx context.Context) context.Context { | ||||
|  | ||||
| 	return context.WithValue( | ||||
| 		ctx, | ||||
| 		Static, | ||||
| 		staticCtxKey{}, | ||||
| 		static.NewDriver( | ||||
| 			static.WithProxy(e.ProxyAddress), | ||||
| 			static.WithUserAgent(e.UserAgent), | ||||
| 		), | ||||
| 	) | ||||
| } | ||||
|   | ||||
| @@ -2,8 +2,9 @@ package dynamic | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"github.com/MontFerret/ferret/pkg/html/common" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/logging" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"github.com/corpix/uarand" | ||||
| 	"github.com/mafredri/cdp" | ||||
| 	"github.com/mafredri/cdp/devtool" | ||||
| 	"github.com/mafredri/cdp/protocol/emulation" | ||||
| @@ -22,25 +23,33 @@ type Driver struct { | ||||
| 	client    *cdp.Client | ||||
| 	session   *session.Manager | ||||
| 	contextID target.BrowserContextID | ||||
| 	opts      *Options | ||||
| 	options   *Options | ||||
| } | ||||
|  | ||||
| func NewDriver(address string, opts ...Option) *Driver { | ||||
| 	drv := new(Driver) | ||||
| 	drv.dev = devtool.New(address) | ||||
| 	drv.opts = new(Options) | ||||
| 	drv.options = new(Options) | ||||
|  | ||||
| 	for _, opt := range opts { | ||||
| 		opt(drv.opts) | ||||
| 		opt(drv.options) | ||||
| 	} | ||||
|  | ||||
| 	return drv | ||||
| } | ||||
|  | ||||
| func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (values.HTMLNode, error) { | ||||
| 	logger := logging.FromContext(ctx) | ||||
|  | ||||
| 	err := drv.init(ctx) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		logger. | ||||
| 			Error(). | ||||
| 			Err(err). | ||||
| 			Str("driver", "dynamic"). | ||||
| 			Msg("failed to initialize the driver") | ||||
|  | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| @@ -59,6 +68,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va | ||||
| 	createTarget, err := drv.client.Target.CreateTarget(ctx, createTargetArgs) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		logger. | ||||
| 			Error(). | ||||
| 			Err(err). | ||||
| 			Str("driver", "dynamic"). | ||||
| 			Msg("failed to create a browser target") | ||||
|  | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| @@ -66,6 +81,12 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va | ||||
| 	conn, err := drv.session.Dial(ctx, createTarget.TargetID) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		logger. | ||||
| 			Error(). | ||||
| 			Err(err). | ||||
| 			Str("driver", "dynamic"). | ||||
| 			Msg("failed to establish a connection") | ||||
|  | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| @@ -92,9 +113,21 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va | ||||
| 		}, | ||||
|  | ||||
| 		func() error { | ||||
| 			ua := common.GetUserAgent(drv.options.userAgent) | ||||
|  | ||||
| 			logger. | ||||
| 				Debug(). | ||||
| 				Str("user-agent", ua). | ||||
| 				Msg("using User-Agent") | ||||
|  | ||||
| 			// do not use custom user agent | ||||
| 			if ua == "" { | ||||
| 				return nil | ||||
| 			} | ||||
|  | ||||
| 			return client.Emulation.SetUserAgentOverride( | ||||
| 				ctx, | ||||
| 				emulation.NewSetUserAgentOverrideArgs(uarand.GetRandom()), | ||||
| 				emulation.NewSetUserAgentOverrideArgs(ua), | ||||
| 			) | ||||
| 		}, | ||||
| 	) | ||||
|   | ||||
| @@ -2,7 +2,8 @@ package dynamic | ||||
|  | ||||
| type ( | ||||
| 	Options struct { | ||||
| 		proxy string | ||||
| 		proxy     string | ||||
| 		userAgent string | ||||
| 	} | ||||
|  | ||||
| 	Option func(opts *Options) | ||||
| @@ -13,3 +14,9 @@ func WithProxy(address string) Option { | ||||
| 		opts.proxy = address | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithUserAgent(value string) Option { | ||||
| 	return func(opts *Options) { | ||||
| 		opts.userAgent = value | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -3,6 +3,7 @@ package static | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"github.com/MontFerret/ferret/pkg/html/common" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"github.com/PuerkitoBio/goquery" | ||||
| 	"github.com/corpix/uarand" | ||||
| @@ -61,7 +62,7 @@ func newClientWithProxy(options *Options) (*http.Client, error) { | ||||
| 	return &http.Client{Transport: tr}, nil | ||||
| } | ||||
|  | ||||
| func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) { | ||||
| func (drv *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) { | ||||
| 	u := targetURL.String() | ||||
| 	req, err := http.NewRequest(http.MethodGet, u, nil) | ||||
|  | ||||
| @@ -73,9 +74,15 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values | ||||
| 	req.Header.Set("Accept-Language", "en-US,en;q=0.9,ru;q=0.8") | ||||
| 	req.Header.Set("Cache-Control", "no-cache") | ||||
| 	req.Header.Set("Pragma", "no-cache") | ||||
| 	req.Header.Set("User-Agent", uarand.GetRandom()) | ||||
|  | ||||
| 	resp, err := d.client.Do(req) | ||||
| 	ua := common.GetUserAgent(drv.options.userAgent) | ||||
|  | ||||
| 	// use custom user agent | ||||
| 	if ua != "" { | ||||
| 		req.Header.Set("User-Agent", uarand.GetRandom()) | ||||
| 	} | ||||
|  | ||||
| 	resp, err := drv.client.Do(req) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return nil, errors.Wrapf(err, "failed to retrieve a document %s", u) | ||||
| @@ -92,7 +99,7 @@ func (d *Driver) GetDocument(_ context.Context, targetURL values.String) (values | ||||
| 	return NewHTMLDocument(u, doc) | ||||
| } | ||||
|  | ||||
| func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) { | ||||
| func (drv *Driver) ParseDocument(_ context.Context, str values.String) (values.HTMLNode, error) { | ||||
| 	buf := bytes.NewBuffer([]byte(str)) | ||||
|  | ||||
| 	doc, err := goquery.NewDocumentFromReader(buf) | ||||
| @@ -104,8 +111,8 @@ func (d *Driver) ParseDocument(_ context.Context, str values.String) (values.HTM | ||||
| 	return NewHTMLDocument("#string", doc) | ||||
| } | ||||
|  | ||||
| func (d *Driver) Close() error { | ||||
| 	d.client = nil | ||||
| func (drv *Driver) Close() error { | ||||
| 	drv.client = nil | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
| @@ -11,6 +11,7 @@ type ( | ||||
| 		maxRetries  int | ||||
| 		concurrency int | ||||
| 		proxy       string | ||||
| 		userAgent   string | ||||
| 	} | ||||
| ) | ||||
|  | ||||
| @@ -49,3 +50,9 @@ func WithProxy(address string) Option { | ||||
| 		opts.proxy = address | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithUserAgent(value string) Option { | ||||
| 	return func(opts *Options) { | ||||
| 		opts.userAgent = value | ||||
| 	} | ||||
| } | ||||
|   | ||||
							
								
								
									
										3
									
								
								pkg/runtime/env/env.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								pkg/runtime/env/env.go
									
									
									
									
										vendored
									
									
								
							| @@ -8,9 +8,12 @@ type ( | ||||
| 	Environment struct { | ||||
| 		CDPAddress   string | ||||
| 		ProxyAddress string | ||||
| 		UserAgent    string | ||||
| 	} | ||||
| ) | ||||
|  | ||||
| const RandomUserAgent = "*" | ||||
|  | ||||
| func WithContext(ctx context.Context, e Environment) context.Context { | ||||
| 	return context.WithValue(ctx, ctxKey{}, e) | ||||
| } | ||||
|   | ||||
| @@ -12,10 +12,11 @@ import ( | ||||
|  | ||||
| type ( | ||||
| 	Options struct { | ||||
| 		proxy   string | ||||
| 		cdp     string | ||||
| 		params  map[string]core.Value | ||||
| 		logging *logging.Options | ||||
| 		proxy     string | ||||
| 		cdp       string | ||||
| 		params    map[string]core.Value | ||||
| 		logging   *logging.Options | ||||
| 		userAgent string | ||||
| 	} | ||||
|  | ||||
| 	Option func(*Options) | ||||
| @@ -58,6 +59,18 @@ func WithProxy(address string) Option { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithUserAgent(value string) Option { | ||||
| 	return func(options *Options) { | ||||
| 		options.userAgent = value | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithRandomUserAgent() Option { | ||||
| 	return func(options *Options) { | ||||
| 		options.userAgent = env.RandomUserAgent | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func WithLog(writer io.Writer) Option { | ||||
| 	return func(options *Options) { | ||||
| 		options.logging.Writer = writer | ||||
| @@ -76,6 +89,7 @@ func (opts *Options) withContext(parent context.Context) context.Context { | ||||
| 	ctx = env.WithContext(ctx, env.Environment{ | ||||
| 		CDPAddress:   opts.cdp, | ||||
| 		ProxyAddress: opts.proxy, | ||||
| 		UserAgent:    opts.userAgent, | ||||
| 	}) | ||||
|  | ||||
| 	return ctx | ||||
|   | ||||
		Reference in New Issue
	
	Block a user