mirror of
https://github.com/MontFerret/ferret.git
synced 2024-12-12 11:15:14 +02:00
#593 handling non-200 status codes
This commit is contained in:
parent
79566c3b76
commit
6295919806
11
examples/non-200.fql
Normal file
11
examples/non-200.fql
Normal file
@ -0,0 +1,11 @@
|
||||
// Example: load a page whose server answers with a non-2xx status code.
// `ignore.statusCodes` lists status code filters passed along with the query;
// every entry must carry a `code` and may carry a `url` pattern.
// Here a 403 response is listed so that it is not reported as a load error.
LET p = DOCUMENT('https://www.g2.com/categories', {
|
||||
ignore: {
|
||||
statusCodes: [
|
||||
{
|
||||
code: 403
|
||||
}
|
||||
]
|
||||
}
|
||||
})
|
||||
|
||||
// Return the status code of the response the page was loaded with.
RETURN p.response.statusCode
|
@ -78,9 +78,9 @@ func LoadHTMLPage(
|
||||
netOpts.Cookies[params.URL] = params.Cookies
|
||||
}
|
||||
|
||||
if params.Disable != nil {
|
||||
if len(params.Disable.Resources) > 0 {
|
||||
netOpts.Filter.Patterns = params.Disable.Resources
|
||||
if params.Ignore != nil {
|
||||
if len(params.Ignore.Resources) > 0 {
|
||||
netOpts.Filter.Patterns = params.Ignore.Resources
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@ package http
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"github.com/gobwas/glob"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
@ -171,7 +172,13 @@ func (drv *Driver) Open(ctx context.Context, params drivers.Params) (drivers.HTM
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
if !drv.responseCodeAllowed(resp) {
|
||||
var queryFilters []drivers.StatusCodeFilter
|
||||
|
||||
if params.Ignore != nil {
|
||||
queryFilters = params.Ignore.StatusCodes
|
||||
}
|
||||
|
||||
if !drv.responseCodeAllowed(resp, queryFilters) {
|
||||
return nil, errors.New(resp.Status)
|
||||
}
|
||||
|
||||
@ -214,7 +221,43 @@ func (drv *Driver) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (drv *Driver) responseCodeAllowed(resp *http.Response) bool {
|
||||
_, exists := drv.options.AllowedHTTPCodes[resp.StatusCode]
|
||||
return exists
|
||||
func (drv *Driver) responseCodeAllowed(resp *http.Response, additional []drivers.StatusCodeFilter) bool {
|
||||
var allowed bool
|
||||
reqURL := resp.Request.URL.String()
|
||||
|
||||
// OK is by default
|
||||
if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Try to use those that are passed within a query
|
||||
for _, filter := range additional {
|
||||
allowed = filter.Code == resp.StatusCode
|
||||
|
||||
// check url
|
||||
if allowed && filter.URL != "" {
|
||||
allowed = glob.MustCompile(filter.URL).Match(reqURL)
|
||||
}
|
||||
|
||||
if allowed {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// if still not allowed, try the default ones
|
||||
if !allowed {
|
||||
for _, filter := range drv.options.HTTPCodesFilter {
|
||||
allowed = filter.Code == resp.StatusCode
|
||||
|
||||
if allowed && filter.URL != nil {
|
||||
allowed = filter.URL.Match(reqURL)
|
||||
}
|
||||
|
||||
if allowed {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return allowed
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"github.com/gobwas/glob"
|
||||
stdhttp "net/http"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/drivers"
|
||||
@ -10,17 +11,22 @@ import (
|
||||
type (
|
||||
Option func(opts *Options)
|
||||
|
||||
compiledStatusCodeFilter struct {
|
||||
URL glob.Glob
|
||||
Code int
|
||||
}
|
||||
|
||||
Options struct {
|
||||
Name string
|
||||
Backoff pester.BackoffStrategy
|
||||
MaxRetries int
|
||||
Concurrency int
|
||||
Proxy string
|
||||
UserAgent string
|
||||
Headers drivers.HTTPHeaders
|
||||
Cookies drivers.HTTPCookies
|
||||
AllowedHTTPCodes map[int]struct{}
|
||||
HTTPTransport *stdhttp.Transport
|
||||
Name string
|
||||
Backoff pester.BackoffStrategy
|
||||
MaxRetries int
|
||||
Concurrency int
|
||||
Proxy string
|
||||
UserAgent string
|
||||
Headers drivers.HTTPHeaders
|
||||
Cookies drivers.HTTPCookies
|
||||
HTTPCodesFilter []compiledStatusCodeFilter
|
||||
HTTPTransport *stdhttp.Transport
|
||||
}
|
||||
)
|
||||
|
||||
@ -30,7 +36,7 @@ func newOptions(setters []Option) *Options {
|
||||
opts.Backoff = pester.ExponentialBackoff
|
||||
opts.Concurrency = 3
|
||||
opts.MaxRetries = 5
|
||||
opts.AllowedHTTPCodes = map[int]struct{}{stdhttp.StatusOK: struct{}{}}
|
||||
opts.HTTPCodesFilter = make([]compiledStatusCodeFilter, 0, 5)
|
||||
|
||||
for _, setter := range setters {
|
||||
setter(opts)
|
||||
@ -133,14 +139,18 @@ func WithCookies(cookies []drivers.HTTPCookie) Option {
|
||||
|
||||
func WithAllowedHTTPCode(httpCode int) Option {
|
||||
return func(opts *Options) {
|
||||
opts.AllowedHTTPCodes[httpCode] = struct{}{}
|
||||
opts.HTTPCodesFilter = append(opts.HTTPCodesFilter, compiledStatusCodeFilter{
|
||||
Code: httpCode,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func WithAllowedHTTPCodes(httpCodes []int) Option {
|
||||
return func(opts *Options) {
|
||||
for _, code := range httpCodes {
|
||||
opts.AllowedHTTPCodes[code] = struct{}{}
|
||||
opts.HTTPCodesFilter = append(opts.HTTPCodesFilter, compiledStatusCodeFilter{
|
||||
Code: code,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,8 +6,14 @@ type (
|
||||
Type string
|
||||
}
|
||||
|
||||
Disable struct {
|
||||
Resources []ResourceFilter
|
||||
StatusCodeFilter struct {
|
||||
URL string
|
||||
Code int
|
||||
}
|
||||
|
||||
Ignore struct {
|
||||
Resources []ResourceFilter
|
||||
StatusCodes []StatusCodeFilter
|
||||
}
|
||||
|
||||
Viewport struct {
|
||||
@ -25,7 +31,7 @@ type (
|
||||
Cookies HTTPCookies
|
||||
Headers HTTPHeaders
|
||||
Viewport *Viewport
|
||||
Disable *Disable
|
||||
Ignore *Ignore
|
||||
}
|
||||
|
||||
ParseParams struct {
|
||||
|
@ -243,6 +243,16 @@ func (t *Object) MustGet(key String) core.Value {
|
||||
return val
|
||||
}
|
||||
|
||||
func (t *Object) MustGetOr(key String, defaultValue core.Value) core.Value {
|
||||
val, found := t.value[string(key)]
|
||||
|
||||
if found {
|
||||
return val
|
||||
}
|
||||
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func (t *Object) Get(key String) (core.Value, Boolean) {
|
||||
val, found := t.value[string(key)]
|
||||
|
||||
|
@ -2,6 +2,7 @@ package html
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/pkg/errors"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -27,10 +28,10 @@ type PageLoadParams struct {
|
||||
// @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode.
|
||||
// @param {HTTPCookies} [params.cookies] - Set of HTTP cookies to use during page loading.
|
||||
// @param {HTTPHeaders} [params.headers] - Set of HTTP headers to use during page loading.
|
||||
// @param {Object} [params.disable] - Set of parameters to disable some page functionality or behavior.
|
||||
// @param {Object[]} [params.disable.resources] - Collection of rules to disable resources during page load and navigation.
|
||||
// @param {String} [params.disable.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
|
||||
// @param {String} [params.disable.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
|
||||
// @param {Object} [params.ignore] - Set of parameters to ignore some page functionality or behavior.
|
||||
// @param {Object[]} [params.ignore.resources] - Collection of rules to ignore resources during page load and navigation.
|
||||
// @param {String} [params.ignore.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
|
||||
// @param {String} [params.ignore.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
|
||||
// @param {Object} [params.viewport] - Viewport params.
|
||||
// @param {Int} [params.viewport.height] - Viewport height.
|
||||
// @param {Int} [params.viewport.width] - Viewport width.
|
||||
@ -191,16 +192,16 @@ func newPageLoadParams(url values.String, arg core.Value) (PageLoadParams, error
|
||||
res.Viewport = viewport
|
||||
}
|
||||
|
||||
disable, exists := obj.Get(values.NewString("disable"))
|
||||
ignore, exists := obj.Get(values.NewString("ignore"))
|
||||
|
||||
if exists {
|
||||
disable, err := parseDisable(disable)
|
||||
ignore, err := parseIgnore(ignore)
|
||||
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
|
||||
res.Disable = disable
|
||||
res.Ignore = ignore
|
||||
}
|
||||
case types.String:
|
||||
res.Driver = arg.(values.String).String()
|
||||
@ -408,16 +409,16 @@ func parseViewport(value core.Value) (*drivers.Viewport, error) {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func parseDisable(value core.Value) (*drivers.Disable, error) {
|
||||
func parseIgnore(value core.Value) (*drivers.Ignore, error) {
|
||||
if err := core.ValidateType(value, types.Object); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := &drivers.Disable{}
|
||||
res := &drivers.Ignore{}
|
||||
|
||||
disable := value.(*values.Object)
|
||||
ignore := value.(*values.Object)
|
||||
|
||||
resources, exists := disable.Get("resources")
|
||||
resources, exists := ignore.Get("resources")
|
||||
|
||||
if exists {
|
||||
if err := core.ValidateType(resources, types.Array); err != nil {
|
||||
@ -458,5 +459,43 @@ func parseDisable(value core.Value) (*drivers.Disable, error) {
|
||||
}
|
||||
}
|
||||
|
||||
statusCodes, exists := ignore.Get("statusCodes")
|
||||
|
||||
if exists {
|
||||
if err := core.ValidateType(statusCodes, types.Array); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
statusCodes := statusCodes.(*values.Array)
|
||||
|
||||
res.StatusCodes = make([]drivers.StatusCodeFilter, 0, statusCodes.Length())
|
||||
|
||||
var e error
|
||||
|
||||
statusCodes.ForEach(func(el core.Value, idx int) bool {
|
||||
if e = core.ValidateType(el, types.Object); e != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
pattern := el.(*values.Object)
|
||||
|
||||
url := pattern.MustGetOr("url", values.NewString(""))
|
||||
code, codeExists := pattern.Get("code")
|
||||
|
||||
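// code is required: record an error and stop rather than skipping the element
// NOTE(review): e does not appear to be checked after ForEach in the visible code, so this error may be dropped — confirm.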
|
||||
if !codeExists {
|
||||
e = errors.New("http code is required")
|
||||
return false
|
||||
}
|
||||
|
||||
res.StatusCodes = append(res.StatusCodes, drivers.StatusCodeFilter{
|
||||
URL: url.String(),
|
||||
Code: int(values.ToInt(code)),
|
||||
})
|
||||
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user