package html
import (
"context"
"github.com/pkg/errors"
"strings"
"time"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/runtime/values/types"
)
// PageLoadParams holds the settings Open uses to load a page.
type PageLoadParams struct {
// Params are the driver-level parameters handed to the driver's Open call.
drivers.Params
// Driver is the name used to look the driver up in the context.
Driver string
// Timeout bounds the page load; Open applies it as a context timeout.
Timeout time.Duration
}
// DOCUMENT opens an HTML page by a given url.
// By default, loads a page by http call - the resulting page does not support any interactions.
// The second argument may also be a String (driver name) or a Boolean (true selects the CDP driver).
// @param {String} url - Url of the page to open.
// @param {Object} [params] - An object containing the following properties:
// @param {String} [params.driver] - Driver name to use.
// @param {Int} [params.timeout=60000] - Page load timeout.
// @param {String} [params.userAgent] - Custom user agent.
// @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode.
// @param {Object[] | Object} [params.cookies] - Set of HTTP cookies to use during page loading.
// @param {String} params.cookies.*.name - Cookie name.
// @param {String} params.cookies.*.value - Cookie value.
// @param {String} params.cookies.*.path - Cookie path.
// @param {String} params.cookies.*.domain - Cookie domain.
// @param {Int} [params.cookies.*.maxAge] - Cookie max age.
// @param {String|DateTime} [params.cookies.*.expires] - Cookie expiration date time.
// @param {String} [params.cookies.*.sameSite] - Cookie cross-origin policy.
// @param {Boolean} [params.cookies.*.httpOnly=false] - Cookie cannot be accessed through client side script.
// @param {Boolean} [params.cookies.*.secure=false] - Cookie sent to the server only with an encrypted request over the HTTPS protocol.
// @param {Object} [params.headers] - Set of HTTP headers to use during page loading.
// @param {Object} [params.ignore] - Set of parameters to ignore some page functionality or behavior.
// @param {Object[]} [params.ignore.resources] - Collection of rules to ignore resources during page load and navigation.
// @param {String} [params.ignore.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// @param {String} [params.ignore.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
// @param {Object[]} [params.ignore.statusCodes] - Collection of rules to ignore certain HTTP codes that can cause failures.
// @param {String} [params.ignore.statusCodes.*.url] - Url pattern. If set, codes for matching urls will be ignored. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// @param {Int} [params.ignore.statusCodes.*.code] - HTTP code to ignore.
// @param {Object} [params.viewport] - Viewport params.
// @param {Int} [params.viewport.height] - Viewport height.
// @param {Int} [params.viewport.width] - Viewport width.
// @param {Float} [params.viewport.scaleFactor] - Viewport scale factor.
// @param {Boolean} [params.viewport.mobile] - Value that indicates whether to emulate mobile device.
// @param {Boolean} [params.viewport.landscape] - Value that indicates whether to render a page in landscape position.
// @return {HTMLPage} - Loaded HTML page.
func Open(ctx context.Context, args ...core.Value) (core.Value, error) {
	if err := core.ValidateArgs(args, 1, 2); err != nil {
		return values.None, err
	}

	if err := core.ValidateType(args[0], types.String); err != nil {
		return values.None, err
	}

	target := args[0].(values.String)

	var params PageLoadParams

	if len(args) > 1 {
		custom, err := newPageLoadParams(target, args[1])
		if err != nil {
			return values.None, err
		}

		params = custom
	} else {
		params = newDefaultDocLoadParams(target)
	}

	// The timeout covers both the driver lookup and the page load itself.
	ctx, cancel := context.WithTimeout(ctx, params.Timeout)
	defer cancel()

	drv, err := drivers.FromContext(ctx, params.Driver)
	if err != nil {
		return values.None, err
	}

	return drv.Open(ctx, params.Params)
}
// newDefaultDocLoadParams returns the page load parameters used when the
// caller supplies no options: only the target url is set, and the timeout is
// drivers.DefaultPageLoadTimeout scaled by time.Millisecond.
func newDefaultDocLoadParams(url values.String) PageLoadParams {
	var params PageLoadParams

	params.URL = url.String()
	params.Timeout = drivers.DefaultPageLoadTimeout * time.Millisecond

	return params
}
// newPageLoadParams builds the page load parameters for url from the optional
// second DOCUMENT argument, starting from the defaults produced by
// newDefaultDocLoadParams.
//
// arg must be one of:
//   - Object: the keys "driver", "timeout" (milliseconds), "userAgent",
//     "keepCookies", "cookies", "headers", "viewport" and "ignore" are read;
//     absent keys leave the corresponding default untouched.
//   - String: the name of the driver to use.
//   - Boolean: legacy form; true selects the CDP driver, false keeps the
//     defaults.
//
// On a validation or parsing failure the error is returned together with the
// parameters collected up to that point.
func newPageLoadParams(url values.String, arg core.Value) (PageLoadParams, error) {
	params := newDefaultDocLoadParams(url)

	if err := core.ValidateType(arg, types.Boolean, types.String, types.Object); err != nil {
		return params, err
	}

	switch arg.Type() {
	case types.Boolean:
		// fallback
		if arg.(values.Boolean) {
			params.Driver = cdp.DriverName
		}
	case types.String:
		params.Driver = arg.(values.String).String()
	case types.Object:
		opts := arg.(*values.Object)

		if name, ok := opts.Get(values.NewString("driver")); ok {
			if err := core.ValidateType(name, types.String); err != nil {
				return params, err
			}

			params.Driver = name.(values.String).String()
		}

		if ms, ok := opts.Get(values.NewString("timeout")); ok {
			if err := core.ValidateType(ms, types.Int); err != nil {
				return params, err
			}

			params.Timeout = time.Duration(ms.(values.Int)) * time.Millisecond
		}

		if agent, ok := opts.Get(values.NewString("userAgent")); ok {
			if err := core.ValidateType(agent, types.String); err != nil {
				return params, err
			}

			params.UserAgent = agent.String()
		}

		if keep, ok := opts.Get(values.NewString("keepCookies")); ok {
			if err := core.ValidateType(keep, types.Boolean); err != nil {
				return params, err
			}

			params.KeepCookies = bool(keep.(values.Boolean))
		}

		if raw, ok := opts.Get(values.NewString("cookies")); ok {
			if err := core.ValidateType(raw, types.Array, types.Object); err != nil {
				return params, err
			}

			// Cookies may be given either as a list or as an object whose
			// values are the cookies.
			switch c := raw.(type) {
			case *values.Array:
				jar, err := parseCookieArray(c)
				if err != nil {
					return params, err
				}

				params.Cookies = jar
			case *values.Object:
				jar, err := parseCookieObject(c)
				if err != nil {
					return params, err
				}

				params.Cookies = jar
			default:
				params.Cookies = drivers.NewHTTPCookies()
			}
		}

		if raw, ok := opts.Get(values.NewString("headers")); ok {
			if err := core.ValidateType(raw, types.Object); err != nil {
				return params, err
			}

			params.Headers = parseHeader(raw.(*values.Object))
		}

		if raw, ok := opts.Get(values.NewString("viewport")); ok {
			vp, err := parseViewport(raw)
			if err != nil {
				return params, err
			}

			params.Viewport = vp
		}

		if raw, ok := opts.Get(values.NewString("ignore")); ok {
			ign, err := parseIgnore(raw)
			if err != nil {
				return params, err
			}

			params.Ignore = ign
		}
	}

	return params, nil
}
// parseCookieObject collects the cookies stored as the values of obj; the
// object's keys are not used. Iteration stops at the first value that
// parseCookie rejects, and that error is returned alongside the cookies
// gathered so far. A nil obj yields a wrapped core.ErrMissedArgument.
func parseCookieObject(obj *values.Object) (*drivers.HTTPCookies, error) {
	if obj == nil {
		return nil, errors.Wrap(core.ErrMissedArgument, "cookies")
	}

	jar := drivers.NewHTTPCookies()

	var failure error

	obj.ForEach(func(item core.Value, _ string) bool {
		parsed, err := parseCookie(item)
		if err != nil {
			failure = err

			return false
		}

		jar.Set(parsed)

		return true
	})

	return jar, failure
}
// parseCookieArray collects the cookies listed in arr. Iteration stops at the
// first element that parseCookie rejects, and that error is returned
// alongside the cookies gathered so far. A nil arr yields a wrapped
// core.ErrMissedArgument.
func parseCookieArray(arr *values.Array) (*drivers.HTTPCookies, error) {
	if arr == nil {
		return nil, errors.Wrap(core.ErrMissedArgument, "cookies")
	}

	jar := drivers.NewHTTPCookies()

	var failure error

	arr.ForEach(func(item core.Value, _ int) bool {
		parsed, err := parseCookie(item)
		if err != nil {
			failure = err

			return false
		}

		jar.Set(parsed)

		return true
	})

	return jar, failure
}
// parseCookie converts value into a drivers.HTTPCookie.
//
// value must be either an existing HTTP cookie, which is returned as is, or
// an object. From an object the keys "name", "value", "path" and "domain" are
// read as strings without any presence check; "maxAge" (Int), "expires"
// (DateTime, or a String in values.DefaultTimeLayout), "sameSite", "httpOnly"
// (Boolean) and "secure" (Boolean) are applied only when present.
//
// "sameSite" is matched case-insensitively against "lax" and "strict"; any
// other value selects the default mode. A zero cookie and an error are
// returned when a field has an unexpected type or "expires" cannot be parsed.
func parseCookie(value core.Value) (drivers.HTTPCookie, error) {
	if err := core.ValidateType(value, types.Object, drivers.HTTPCookieType); err != nil {
		return drivers.HTTPCookie{}, err
	}

	if value.Type() == drivers.HTTPCookieType {
		return value.(drivers.HTTPCookie), nil
	}

	co := value.(*values.Object)

	var cookie drivers.HTTPCookie

	cookie.Name = co.MustGet("name").String()
	cookie.Value = co.MustGet("value").String()
	cookie.Path = co.MustGet("path").String()
	cookie.Domain = co.MustGet("domain").String()

	if age, ok := co.Get("maxAge"); ok {
		if err := core.ValidateType(age, types.Int); err != nil {
			return drivers.HTTPCookie{}, err
		}

		cookie.MaxAge = int(age.(values.Int))
	}

	if exp, ok := co.Get("expires"); ok {
		if err := core.ValidateType(exp, types.DateTime, types.String); err != nil {
			return drivers.HTTPCookie{}, err
		}

		if exp.Type() != types.DateTime {
			// String form: parse it with the runtime's default layout.
			parsed, err := time.Parse(values.DefaultTimeLayout, exp.String())
			if err != nil {
				return drivers.HTTPCookie{}, err
			}

			cookie.Expires = parsed
		} else {
			cookie.Expires = exp.(values.DateTime).Unwrap().(time.Time)
		}
	}

	if policy, ok := co.Get("sameSite"); ok {
		switch strings.ToLower(policy.String()) {
		case "strict":
			cookie.SameSite = drivers.SameSiteStrictMode
		case "lax":
			cookie.SameSite = drivers.SameSiteLaxMode
		default:
			cookie.SameSite = drivers.SameSiteDefaultMode
		}
	}

	if flag, ok := co.Get("httpOnly"); ok {
		if err := core.ValidateType(flag, types.Boolean); err != nil {
			return drivers.HTTPCookie{}, err
		}

		cookie.HTTPOnly = bool(flag.(values.Boolean))
	}

	if flag, ok := co.Get("secure"); ok {
		if err := core.ValidateType(flag, types.Boolean); err != nil {
			return drivers.HTTPCookie{}, err
		}

		cookie.Secure = bool(flag.(values.Boolean))
	}

	return cookie, nil
}
// parseHeader turns the given object into a set of HTTP headers. Each key
// becomes a header name; an array value contributes one header value per
// element (using each element's string form), and any other value is stored
// as a single string.
func parseHeader(headers *values.Object) *drivers.HTTPHeaders {
	out := drivers.NewHTTPHeaders()

	headers.ForEach(func(item core.Value, name string) bool {
		if item.Type() != types.Array {
			out.Set(name, item.String())

			return true
		}

		list := item.(*values.Array)
		parts := make([]string, 0, list.Length())

		list.ForEach(func(el core.Value, _ int) bool {
			parts = append(parts, el.String())

			return true
		})

		out.SetArr(name, parts)

		return true
	})

	return out
}
// parseViewport converts the "viewport" option into a drivers.Viewport.
//
// value must be an object. "width" and "height" must be Int when present;
// "mobile", "landscape" and "scaleFactor" are not type-checked and are
// converted with values.ToBoolean / values.ToFloat. Keys that are absent
// leave the corresponding field at its zero value.
func parseViewport(value core.Value) (*drivers.Viewport, error) {
	if err := core.ValidateType(value, types.Object); err != nil {
		return nil, err
	}

	opts := value.(*values.Object)
	vp := new(drivers.Viewport)

	if w, ok := opts.Get(values.NewString("width")); ok {
		if err := core.ValidateType(w, types.Int); err != nil {
			return nil, err
		}

		vp.Width = int(values.ToInt(w))
	}

	if h, ok := opts.Get(values.NewString("height")); ok {
		if err := core.ValidateType(h, types.Int); err != nil {
			return nil, err
		}

		vp.Height = int(values.ToInt(h))
	}

	if m, ok := opts.Get(values.NewString("mobile")); ok {
		vp.Mobile = bool(values.ToBoolean(m))
	}

	if l, ok := opts.Get(values.NewString("landscape")); ok {
		vp.Landscape = bool(values.ToBoolean(l))
	}

	if sf, ok := opts.Get(values.NewString("scaleFactor")); ok {
		vp.ScaleFactor = float64(values.ToFloat(sf))
	}

	return vp, nil
}
// parseIgnore converts the "ignore" option into a drivers.Ignore.
//
// value must be an object. Two optional keys are read:
//   - "resources": an array of objects with optional "url" and "type" keys.
//     Elements that have neither key are skipped.
//   - "statusCodes": an array of objects with an optional "url" key (empty
//     string when absent) and a required "code" key.
//
// An error is returned when value, one of the lists, or a list element has an
// unexpected type, or when a status code rule has no "code".
func parseIgnore(value core.Value) (*drivers.Ignore, error) {
	if err := core.ValidateType(value, types.Object); err != nil {
		return nil, err
	}

	res := &drivers.Ignore{}
	ignore := value.(*values.Object)

	resources, exists := ignore.Get("resources")
	if exists {
		if err := core.ValidateType(resources, types.Array); err != nil {
			return nil, err
		}

		resources := resources.(*values.Array)
		res.Resources = make([]drivers.ResourceFilter, 0, resources.Length())

		// ForEach cannot return an error, so the first failure is captured
		// here and checked once iteration has stopped.
		var e error

		resources.ForEach(func(el core.Value, _ int) bool {
			if e = core.ValidateType(el, types.Object); e != nil {
				return false
			}

			pattern := el.(*values.Object)
			url, urlExists := pattern.Get("url")
			resType, resTypeExists := pattern.Get("type")

			// A rule with neither a url nor a type matches nothing; skip it.
			if !urlExists && !resTypeExists {
				return true
			}

			res.Resources = append(res.Resources, drivers.ResourceFilter{
				URL:  url.String(),
				Type: resType.String(),
			})

			return true
		})

		if e != nil {
			return nil, e
		}
	}

	statusCodes, exists := ignore.Get("statusCodes")
	if exists {
		if err := core.ValidateType(statusCodes, types.Array); err != nil {
			return nil, err
		}

		statusCodes := statusCodes.(*values.Array)
		res.StatusCodes = make([]drivers.StatusCodeFilter, 0, statusCodes.Length())

		var e error

		statusCodes.ForEach(func(el core.Value, _ int) bool {
			if e = core.ValidateType(el, types.Object); e != nil {
				return false
			}

			pattern := el.(*values.Object)
			url := pattern.MustGetOr("url", values.NewString(""))
			code, codeExists := pattern.Get("code")

			// Unlike resource rules, a status code rule without a code is
			// an error rather than something to skip.
			if !codeExists {
				e = errors.New("http code is required")

				return false
			}

			res.StatusCodes = append(res.StatusCodes, drivers.StatusCodeFilter{
				URL:  url.String(),
				Code: int(values.ToInt(code)),
			})

			return true
		})

		// Report the failure captured during iteration; previously it was
		// dropped and a truncated rule list was returned with a nil error.
		if e != nil {
			return nil, e
		}
	}

	return res, nil
}