2018-09-18 16:42:38 -04:00
package html
import (
"context"
2021-03-05 08:08:28 -05:00
"github.com/pkg/errors"
2019-03-15 19:59:05 -04:00
"strings"
2019-02-13 12:31:18 -05:00
"time"
2018-12-21 23:14:41 -05:00
"github.com/MontFerret/ferret/pkg/drivers"
2019-02-19 18:10:18 -05:00
"github.com/MontFerret/ferret/pkg/drivers/cdp"
2018-09-18 16:42:38 -04:00
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
2019-02-13 12:31:18 -05:00
"github.com/MontFerret/ferret/pkg/runtime/values/types"
2018-09-18 16:42:38 -04:00
)
2019-06-19 17:58:56 -04:00
type PageLoadParams struct {
2019-07-17 13:29:16 -04:00
drivers . Params
2019-02-19 18:10:18 -05:00
Driver string
2018-11-21 23:11:01 -05:00
Timeout time . Duration
2018-11-21 20:38:27 -05:00
}
2019-09-07 14:03:17 -04:00
// DOCUMENT opens an HTML page by a given url.
2019-07-17 13:29:16 -04:00
// By default, loads a page by http call - resulted page does not support any interactions.
2020-08-07 21:49:29 -04:00
// @param {Object} [params] - An object containing the following properties :
// @param {String} [params.driver] - Driver name to use.
// @param {Int} [params.timeout=60000] - Page load timeout.
// @param {String} [params.userAgent] - Custom user agent.
// @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode.
2021-03-27 12:47:13 -04:00
// @param {Object[] | Object} [params.cookies] - Set of HTTP cookies to use during page loading.
// @param {String} params.cookies.*.name - Cookie name.
// @param {String} params.cookies.*.value - Cookie value.
// @param {String} params.cookies.*.path - Cookie path.
// @param {String} params.cookies.*.domain - Cookie domain.
// @param {Int} [params.cookies.*.maxAge] - Cookie max age.
// @param {String|DateTime} [params.cookies.*.expires] - Cookie expiration date time.
// @param {String} [params.cookies.*.sameSite] - Cookie cross-origin policy.
// @param {Boolean} [params.cookies.*.httpOnly=false] - Cookie cannot be accessed through client side script.
// @param {Boolean} [params.cookies.*.secure=false] - Cookie sent to the server only with an encrypted request over the HTTPS protocol.
// @param {Object} [params.headers] - Set of HTTP headers to use during page loading.
2021-03-05 08:08:28 -05:00
// @param {Object} [params.ignore] - Set of parameters to ignore some page functionality or behavior.
// @param {Object[]} [params.ignore.resources] - Collection of rules to ignore resources during page load and navigation.
// @param {String} [params.ignore.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// @param {String} [params.ignore.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
2021-03-06 01:25:37 -05:00
// @param {Object[]} [params.ignore.statusCodes] - Collection of rules to ignore certain HTTP codes that can cause failures.
// @param {String} [params.ignore.statusCodes.*.url] - Url pattern. If set, codes for matching urls will be ignored. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// @param {Int} [params.ignore.statusCodes.*.code] - HTTP code to ignore.
2020-08-07 21:49:29 -04:00
// @param {Object} [params.viewport] - Viewport params.
// @param {Int} [params.viewport.height] - Viewport height.
// @param {Int} [params.viewport.width] - Viewport width.
// @param {Float} [params.viewport.scaleFactor] - Viewport scale factor.
// @param {Boolean} [params.viewport.mobile] - Value that indicates whether to emulate mobile device.
// @param {Boolean} [params.viewport.landscape] - Value that indicates whether to render a page in landscape position.
// @return {HTMLPage} - Loaded HTML page.
2019-06-19 17:58:56 -04:00
func Open ( ctx context . Context , args ... core . Value ) ( core . Value , error ) {
2018-12-21 23:14:41 -05:00
err := core . ValidateArgs ( args , 1 , 2 )
2018-09-18 16:42:38 -04:00
if err != nil {
return values . None , err
}
2019-02-13 12:31:18 -05:00
err = core . ValidateType ( args [ 0 ] , types . String )
2018-09-18 16:42:38 -04:00
2018-11-21 20:38:27 -05:00
if err != nil {
return values . None , err
}
2018-10-06 22:33:39 -04:00
url := args [ 0 ] . ( values . String )
2018-09-18 16:42:38 -04:00
2019-06-19 17:58:56 -04:00
var params PageLoadParams
2018-09-18 16:42:38 -04:00
2018-12-21 23:14:41 -05:00
if len ( args ) == 1 {
2019-03-15 19:59:05 -04:00
params = newDefaultDocLoadParams ( url )
2018-12-21 23:14:41 -05:00
} else {
2019-06-19 17:58:56 -04:00
p , err := newPageLoadParams ( url , args [ 1 ] )
2018-12-21 23:14:41 -05:00
if err != nil {
return values . None , err
}
2018-09-18 16:42:38 -04:00
2018-12-21 23:14:41 -05:00
params = p
}
2018-09-18 16:42:38 -04:00
2018-11-21 23:11:01 -05:00
ctx , cancel := context . WithTimeout ( ctx , params . Timeout )
2018-11-21 20:38:27 -05:00
defer cancel ( )
2019-02-19 18:10:18 -05:00
drv , err := drivers . FromContext ( ctx , params . Driver )
2018-12-21 23:14:41 -05:00
2018-09-23 04:33:20 -04:00
if err != nil {
return values . None , err
2018-09-18 16:42:38 -04:00
}
2019-07-17 13:29:16 -04:00
return drv . Open ( ctx , params . Params )
2018-09-18 16:42:38 -04:00
}
2018-11-21 20:38:27 -05:00
2019-06-19 17:58:56 -04:00
func newDefaultDocLoadParams ( url values . String ) PageLoadParams {
return PageLoadParams {
2019-07-17 13:29:16 -04:00
Params : drivers . Params {
2019-03-15 19:59:05 -04:00
URL : url . String ( ) ,
} ,
2019-09-05 12:17:22 -04:00
Timeout : drivers . DefaultPageLoadTimeout * time . Millisecond ,
2018-11-21 20:38:27 -05:00
}
2018-12-21 23:14:41 -05:00
}
2018-11-21 20:38:27 -05:00
2019-06-19 17:58:56 -04:00
func newPageLoadParams ( url values . String , arg core . Value ) ( PageLoadParams , error ) {
2019-03-15 19:59:05 -04:00
res := newDefaultDocLoadParams ( url )
2018-11-21 20:38:27 -05:00
2019-02-19 18:10:18 -05:00
if err := core . ValidateType ( arg , types . Boolean , types . String , types . Object ) ; err != nil {
2018-12-21 23:14:41 -05:00
return res , err
}
2018-11-21 20:38:27 -05:00
2019-02-19 18:10:18 -05:00
switch arg . Type ( ) {
case types . Object :
obj := arg . ( * values . Object )
2018-11-21 20:38:27 -05:00
2019-02-19 18:10:18 -05:00
driver , exists := obj . Get ( values . NewString ( "driver" ) )
if exists {
if err := core . ValidateType ( driver , types . String ) ; err != nil {
return res , err
}
2018-11-21 20:38:27 -05:00
2019-02-19 18:10:18 -05:00
res . Driver = driver . ( values . String ) . String ( )
}
timeout , exists := obj . Get ( values . NewString ( "timeout" ) )
2018-12-21 23:14:41 -05:00
2019-02-19 18:10:18 -05:00
if exists {
if err := core . ValidateType ( timeout , types . Int ) ; err != nil {
return res , err
}
2018-12-21 23:14:41 -05:00
2019-06-25 15:33:42 -04:00
res . Timeout = time . Duration ( timeout . ( values . Int ) ) * time . Millisecond
2018-11-21 20:38:27 -05:00
}
2019-03-15 19:59:05 -04:00
userAgent , exists := obj . Get ( values . NewString ( "userAgent" ) )
if exists {
if err := core . ValidateType ( userAgent , types . String ) ; err != nil {
return res , err
}
res . UserAgent = userAgent . String ( )
}
keepCookies , exists := obj . Get ( values . NewString ( "keepCookies" ) )
if exists {
if err := core . ValidateType ( keepCookies , types . Boolean ) ; err != nil {
return res , err
}
res . KeepCookies = bool ( keepCookies . ( values . Boolean ) )
}
cookies , exists := obj . Get ( values . NewString ( "cookies" ) )
if exists {
2019-09-05 11:49:21 -04:00
if err := core . ValidateType ( cookies , types . Array , types . Object ) ; err != nil {
2019-03-15 19:59:05 -04:00
return res , err
}
2019-09-05 11:49:21 -04:00
switch c := cookies . ( type ) {
case * values . Array :
cookies , err := parseCookieArray ( c )
2019-03-15 19:59:05 -04:00
2019-09-05 11:49:21 -04:00
if err != nil {
return res , err
}
2019-03-15 19:59:05 -04:00
2019-09-05 11:49:21 -04:00
res . Cookies = cookies
case * values . Object :
cookies , err := parseCookieObject ( c )
if err != nil {
return res , err
}
res . Cookies = cookies
default :
2021-03-26 12:01:00 -04:00
res . Cookies = drivers . NewHTTPCookies ( )
2019-09-05 11:49:21 -04:00
}
2019-03-15 19:59:05 -04:00
}
2019-08-04 17:25:47 -04:00
headers , exists := obj . Get ( values . NewString ( "headers" ) )
2019-03-15 19:59:05 -04:00
if exists {
2019-08-04 17:25:47 -04:00
if err := core . ValidateType ( headers , types . Object ) ; err != nil {
2019-03-15 19:59:05 -04:00
return res , err
}
2019-08-04 17:25:47 -04:00
header := parseHeader ( headers . ( * values . Object ) )
res . Headers = header
2019-03-15 19:59:05 -04:00
}
2019-07-17 13:29:16 -04:00
viewport , exists := obj . Get ( values . NewString ( "viewport" ) )
if exists {
viewport , err := parseViewport ( viewport )
if err != nil {
return res , err
}
res . Viewport = viewport
}
2021-02-19 11:40:30 -05:00
2021-03-05 08:08:28 -05:00
ignore , exists := obj . Get ( values . NewString ( "ignore" ) )
2021-02-19 11:40:30 -05:00
if exists {
2021-03-05 08:08:28 -05:00
ignore , err := parseIgnore ( ignore )
2021-02-19 11:40:30 -05:00
if err != nil {
return res , err
}
2021-03-05 08:08:28 -05:00
res . Ignore = ignore
2021-02-19 11:40:30 -05:00
}
2019-02-19 18:10:18 -05:00
case types . String :
res . Driver = arg . ( values . String ) . String ( )
case types . Boolean :
b := arg . ( values . Boolean )
2018-11-21 20:38:27 -05:00
2019-02-19 18:10:18 -05:00
// fallback
if b {
res . Driver = cdp . DriverName
2018-11-21 20:38:27 -05:00
}
}
return res , nil
}
2019-03-15 19:59:05 -04:00
2021-03-26 12:01:00 -04:00
func parseCookieObject ( obj * values . Object ) ( * drivers . HTTPCookies , error ) {
if obj == nil {
return nil , errors . Wrap ( core . ErrMissedArgument , "cookies" )
}
2019-09-05 11:49:21 -04:00
var err error
2021-03-26 12:01:00 -04:00
res := drivers . NewHTTPCookies ( )
2019-09-05 11:49:21 -04:00
obj . ForEach ( func ( value core . Value , _ string ) bool {
cookie , e := parseCookie ( value )
if e != nil {
err = e
return false
}
2021-03-26 12:01:00 -04:00
res . Set ( cookie )
2019-09-05 11:49:21 -04:00
return true
} )
return res , err
}
2021-03-26 12:01:00 -04:00
func parseCookieArray ( arr * values . Array ) ( * drivers . HTTPCookies , error ) {
if arr == nil {
return nil , errors . Wrap ( core . ErrMissedArgument , "cookies" )
}
2019-03-15 19:59:05 -04:00
var err error
2021-03-26 12:01:00 -04:00
res := drivers . NewHTTPCookies ( )
2019-03-15 19:59:05 -04:00
2019-09-05 11:49:21 -04:00
arr . ForEach ( func ( value core . Value , _ int ) bool {
2019-03-15 19:59:05 -04:00
cookie , e := parseCookie ( value )
if e != nil {
err = e
return false
}
2021-03-26 12:01:00 -04:00
res . Set ( cookie )
2019-03-15 19:59:05 -04:00
return true
} )
return res , err
}
func parseCookie ( value core . Value ) ( drivers . HTTPCookie , error ) {
2019-05-04 00:10:34 +03:00
err := core . ValidateType ( value , types . Object , drivers . HTTPCookieType )
2019-03-29 17:48:51 +03:00
if err != nil {
2019-03-15 19:59:05 -04:00
return drivers . HTTPCookie { } , err
}
if value . Type ( ) == drivers . HTTPCookieType {
return value . ( drivers . HTTPCookie ) , nil
}
co := value . ( * values . Object )
cookie := drivers . HTTPCookie {
Name : co . MustGet ( "name" ) . String ( ) ,
Value : co . MustGet ( "value" ) . String ( ) ,
Path : co . MustGet ( "path" ) . String ( ) ,
Domain : co . MustGet ( "domain" ) . String ( ) ,
}
maxAge , exists := co . Get ( "maxAge" )
if exists {
if err = core . ValidateType ( maxAge , types . Int ) ; err != nil {
return drivers . HTTPCookie { } , err
}
cookie . MaxAge = int ( maxAge . ( values . Int ) )
}
expires , exists := co . Get ( "expires" )
if exists {
2021-03-27 12:47:13 -04:00
if err = core . ValidateType ( expires , types . DateTime , types . String ) ; err != nil {
2019-03-15 19:59:05 -04:00
return drivers . HTTPCookie { } , err
}
if expires . Type ( ) == types . DateTime {
cookie . Expires = expires . ( values . DateTime ) . Unwrap ( ) . ( time . Time )
} else {
2021-03-27 12:47:13 -04:00
t , err := time . Parse ( values . DefaultTimeLayout , expires . String ( ) )
2019-03-15 19:59:05 -04:00
if err != nil {
return drivers . HTTPCookie { } , err
}
cookie . Expires = t
}
}
sameSite , exists := co . Get ( "sameSite" )
if exists {
sameSite := strings . ToLower ( sameSite . String ( ) )
switch sameSite {
case "lax" :
cookie . SameSite = drivers . SameSiteLaxMode
case "strict" :
cookie . SameSite = drivers . SameSiteStrictMode
default :
cookie . SameSite = drivers . SameSiteDefaultMode
}
}
httpOnly , exists := co . Get ( "httpOnly" )
if exists {
if err = core . ValidateType ( httpOnly , types . Boolean ) ; err != nil {
return drivers . HTTPCookie { } , err
}
cookie . HTTPOnly = bool ( httpOnly . ( values . Boolean ) )
}
secure , exists := co . Get ( "secure" )
if exists {
if err = core . ValidateType ( secure , types . Boolean ) ; err != nil {
return drivers . HTTPCookie { } , err
}
cookie . Secure = bool ( secure . ( values . Boolean ) )
}
return cookie , err
}
2021-03-26 12:01:00 -04:00
func parseHeader ( headers * values . Object ) * drivers . HTTPHeaders {
res := drivers . NewHTTPHeaders ( )
2019-03-15 19:59:05 -04:00
2019-08-04 17:25:47 -04:00
headers . ForEach ( func ( value core . Value , key string ) bool {
2021-03-26 12:01:00 -04:00
if value . Type ( ) == types . Array {
value := value . ( * values . Array )
keyValues := make ( [ ] string , 0 , value . Length ( ) )
value . ForEach ( func ( v core . Value , idx int ) bool {
keyValues = append ( keyValues , v . String ( ) )
return true
} )
res . SetArr ( key , keyValues )
} else {
res . Set ( key , value . String ( ) )
}
2019-03-15 19:59:05 -04:00
return true
} )
2019-03-29 17:48:51 +03:00
return res
2019-03-15 19:59:05 -04:00
}
2019-07-17 13:29:16 -04:00
// parseViewport converts the "viewport" option into driver viewport settings.
//
// value must be an object. "width" and "height" must be ints when present;
// "mobile", "landscape" and "scaleFactor" are converted with values.ToBoolean
// and values.ToFloat without a prior type check. Missing properties leave the
// corresponding field at its zero value.
func parseViewport(value core.Value) (*drivers.Viewport, error) {
	if err := core.ValidateType(value, types.Object); err != nil {
		return nil, err
	}

	src := value.(*values.Object)
	vp := new(drivers.Viewport)

	if width, ok := src.Get(values.NewString("width")); ok {
		if err := core.ValidateType(width, types.Int); err != nil {
			return nil, err
		}

		vp.Width = int(values.ToInt(width))
	}

	if height, ok := src.Get(values.NewString("height")); ok {
		if err := core.ValidateType(height, types.Int); err != nil {
			return nil, err
		}

		vp.Height = int(values.ToInt(height))
	}

	if mobile, ok := src.Get(values.NewString("mobile")); ok {
		vp.Mobile = bool(values.ToBoolean(mobile))
	}

	if landscape, ok := src.Get(values.NewString("landscape")); ok {
		vp.Landscape = bool(values.ToBoolean(landscape))
	}

	if scaleFactor, ok := src.Get(values.NewString("scaleFactor")); ok {
		vp.ScaleFactor = float64(values.ToFloat(scaleFactor))
	}

	return vp, nil
}
2021-02-19 11:40:30 -05:00
2021-03-05 08:08:28 -05:00
func parseIgnore ( value core . Value ) ( * drivers . Ignore , error ) {
2021-02-19 11:40:30 -05:00
if err := core . ValidateType ( value , types . Object ) ; err != nil {
return nil , err
}
2021-03-05 08:08:28 -05:00
res := & drivers . Ignore { }
2021-02-19 11:40:30 -05:00
2021-03-05 08:08:28 -05:00
ignore := value . ( * values . Object )
2021-02-19 11:40:30 -05:00
2021-03-05 08:08:28 -05:00
resources , exists := ignore . Get ( "resources" )
2021-02-19 11:40:30 -05:00
if exists {
if err := core . ValidateType ( resources , types . Array ) ; err != nil {
return nil , err
}
resources := resources . ( * values . Array )
res . Resources = make ( [ ] drivers . ResourceFilter , 0 , resources . Length ( ) )
var e error
resources . ForEach ( func ( el core . Value , idx int ) bool {
if e = core . ValidateType ( el , types . Object ) ; e != nil {
return false
}
pattern := el . ( * values . Object )
url , urlExists := pattern . Get ( "url" )
resType , resTypeExists := pattern . Get ( "type" )
// ignore element
if ! urlExists && ! resTypeExists {
return true
}
res . Resources = append ( res . Resources , drivers . ResourceFilter {
URL : url . String ( ) ,
Type : resType . String ( ) ,
} )
return true
} )
if e != nil {
return nil , e
}
}
2021-03-05 08:08:28 -05:00
statusCodes , exists := ignore . Get ( "statusCodes" )
if exists {
if err := core . ValidateType ( statusCodes , types . Array ) ; err != nil {
return nil , err
}
statusCodes := statusCodes . ( * values . Array )
res . StatusCodes = make ( [ ] drivers . StatusCodeFilter , 0 , statusCodes . Length ( ) )
var e error
statusCodes . ForEach ( func ( el core . Value , idx int ) bool {
if e = core . ValidateType ( el , types . Object ) ; e != nil {
return false
}
pattern := el . ( * values . Object )
url := pattern . MustGetOr ( "url" , values . NewString ( "" ) )
code , codeExists := pattern . Get ( "code" )
// ignore element
if ! codeExists {
e = errors . New ( "http code is required" )
return false
}
res . StatusCodes = append ( res . StatusCodes , drivers . StatusCodeFilter {
URL : url . String ( ) ,
Code : int ( values . ToInt ( code ) ) ,
} )
return true
} )
}
2021-02-19 11:40:30 -05:00
return res , nil
}