mirror of
				https://github.com/MontFerret/ferret.git
				synced 2025-10-30 23:37:40 +02:00 
			
		
		
		
	Added timeout to DOCUMENT function (#186)
This commit is contained in:
		| @@ -92,7 +92,10 @@ func (r *Runner) runQueries(dir string) ([]Result, error) { | ||||
| 	results := make([]Result, 0, len(files)) | ||||
|  | ||||
| 	c := compiler.New() | ||||
| 	c.RegisterFunctions(Assertions()) | ||||
|  | ||||
| 	if err := c.RegisterFunctions(Assertions()); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// read scripts | ||||
| 	for _, f := range files { | ||||
| @@ -146,7 +149,13 @@ func (r *Runner) runQuery(c *compiler.FqlCompiler, name, script string) Result { | ||||
|  | ||||
| 	var result string | ||||
|  | ||||
| 	json.Unmarshal(out, &result) | ||||
| 	if err := json.Unmarshal(out, &result); err != nil { | ||||
| 		return Result{ | ||||
| 			name:     name, | ||||
| 			duration: duration, | ||||
| 			err:      err, | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if result == "" { | ||||
| 		return Result{ | ||||
|   | ||||
| @@ -53,9 +53,6 @@ func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (va | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	ctx, cancel := context.WithTimeout(ctx, DefaultTimeout) | ||||
| 	defer cancel() | ||||
|  | ||||
| 	url := targetURL.String() | ||||
|  | ||||
| 	if url == "" { | ||||
|   | ||||
| @@ -63,7 +63,7 @@ func newClientWithProxy(options *Options) (*http.Client, error) { | ||||
| 	return &http.Client{Transport: tr}, nil | ||||
| } | ||||
|  | ||||
| func (drv *Driver) GetDocument(_ context.Context, targetURL values.String) (values.HTMLNode, error) { | ||||
| func (drv *Driver) GetDocument(ctx context.Context, targetURL values.String) (values.HTMLNode, error) { | ||||
| 	u := targetURL.String() | ||||
| 	req, err := http.NewRequest(http.MethodGet, u, nil) | ||||
|  | ||||
| @@ -76,6 +76,8 @@ func (drv *Driver) GetDocument(_ context.Context, targetURL values.String) (valu | ||||
| 	req.Header.Set("Cache-Control", "no-cache") | ||||
| 	req.Header.Set("Pragma", "no-cache") | ||||
|  | ||||
| 	req = req.WithContext(ctx) | ||||
|  | ||||
| 	ua := common.GetUserAgent(drv.options.userAgent) | ||||
|  | ||||
| 	// use custom user agent | ||||
|   | ||||
| @@ -2,20 +2,27 @@ package html | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
|  | ||||
| 	"github.com/MontFerret/ferret/pkg/html" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/core" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"time" | ||||
| ) | ||||
|  | ||||
| // LoadDocumentArgs holds the optional arguments of Document, i.e. everything after the url. | ||||
| // Values of this type are produced by parseLoadDocumentArgs. | ||||
| type LoadDocumentArgs struct { | ||||
| 	// Dynamic selects the driver used by Document: false requests html.Static, true requests html.Dynamic. | ||||
| 	Dynamic values.Boolean | ||||
| 	// Timeout is the load timeout in milliseconds; Document multiplies it by time.Millisecond. | ||||
| 	Timeout values.Int | ||||
| } | ||||
|  | ||||
| // Document loads an HTML document by a given url. | ||||
| // By default, the document is loaded with a plain HTTP call - the resulting document does not support any interactions. | ||||
| // If "true" is passed as a second argument, a headless browser is used for loading the document, which supports interactions. | ||||
| // @param url (String) - Target url string. If passed "about:blank" for dynamic document - it will open an empty page. | ||||
| // @param dynamic (Boolean) - Optional boolean value indicating whether to use dynamic document. | ||||
| // @param dynamicOrTimeout (Boolean|Int, optional) - If a boolean value is passed, it indicates whether to use dynamic document. | ||||
| // If an integer value is passed, it sets a custom timeout in milliseconds. | ||||
| // @param timeout (Int, optional) - Sets a custom timeout. | ||||
| // @returns (HTMLDocument) - Returns loaded HTML document. | ||||
| func Document(ctx context.Context, args ...core.Value) (core.Value, error) { | ||||
| 	err := core.ValidateArgs(args, 1, 2) | ||||
| 	err := core.ValidateArgs(args, 1, 3) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| @@ -23,22 +30,24 @@ func Document(ctx context.Context, args ...core.Value) (core.Value, error) { | ||||
|  | ||||
| 	err = core.ValidateType(args[0], core.StringType) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	url := args[0].(values.String) | ||||
| 	dynamic := values.False | ||||
|  | ||||
| 	if len(args) == 2 { | ||||
| 		err = core.ValidateType(args[1], core.BooleanType) | ||||
| 	params, err := parseLoadDocumentArgs(args) | ||||
|  | ||||
| 		if err != nil { | ||||
| 			return values.None, err | ||||
| 		} | ||||
|  | ||||
| 		dynamic = args[1].(values.Boolean) | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	var drv html.Driver | ||||
|  | ||||
| 	if !dynamic { | ||||
| 	ctx, cancel := context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Millisecond) | ||||
| 	defer cancel() | ||||
|  | ||||
| 	if params.Dynamic == false { | ||||
| 		drv, err = html.FromContext(ctx, html.Static) | ||||
| 	} else { | ||||
| 		drv, err = html.FromContext(ctx, html.Dynamic) | ||||
| @@ -50,3 +59,41 @@ func Document(ctx context.Context, args ...core.Value) (core.Value, error) { | ||||
|  | ||||
| 	return drv.GetDocument(ctx, url) | ||||
| } | ||||
|  | ||||
| func parseLoadDocumentArgs(args []core.Value) (LoadDocumentArgs, error) { | ||||
| 	res := LoadDocumentArgs{ | ||||
| 		Timeout: values.Int(time.Second * 30), | ||||
| 	} | ||||
|  | ||||
| 	if len(args) == 3 { | ||||
| 		err := core.ValidateType(args[1], core.BooleanType) | ||||
|  | ||||
| 		if err != nil { | ||||
| 			return res, err | ||||
| 		} | ||||
|  | ||||
| 		res.Dynamic = args[1].(values.Boolean) | ||||
|  | ||||
| 		err = core.ValidateType(args[2], core.IntType) | ||||
|  | ||||
| 		if err != nil { | ||||
| 			return res, err | ||||
| 		} | ||||
|  | ||||
| 		res.Timeout = args[2].(values.Int) | ||||
| 	} else if len(args) == 2 { | ||||
| 		err := core.ValidateType(args[1], core.BooleanType, core.IntType) | ||||
|  | ||||
| 		if err != nil { | ||||
| 			return res, err | ||||
| 		} | ||||
|  | ||||
| 		if args[1].Type() == core.BooleanType { | ||||
| 			res.Dynamic = args[1].(values.Boolean) | ||||
| 		} else { | ||||
| 			res.Timeout = args[1].(values.Int) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return res, nil | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user