mirror of
				https://github.com/MontFerret/ferret.git
				synced 2025-10-30 23:37:40 +02:00 
			
		
		
		
	Added missed UA setting (#318)
* Added missed UA setting * Update doc_ua.fql * Delete ferret_embedding_basic.go
This commit is contained in:
		| @@ -3,6 +3,8 @@ package runner | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"net/http" | ||||
|  | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/core" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| @@ -10,7 +12,9 @@ import ( | ||||
|  | ||||
| func Assertions() map[string]core.Function { | ||||
| 	return map[string]core.Function{ | ||||
| 		"EXPECT": expect, | ||||
| 		"EXPECT":       expect, | ||||
| 		"T::EXPECT":    expect, | ||||
| 		"T::HTTP::GET": httpGet, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -27,3 +31,29 @@ func expect(_ context.Context, args ...core.Value) (core.Value, error) { | ||||
|  | ||||
| 	return values.NewString(fmt.Sprintf(`expected "%s", but got "%s"`, args[0], args[1])), nil | ||||
| } | ||||
|  | ||||
| func httpGet(_ context.Context, args ...core.Value) (core.Value, error) { | ||||
| 	err := core.ValidateArgs(args, 1, 2) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	url := args[0].String() | ||||
|  | ||||
| 	resp, err := http.Get(url) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	defer resp.Body.Close() | ||||
|  | ||||
| 	b, err := ioutil.ReadAll(resp.Body) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	return values.String(b), nil | ||||
| } | ||||
|   | ||||
| @@ -2,10 +2,13 @@ package server | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"github.com/labstack/echo" | ||||
| 	"net/http" | ||||
| 	"path/filepath" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/labstack/echo" | ||||
| ) | ||||
|  | ||||
| type ( | ||||
| @@ -37,6 +40,43 @@ func New(settings Settings) *Server { | ||||
| 	}) | ||||
| 	e.Static("/", settings.Dir) | ||||
| 	e.File("/", filepath.Join(settings.Dir, "index.html")) | ||||
| 	api := e.Group("/api") | ||||
| 	api.GET("/ts", func(ctx echo.Context) error { | ||||
| 		var headers string | ||||
|  | ||||
| 		if len(ctx.Request().Header) > 0 { | ||||
| 			b, err := json.Marshal(ctx.Request().Header) | ||||
|  | ||||
| 			if err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			headers = string(b) | ||||
| 		} | ||||
|  | ||||
| 		ts := time.Now().Format("2006-01-02 15:04:05") | ||||
|  | ||||
| 		return ctx.HTML(http.StatusOK, fmt.Sprintf(` | ||||
| 		<!DOCTYPE html> | ||||
| 		<html> | ||||
| 			<head> | ||||
| 				<meta charset="utf-8" /> | ||||
| 			</head> | ||||
| 			<body> | ||||
| 				<span id="timestamp">%s</span> | ||||
| 				<span id="headers">%s</span> | ||||
| 			</body> | ||||
| 		</html> | ||||
| 	`, ts, headers)) | ||||
| 	}) | ||||
| 	api.GET("/ping", func(ctx echo.Context) error { | ||||
| 		return ctx.JSON(http.StatusOK, echo.Map{ | ||||
| 			"header": ctx.Request().Header, | ||||
| 			"url":    ctx.Request().URL, | ||||
| 			"data":   "pong", | ||||
| 			"ts":     time.Now(), | ||||
| 		}) | ||||
| 	}) | ||||
|  | ||||
| 	return &Server{e, settings} | ||||
| } | ||||
|   | ||||
							
								
								
									
										10
									
								
								e2e/tests/static/doc_ua.fql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								e2e/tests/static/doc_ua.fql
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| LET url = @static + '/api/ts' | ||||
| LET ua = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) brave/0.7.10 Chrome/47.0.2526.110 Brave/0.36.5 Safari/537.36" | ||||
| LET page = DOCUMENT(url, { | ||||
|    userAgent: ua | ||||
| }) | ||||
|  | ||||
| LET h = ELEMENT(page, "#headers") | ||||
| LET headers = JSON_PARSE(h.innerText) | ||||
|  | ||||
| RETURN T::EXPECT(ua, headers["User-Agent"][0]) | ||||
| @@ -126,6 +126,10 @@ func GetInDocument(ctx context.Context, doc drivers.HTMLDocument, path []core.Va | ||||
| 			} | ||||
|  | ||||
| 			return GetInElement(ctx, el, path[1:]) | ||||
| 		case "innerHTML": | ||||
| 			return doc.GetElement().GetInnerHTML(ctx), nil | ||||
| 		case "innerText": | ||||
| 			return doc.GetElement().GetInnerText(ctx), nil | ||||
| 		default: | ||||
| 			return GetInNode(ctx, doc.GetElement(), path) | ||||
| 		} | ||||
|   | ||||
| @@ -119,6 +119,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri | ||||
| 		Str("user-agent", ua). | ||||
| 		Msg("using User-Agent") | ||||
|  | ||||
| 	if ua != "" { | ||||
| 		req.Header.Set("User-Agent", ua) | ||||
| 	} | ||||
|  | ||||
| 	resp, err := drv.client.Do(req) | ||||
|  | ||||
| 	if err != nil { | ||||
| @@ -127,6 +131,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri | ||||
|  | ||||
| 	defer resp.Body.Close() | ||||
|  | ||||
| 	if resp.StatusCode != http.StatusOK { | ||||
| 		return nil, errors.New(resp.Status) | ||||
| 	} | ||||
|  | ||||
| 	doc, err := goquery.NewDocumentFromReader(resp.Body) | ||||
|  | ||||
| 	if err != nil { | ||||
|   | ||||
| @@ -3,6 +3,7 @@ package strings | ||||
| import ( | ||||
| 	"context" | ||||
| 	"encoding/base64" | ||||
| 	"net/url" | ||||
|  | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
|  | ||||
| @@ -14,6 +15,7 @@ import ( | ||||
| // @returns value (String) - The decoded string. | ||||
| func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) { | ||||
| 	err := core.ValidateArgs(args, 1, 1) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.EmptyString, err | ||||
| 	} | ||||
| @@ -27,3 +29,22 @@ func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) { | ||||
|  | ||||
| 	return values.NewString(string(out)), nil | ||||
| } | ||||
|  | ||||
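| // DecodeURIComponent returns the decoded form of a percent-encoded string. | ||||
| // Decoding is done with url.QueryUnescape, so "+" is decoded to a space. | ||||
| // @param (String) - Uri to decode. | ||||
| // @returns String - Decoded string. | ||||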
| func DecodeURIComponent(_ context.Context, args ...core.Value) (core.Value, error) { | ||||
| 	err := core.ValidateArgs(args, 1, 1) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.EmptyString, err | ||||
| 	} | ||||
|  | ||||
| 	str, err := url.QueryUnescape(args[0].String()) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	return values.NewString(str), nil | ||||
| } | ||||
|   | ||||
							
								
								
									
										25
									
								
								pkg/stdlib/strings/escape.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								pkg/stdlib/strings/escape.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| package strings | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"html" | ||||
|  | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/core" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| ) | ||||
|  | ||||
| // EscapeHTML escapes special characters like "<" to become "&lt;". It | ||||
| // escapes only five such characters: <, >, &, ' and ". | ||||
| // UnescapeString(EscapeString(s)) == s always holds, but the converse isn't | ||||
| // always true. | ||||
| // @param (String) - String to escape. | ||||
| // @returns String - Escaped string. | ||||
| func EscapeHTML(_ context.Context, args ...core.Value) (core.Value, error) { | ||||
| 	err := core.ValidateArgs(args, 1, 1) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	return values.NewString(html.EscapeString(args[0].String())), nil | ||||
| } | ||||
							
								
								
									
										22
									
								
								pkg/stdlib/strings/escape_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								pkg/stdlib/strings/escape_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| package strings_test | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/MontFerret/ferret/pkg/stdlib/strings" | ||||
|  | ||||
| 	. "github.com/smartystreets/goconvey/convey" | ||||
| ) | ||||
|  | ||||
| func TestEscapeHTML(t *testing.T) { | ||||
| 	Convey("EscapeHTML", t, func() { | ||||
| 		Convey("Should escape an HTML string", func() { | ||||
| 			out, err := strings.EscapeHTML(context.Background(), values.NewString(`<body><span>Foobar</span></body>`)) | ||||
|  | ||||
| 			So(err, ShouldBeNil) | ||||
| 			So(out, ShouldEqual, values.NewString("&lt;body&gt;&lt;span&gt;Foobar&lt;/span&gt;&lt;/body&gt;")) | ||||
| 		}) | ||||
| 	}) | ||||
| } | ||||
| @@ -7,6 +7,8 @@ func NewLib() map[string]core.Function { | ||||
| 		"CONCAT":               Concat, | ||||
| 		"CONCAT_SEPARATOR":     ConcatWithSeparator, | ||||
| 		"CONTAINS":             Contains, | ||||
| 		"ESCAPE_HTML":          EscapeHTML, | ||||
| 		"DECODE_URI_COMPONENT": DecodeURIComponent, | ||||
| 		"ENCODE_URI_COMPONENT": EncodeURIComponent, | ||||
| 		"FIND_FIRST":           FindFirst, | ||||
| 		"FIND_LAST":            FindLast, | ||||
| @@ -34,5 +36,6 @@ func NewLib() map[string]core.Function { | ||||
| 		"TRIM":                 Trim, | ||||
| 		"UPPER":                Upper, | ||||
| 		"FMT":                  Fmt, | ||||
| 		"UNESCAPE_HTML":        UnescapeHTML, | ||||
| 	} | ||||
| } | ||||
|   | ||||
							
								
								
									
										26
									
								
								pkg/stdlib/strings/unescape.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								pkg/stdlib/strings/unescape.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| package strings | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"html" | ||||
|  | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/core" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| ) | ||||
|  | ||||
| // UnescapeHTML unescapes entities like "&lt;" to become "<". It unescapes a | ||||
| // larger range of entities than EscapeString escapes. For example, "&aacute;" | ||||
| // unescapes to "á", as does "&#225;" and "&#xE1;". | ||||
| // UnescapeString(EscapeString(s)) == s always holds, but the converse isn't | ||||
| // always true. | ||||
| // @param (String) - String to unescape. | ||||
| // @returns String - Unescaped string. | ||||
| func UnescapeHTML(_ context.Context, args ...core.Value) (core.Value, error) { | ||||
| 	err := core.ValidateArgs(args, 1, 1) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return values.None, err | ||||
| 	} | ||||
|  | ||||
| 	return values.NewString(html.UnescapeString(args[0].String())), nil | ||||
| } | ||||
							
								
								
									
										23
									
								
								pkg/stdlib/strings/unescape_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								pkg/stdlib/strings/unescape_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| package strings_test | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"github.com/MontFerret/ferret/pkg/runtime/values" | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/MontFerret/ferret/pkg/stdlib/strings" | ||||
|  | ||||
| 	. "github.com/smartystreets/goconvey/convey" | ||||
| ) | ||||
|  | ||||
| func TestUnescapeHTML(t *testing.T) { | ||||
| 	Convey("UnescapeHTML", t, func() { | ||||
| 		Convey("Should unescape an string", func() { | ||||
| 			out, err := strings.UnescapeHTML(context.Background(), values.NewString("&lt;body&gt;&lt;span&gt;Foobar&lt;/span&gt;&lt;/body&gt;")) | ||||
|  | ||||
| 			expected := values.NewString("<body><span>Foobar</span></body>") | ||||
| 			So(err, ShouldBeNil) | ||||
| 			So(out, ShouldEqual, expected) | ||||
| 		}) | ||||
| 	}) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user