From 2cfd1040a9c01f6472b6f72e6aa66841fd06e822 Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Tue, 25 Jun 2019 12:51:51 -0400 Subject: [PATCH] Added missed UA setting (#318) * Added misset UA setting * Update doc_ua.fql * Delete ferret_embedding_basic.go --- e2e/runner/lib.go | 32 +++++++++++++++++++++- e2e/server/server.go | 42 ++++++++++++++++++++++++++++- e2e/tests/static/doc_ua.fql | 10 +++++++ pkg/drivers/common/getter.go | 4 +++ pkg/drivers/http/driver.go | 8 ++++++ pkg/stdlib/strings/decode.go | 21 +++++++++++++++ pkg/stdlib/strings/escape.go | 25 +++++++++++++++++ pkg/stdlib/strings/escape_test.go | 22 +++++++++++++++ pkg/stdlib/strings/lib.go | 3 +++ pkg/stdlib/strings/unescape.go | 26 ++++++++++++++++++ pkg/stdlib/strings/unescape_test.go | 23 ++++++++++++++++ 11 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 e2e/tests/static/doc_ua.fql create mode 100644 pkg/stdlib/strings/escape.go create mode 100644 pkg/stdlib/strings/escape_test.go create mode 100644 pkg/stdlib/strings/unescape.go create mode 100644 pkg/stdlib/strings/unescape_test.go diff --git a/e2e/runner/lib.go b/e2e/runner/lib.go index e3e3ef01..5c9fdcbb 100644 --- a/e2e/runner/lib.go +++ b/e2e/runner/lib.go @@ -3,6 +3,8 @@ package runner import ( "context" "fmt" + "io/ioutil" + "net/http" "github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/values" @@ -10,7 +12,9 @@ import ( func Assertions() map[string]core.Function { return map[string]core.Function{ - "EXPECT": expect, + "EXPECT": expect, + "T::EXPECT": expect, + "T::HTTP::GET": httpGet, } } @@ -27,3 +31,29 @@ func expect(_ context.Context, args ...core.Value) (core.Value, error) { return values.NewString(fmt.Sprintf(`expected "%s", but got "%s"`, args[0], args[1])), nil } + +func httpGet(_ context.Context, args ...core.Value) (core.Value, error) { + err := core.ValidateArgs(args, 1, 2) + + if err != nil { + return values.None, err + } + + url := args[0].String() + + resp, err := http.Get(url) + + if err != nil { + return values.None, err + } + + defer resp.Body.Close() + + b, err := ioutil.ReadAll(resp.Body) + + if err != nil { + return values.None, err + } + + return values.String(b), nil +} diff --git a/e2e/server/server.go b/e2e/server/server.go index 46e004fc..15a0c5d3 100644 --- a/e2e/server/server.go +++ b/e2e/server/server.go @@ -2,10 +2,13 @@ package server import ( "context" + "encoding/json" "fmt" - "github.com/labstack/echo" "net/http" "path/filepath" + "time" + + "github.com/labstack/echo" ) type ( @@ -37,6 +40,43 @@ func New(settings Settings) *Server { }) e.Static("/", settings.Dir) e.File("/", filepath.Join(settings.Dir, "index.html")) + api := e.Group("/api") + api.GET("/ts", func(ctx echo.Context) error { + var headers string + + if len(ctx.Request().Header) > 0 { + b, err := json.Marshal(ctx.Request().Header) + + if err != nil { + return err + } + + headers = string(b) + } + + ts := time.Now().Format("2006-01-02 15:04:05") + + return ctx.HTML(http.StatusOK, fmt.Sprintf(` + + + + + + + %s + %s + + + `, ts, headers)) + }) + api.GET("/ping", func(ctx echo.Context) error { + return ctx.JSON(http.StatusOK, echo.Map{ + "header": ctx.Request().Header, + "url": ctx.Request().URL, + "data": "pong", + "ts": time.Now(), + }) + }) return &Server{e, settings} } diff --git a/e2e/tests/static/doc_ua.fql b/e2e/tests/static/doc_ua.fql new file mode 100644 index 00000000..c300a50f --- /dev/null +++ b/e2e/tests/static/doc_ua.fql @@ -0,0 +1,10 @@ +LET url = @static + '/api/ts' +LET ua = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) brave/0.7.10 Chrome/47.0.2526.110 Brave/0.36.5 Safari/537.36" +LET page = DOCUMENT(url, { + userAgent: ua +}) + +LET h = ELEMENT(page, "#headers") +LET headers = JSON_PARSE(h.innerText) + +RETURN T::EXPECT(ua, headers["User-Agent"][0]) diff --git a/pkg/drivers/common/getter.go b/pkg/drivers/common/getter.go index 8048eb3e..1d1921d1 100644 --- a/pkg/drivers/common/getter.go +++ b/pkg/drivers/common/getter.go @@ -126,6 +126,10 @@ func GetInDocument(ctx context.Context, doc drivers.HTMLDocument, path []core.Va } return GetInElement(ctx, el, path[1:]) + case "innerHTML": + return doc.GetElement().GetInnerHTML(ctx), nil + case "innerText": + return doc.GetElement().GetInnerText(ctx), nil default: return GetInNode(ctx, doc.GetElement(), path) } diff --git a/pkg/drivers/http/driver.go b/pkg/drivers/http/driver.go index 214c16fb..38f3eea8 100644 --- a/pkg/drivers/http/driver.go +++ b/pkg/drivers/http/driver.go @@ -119,6 +119,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri Str("user-agent", ua). Msg("using User-Agent") + if ua != "" { + req.Header.Set("User-Agent", ua) + } + resp, err := drv.client.Do(req) if err != nil { @@ -127,6 +131,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, errors.New(resp.Status) + } + doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { diff --git a/pkg/stdlib/strings/decode.go b/pkg/stdlib/strings/decode.go index 9f870a95..c879bb23 100644 --- a/pkg/stdlib/strings/decode.go +++ b/pkg/stdlib/strings/decode.go @@ -3,6 +3,7 @@ package strings import ( "context" "encoding/base64" + "net/url" "github.com/MontFerret/ferret/pkg/runtime/values" @@ -14,6 +15,7 @@ import ( // @returns value (String) - The decoded string. func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) { err := core.ValidateArgs(args, 1, 1) + if err != nil { return values.EmptyString, err } @@ -27,3 +29,22 @@ func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) { return values.NewString(string(out)), nil } + +// DecodeURIComponent returns the decoded String of uri. +// @param (String) - Uri to decode. +// @returns String - Decoded string. +func DecodeURIComponent(_ context.Context, args ...core.Value) (core.Value, error) { + err := core.ValidateArgs(args, 1, 1) + + if err != nil { + return values.EmptyString, err + } + + str, err := url.QueryUnescape(args[0].String()) + + if err != nil { + return values.None, err + } + + return values.NewString(str), nil +} diff --git a/pkg/stdlib/strings/escape.go b/pkg/stdlib/strings/escape.go new file mode 100644 index 00000000..fc65b7fd --- /dev/null +++ b/pkg/stdlib/strings/escape.go @@ -0,0 +1,25 @@ +package strings + +import ( + "context" + "html" + + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" +) + +// EscapeHTML escapes special characters like "<" to become "<". It +// escapes only five such characters: <, >, &, ' and ". +// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't +// always true. +// @param (String) - Uri to escape. +// @returns String - Escaped string. +func EscapeHTML(_ context.Context, args ...core.Value) (core.Value, error) { + err := core.ValidateArgs(args, 1, 1) + + if err != nil { + return values.None, err + } + + return values.NewString(html.EscapeString(args[0].String())), nil +} diff --git a/pkg/stdlib/strings/escape_test.go b/pkg/stdlib/strings/escape_test.go new file mode 100644 index 00000000..07824d6d --- /dev/null +++ b/pkg/stdlib/strings/escape_test.go @@ -0,0 +1,22 @@ +package strings_test + +import ( + "context" + "github.com/MontFerret/ferret/pkg/runtime/values" + "testing" + + "github.com/MontFerret/ferret/pkg/stdlib/strings" + + . "github.com/smartystreets/goconvey/convey" +) + +func TestEscapeHTML(t *testing.T) { + Convey("EscapeHTML", t, func() { + Convey("Should escape an HTML string", func() { + out, err := strings.EscapeHTML(context.Background(), values.NewString(`Foobar`)) + + So(err, ShouldBeNil) + So(out, ShouldEqual, values.NewString("<body><span>Foobar</span></body>")) + }) + }) +} diff --git a/pkg/stdlib/strings/lib.go b/pkg/stdlib/strings/lib.go index 8761e8e1..a156b2ae 100644 --- a/pkg/stdlib/strings/lib.go +++ b/pkg/stdlib/strings/lib.go @@ -7,6 +7,8 @@ func NewLib() map[string]core.Function { "CONCAT": Concat, "CONCAT_SEPARATOR": ConcatWithSeparator, "CONTAINS": Contains, + "ESCAPE_HTML": EscapeHTML, + "DECODE_URI_COMPONENT": DecodeURIComponent, "ENCODE_URI_COMPONENT": EncodeURIComponent, "FIND_FIRST": FindFirst, "FIND_LAST": FindLast, @@ -34,5 +36,6 @@ func NewLib() map[string]core.Function { "TRIM": Trim, "UPPER": Upper, "FMT": Fmt, + "UNESCAPE_HTML": UnescapeHTML, } } diff --git a/pkg/stdlib/strings/unescape.go b/pkg/stdlib/strings/unescape.go new file mode 100644 index 00000000..a1a841ec --- /dev/null +++ b/pkg/stdlib/strings/unescape.go @@ -0,0 +1,26 @@ +package strings + +import ( + "context" + "html" + + "github.com/MontFerret/ferret/pkg/runtime/core" + "github.com/MontFerret/ferret/pkg/runtime/values" +) + +// UnescapeHTML unescapes entities like "<" to become "<". It unescapes a +// larger range of entities than EscapeString escapes. For example, "á" +// unescapes to "รก", as does "á" and "á". +// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't +// always true. +// @param (String) - Uri to escape. +// @returns String - Escaped string. +func UnescapeHTML(_ context.Context, args ...core.Value) (core.Value, error) { + err := core.ValidateArgs(args, 1, 1) + + if err != nil { + return values.None, err + } + + return values.NewString(html.UnescapeString(args[0].String())), nil +} diff --git a/pkg/stdlib/strings/unescape_test.go b/pkg/stdlib/strings/unescape_test.go new file mode 100644 index 00000000..0543ad8a --- /dev/null +++ b/pkg/stdlib/strings/unescape_test.go @@ -0,0 +1,23 @@ +package strings_test + +import ( + "context" + "github.com/MontFerret/ferret/pkg/runtime/values" + "testing" + + "github.com/MontFerret/ferret/pkg/stdlib/strings" + + . "github.com/smartystreets/goconvey/convey" +) + +func TestUnescapeHTML(t *testing.T) { + Convey("UnescapeHTML", t, func() { + Convey("Should unescape an string", func() { + out, err := strings.UnescapeHTML(context.Background(), values.NewString("<body><span>Foobar</span></body>")) + + expected := values.NewString("Foobar") + So(err, ShouldBeNil) + So(out, ShouldEqual, expected) + }) + }) +}