mirror of
https://github.com/MontFerret/ferret.git
synced 2024-12-12 11:15:14 +02:00
Added missed UA setting (#318)
* Added missed UA setting * Update doc_ua.fql * Delete ferret_embedding_basic.go
This commit is contained in:
parent
eee801fb5b
commit
2cfd1040a9
@ -3,6 +3,8 @@ package runner
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
@ -10,7 +12,9 @@ import (
|
||||
|
||||
func Assertions() map[string]core.Function {
|
||||
return map[string]core.Function{
|
||||
"EXPECT": expect,
|
||||
"EXPECT": expect,
|
||||
"T::EXPECT": expect,
|
||||
"T::HTTP::GET": httpGet,
|
||||
}
|
||||
}
|
||||
|
||||
@ -27,3 +31,29 @@ func expect(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
|
||||
return values.NewString(fmt.Sprintf(`expected "%s", but got "%s"`, args[0], args[1])), nil
|
||||
}
|
||||
|
||||
func httpGet(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
err := core.ValidateArgs(args, 1, 2)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
url := args[0].String()
|
||||
|
||||
resp, err := http.Get(url)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
b, err := ioutil.ReadAll(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
return values.String(b), nil
|
||||
}
|
||||
|
@ -2,10 +2,13 @@ package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/labstack/echo"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo"
|
||||
)
|
||||
|
||||
type (
|
||||
@ -37,6 +40,43 @@ func New(settings Settings) *Server {
|
||||
})
|
||||
e.Static("/", settings.Dir)
|
||||
e.File("/", filepath.Join(settings.Dir, "index.html"))
|
||||
api := e.Group("/api")
|
||||
api.GET("/ts", func(ctx echo.Context) error {
|
||||
var headers string
|
||||
|
||||
if len(ctx.Request().Header) > 0 {
|
||||
b, err := json.Marshal(ctx.Request().Header)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
headers = string(b)
|
||||
}
|
||||
|
||||
ts := time.Now().Format("2006-01-02 15:04:05")
|
||||
|
||||
return ctx.HTML(http.StatusOK, fmt.Sprintf(`
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
</head>
|
||||
<body>
|
||||
<span id="timestamp">%s</span>
|
||||
<span id="headers">%s</span>
|
||||
</body>
|
||||
</html>
|
||||
`, ts, headers))
|
||||
})
|
||||
api.GET("/ping", func(ctx echo.Context) error {
|
||||
return ctx.JSON(http.StatusOK, echo.Map{
|
||||
"header": ctx.Request().Header,
|
||||
"url": ctx.Request().URL,
|
||||
"data": "pong",
|
||||
"ts": time.Now(),
|
||||
})
|
||||
})
|
||||
|
||||
return &Server{e, settings}
|
||||
}
|
||||
|
10
e2e/tests/static/doc_ua.fql
Normal file
10
e2e/tests/static/doc_ua.fql
Normal file
@ -0,0 +1,10 @@
|
||||
LET url = @static + '/api/ts'
|
||||
LET ua = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) brave/0.7.10 Chrome/47.0.2526.110 Brave/0.36.5 Safari/537.36"
|
||||
LET page = DOCUMENT(url, {
|
||||
userAgent: ua
|
||||
})
|
||||
|
||||
LET h = ELEMENT(page, "#headers")
|
||||
LET headers = JSON_PARSE(h.innerText)
|
||||
|
||||
RETURN T::EXPECT(ua, headers["User-Agent"][0])
|
@ -126,6 +126,10 @@ func GetInDocument(ctx context.Context, doc drivers.HTMLDocument, path []core.Va
|
||||
}
|
||||
|
||||
return GetInElement(ctx, el, path[1:])
|
||||
case "innerHTML":
|
||||
return doc.GetElement().GetInnerHTML(ctx), nil
|
||||
case "innerText":
|
||||
return doc.GetElement().GetInnerText(ctx), nil
|
||||
default:
|
||||
return GetInNode(ctx, doc.GetElement(), path)
|
||||
}
|
||||
|
@ -119,6 +119,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri
|
||||
Str("user-agent", ua).
|
||||
Msg("using User-Agent")
|
||||
|
||||
if ua != "" {
|
||||
req.Header.Set("User-Agent", ua)
|
||||
}
|
||||
|
||||
resp, err := drv.client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
@ -127,6 +131,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, errors.New(resp.Status)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
|
@ -3,6 +3,7 @@ package strings
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"net/url"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
|
||||
@ -14,6 +15,7 @@ import (
|
||||
// @returns value (String) - The decoded string.
|
||||
func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
err := core.ValidateArgs(args, 1, 1)
|
||||
|
||||
if err != nil {
|
||||
return values.EmptyString, err
|
||||
}
|
||||
@ -27,3 +29,22 @@ func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
|
||||
return values.NewString(string(out)), nil
|
||||
}
|
||||
|
||||
// DecodeURIComponent returns the decoded String of uri.
|
||||
// @param (String) - Uri to decode.
|
||||
// @returns String - Decoded string.
|
||||
func DecodeURIComponent(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
err := core.ValidateArgs(args, 1, 1)
|
||||
|
||||
if err != nil {
|
||||
return values.EmptyString, err
|
||||
}
|
||||
|
||||
str, err := url.QueryUnescape(args[0].String())
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
return values.NewString(str), nil
|
||||
}
|
||||
|
25
pkg/stdlib/strings/escape.go
Normal file
25
pkg/stdlib/strings/escape.go
Normal file
@ -0,0 +1,25 @@
|
||||
package strings
|
||||
|
||||
import (
|
||||
"context"
|
||||
"html"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
)
|
||||
|
||||
// EscapeHTML escapes special characters like "<" to become "<". It
|
||||
// escapes only five such characters: <, >, &, ' and ".
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
// @param (String) - Uri to escape.
|
||||
// @returns String - Escaped string.
|
||||
func EscapeHTML(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
err := core.ValidateArgs(args, 1, 1)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
return values.NewString(html.EscapeString(args[0].String())), nil
|
||||
}
|
22
pkg/stdlib/strings/escape_test.go
Normal file
22
pkg/stdlib/strings/escape_test.go
Normal file
@ -0,0 +1,22 @@
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"testing"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/strings"
|
||||
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
)
|
||||
|
||||
func TestEscapeHTML(t *testing.T) {
|
||||
Convey("EscapeHTML", t, func() {
|
||||
Convey("Should escape an HTML string", func() {
|
||||
out, err := strings.EscapeHTML(context.Background(), values.NewString(`<body><span>Foobar</span></body>`))
|
||||
|
||||
So(err, ShouldBeNil)
|
||||
So(out, ShouldEqual, values.NewString("<body><span>Foobar</span></body>"))
|
||||
})
|
||||
})
|
||||
}
|
@ -7,6 +7,8 @@ func NewLib() map[string]core.Function {
|
||||
"CONCAT": Concat,
|
||||
"CONCAT_SEPARATOR": ConcatWithSeparator,
|
||||
"CONTAINS": Contains,
|
||||
"ESCAPE_HTML": EscapeHTML,
|
||||
"DECODE_URI_COMPONENT": DecodeURIComponent,
|
||||
"ENCODE_URI_COMPONENT": EncodeURIComponent,
|
||||
"FIND_FIRST": FindFirst,
|
||||
"FIND_LAST": FindLast,
|
||||
@ -34,5 +36,6 @@ func NewLib() map[string]core.Function {
|
||||
"TRIM": Trim,
|
||||
"UPPER": Upper,
|
||||
"FMT": Fmt,
|
||||
"UNESCAPE_HTML": UnescapeHTML,
|
||||
}
|
||||
}
|
||||
|
26
pkg/stdlib/strings/unescape.go
Normal file
26
pkg/stdlib/strings/unescape.go
Normal file
@ -0,0 +1,26 @@
|
||||
package strings
|
||||
|
||||
import (
|
||||
"context"
|
||||
"html"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
)
|
||||
|
||||
// UnescapeHTML unescapes entities like "<" to become "<". It unescapes a
|
||||
// larger range of entities than EscapeString escapes. For example, "á"
|
||||
// unescapes to "á", as does "á" and "á".
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
// @param (String) - Uri to escape.
|
||||
// @returns String - Escaped string.
|
||||
func UnescapeHTML(_ context.Context, args ...core.Value) (core.Value, error) {
|
||||
err := core.ValidateArgs(args, 1, 1)
|
||||
|
||||
if err != nil {
|
||||
return values.None, err
|
||||
}
|
||||
|
||||
return values.NewString(html.UnescapeString(args[0].String())), nil
|
||||
}
|
23
pkg/stdlib/strings/unescape_test.go
Normal file
23
pkg/stdlib/strings/unescape_test.go
Normal file
@ -0,0 +1,23 @@
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||||
"testing"
|
||||
|
||||
"github.com/MontFerret/ferret/pkg/stdlib/strings"
|
||||
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
)
|
||||
|
||||
func TestUnescapeHTML(t *testing.T) {
|
||||
Convey("UnescapeHTML", t, func() {
|
||||
Convey("Should unescape an string", func() {
|
||||
out, err := strings.UnescapeHTML(context.Background(), values.NewString("<body><span>Foobar</span></body>"))
|
||||
|
||||
expected := values.NewString("<body><span>Foobar</span></body>")
|
||||
So(err, ShouldBeNil)
|
||||
So(out, ShouldEqual, expected)
|
||||
})
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue
Block a user