1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-01-22 03:39:08 +02:00

Added missed UA setting (#318)

* Added missed UA setting

* Update doc_ua.fql

* Delete ferret_embedding_basic.go
This commit is contained in:
Tim Voronov 2019-06-25 12:51:51 -04:00 committed by GitHub
parent eee801fb5b
commit 2cfd1040a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 214 additions and 2 deletions

View File

@ -3,6 +3,8 @@ package runner
import ( import (
"context" "context"
"fmt" "fmt"
"io/ioutil"
"net/http"
"github.com/MontFerret/ferret/pkg/runtime/core" "github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
@ -11,6 +13,8 @@ import (
// Assertions returns the table of assertion/test helper functions keyed by
// their FQL names: "EXPECT" and "T::EXPECT" both map to expect, and
// "T::HTTP::GET" maps to httpGet.
func Assertions() map[string]core.Function { func Assertions() map[string]core.Function {
return map[string]core.Function{ return map[string]core.Function{
"EXPECT": expect, "EXPECT": expect,
"T::EXPECT": expect,
"T::HTTP::GET": httpGet,
} }
} }
@ -27,3 +31,29 @@ func expect(_ context.Context, args ...core.Value) (core.Value, error) {
return values.NewString(fmt.Sprintf(`expected "%s", but got "%s"`, args[0], args[1])), nil return values.NewString(fmt.Sprintf(`expected "%s", but got "%s"`, args[0], args[1])), nil
} }
// httpGet implements the T::HTTP::GET test helper: it performs an HTTP GET
// against the URL given as the first argument and returns the whole response
// body as a string value.
//
// One or two arguments are accepted (enforced by core.ValidateArgs), but only
// the first one is read. The request is bound to ctx, so cancelling the
// context aborts the in-flight request. The response status code is not
// inspected: the body of a non-2xx response is returned as-is.
//
// On any failure (argument validation, building or sending the request,
// reading the body) it returns values.None together with the error.
func httpGet(ctx context.Context, args ...core.Value) (core.Value, error) {
	if err := core.ValidateArgs(args, 1, 2); err != nil {
		return values.None, err
	}

	req, err := http.NewRequest(http.MethodGet, args[0].String(), nil)
	if err != nil {
		return values.None, err
	}

	// Attach the caller's context so the request honours cancellation and
	// deadlines instead of running detached from the query.
	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	if err != nil {
		return values.None, err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return values.None, err
	}

	return values.String(body), nil
}

View File

@ -2,10 +2,13 @@ package server
import ( import (
"context" "context"
"encoding/json"
"fmt" "fmt"
"github.com/labstack/echo"
"net/http" "net/http"
"path/filepath" "path/filepath"
"time"
"github.com/labstack/echo"
) )
type ( type (
@ -37,6 +40,43 @@ func New(settings Settings) *Server {
}) })
e.Static("/", settings.Dir) e.Static("/", settings.Dir)
e.File("/", filepath.Join(settings.Dir, "index.html")) e.File("/", filepath.Join(settings.Dir, "index.html"))
// Helper endpoints registered under the /api prefix.
api := e.Group("/api")
// GET /api/ts renders a small HTML page containing the current server time
// (#timestamp) and the incoming request headers serialized as JSON (#headers).
api.GET("/ts", func(ctx echo.Context) error {
// Stays empty when the request carries no headers.
var headers string
if len(ctx.Request().Header) > 0 {
b, err := json.Marshal(ctx.Request().Header)
if err != nil {
return err
}
headers = string(b)
}
ts := time.Now().Format("2006-01-02 15:04:05")
// NOTE(review): the header JSON is interpolated into the markup without HTML
// escaping, so header values containing "<" or "&" end up in the page verbatim.
return ctx.HTML(http.StatusOK, fmt.Sprintf(`
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
</head>
<body>
<span id="timestamp">%s</span>
<span id="headers">%s</span>
</body>
</html>
`, ts, headers))
})
// GET /api/ping echoes the request headers and URL back as JSON together with
// a constant "pong" payload and the current time.
api.GET("/ping", func(ctx echo.Context) error {
return ctx.JSON(http.StatusOK, echo.Map{
"header": ctx.Request().Header,
"url": ctx.Request().URL,
"data": "pong",
"ts": time.Now(),
})
})
return &Server{e, settings} return &Server{e, settings}
} }

View File

@ -0,0 +1,10 @@
// Checks that the userAgent option passed to DOCUMENT shows up as the
// User-Agent header reported back by the requested page.
// NOTE(review): @static is a query parameter; presumably the base URL of the
// test server that serves /api/ts - confirm against the runner.
LET url = @static + '/api/ts'
LET ua = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) brave/0.7.10 Chrome/47.0.2526.110 Brave/0.36.5 Safari/537.36"
// Load the page with the custom User-Agent.
LET page = DOCUMENT(url, {
userAgent: ua
})
// The text of the #headers element is parsed as JSON and indexed by header name.
LET h = ELEMENT(page, "#headers")
LET headers = JSON_PARSE(h.innerText)
// Each header is indexed as a list; compare its first value with the expected UA.
RETURN T::EXPECT(ua, headers["User-Agent"][0])

View File

@ -126,6 +126,10 @@ func GetInDocument(ctx context.Context, doc drivers.HTMLDocument, path []core.Va
} }
return GetInElement(ctx, el, path[1:]) return GetInElement(ctx, el, path[1:])
case "innerHTML":
return doc.GetElement().GetInnerHTML(ctx), nil
case "innerText":
return doc.GetElement().GetInnerText(ctx), nil
default: default:
return GetInNode(ctx, doc.GetElement(), path) return GetInNode(ctx, doc.GetElement(), path)
} }

View File

@ -119,6 +119,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri
Str("user-agent", ua). Str("user-agent", ua).
Msg("using User-Agent") Msg("using User-Agent")
if ua != "" {
req.Header.Set("User-Agent", ua)
}
resp, err := drv.client.Do(req) resp, err := drv.client.Do(req)
if err != nil { if err != nil {
@ -127,6 +131,10 @@ func (drv *Driver) Open(ctx context.Context, params drivers.OpenPageParams) (dri
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, errors.New(resp.Status)
}
doc, err := goquery.NewDocumentFromReader(resp.Body) doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil { if err != nil {

View File

@ -3,6 +3,7 @@ package strings
import ( import (
"context" "context"
"encoding/base64" "encoding/base64"
"net/url"
"github.com/MontFerret/ferret/pkg/runtime/values" "github.com/MontFerret/ferret/pkg/runtime/values"
@ -14,6 +15,7 @@ import (
// @returns value (String) - The decoded string. // @returns value (String) - The decoded string.
func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) { func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) {
err := core.ValidateArgs(args, 1, 1) err := core.ValidateArgs(args, 1, 1)
if err != nil { if err != nil {
return values.EmptyString, err return values.EmptyString, err
} }
@ -27,3 +29,22 @@ func FromBase64(_ context.Context, args ...core.Value) (core.Value, error) {
return values.NewString(string(out)), nil return values.NewString(string(out)), nil
} }
// DecodeURIComponent returns the decoded form of a percent-encoded URI
// component. Decoding is done with url.QueryUnescape: every "%XX" sequence is
// replaced by the byte it encodes and "+" is decoded as a space.
// @param (String) - URI component to decode.
// @returns String - Decoded string. An error is returned when the input
// contains a malformed escape sequence.
func DecodeURIComponent(_ context.Context, args ...core.Value) (core.Value, error) {
	if err := core.ValidateArgs(args, 1, 1); err != nil {
		return values.EmptyString, err
	}

	decoded, err := url.QueryUnescape(args[0].String())
	if err != nil {
		// Use the same failure value as the validation branch so both error
		// paths of this String-returning function agree.
		return values.EmptyString, err
	}

	return values.NewString(decoded), nil
}

View File

@ -0,0 +1,25 @@
package strings
import (
"context"
"html"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
)
// EscapeHTML replaces HTML special characters in a string with their entity
// equivalents, e.g. "<" becomes "&lt;". Only five characters are escaped:
// <, >, &, ' and ". The work is delegated to html.EscapeString.
// @param (String) - String to escape.
// @returns String - Escaped string.
func EscapeHTML(_ context.Context, args ...core.Value) (core.Value, error) {
	if err := core.ValidateArgs(args, 1, 1); err != nil {
		return values.None, err
	}

	escaped := html.EscapeString(args[0].String())

	return values.NewString(escaped), nil
}

View File

@ -0,0 +1,22 @@
package strings_test
import (
"context"
"github.com/MontFerret/ferret/pkg/runtime/values"
"testing"
"github.com/MontFerret/ferret/pkg/stdlib/strings"
. "github.com/smartystreets/goconvey/convey"
)
// TestEscapeHTML checks that EscapeHTML turns markup into its entity-escaped
// form and reports no error.
func TestEscapeHTML(t *testing.T) {
	Convey("EscapeHTML", t, func() {
		Convey("Should escape an HTML string", func() {
			input := values.NewString(`<body><span>Foobar</span></body>`)
			expected := values.NewString("&lt;body&gt;&lt;span&gt;Foobar&lt;/span&gt;&lt;/body&gt;")

			out, err := strings.EscapeHTML(context.Background(), input)

			So(err, ShouldBeNil)
			So(out, ShouldEqual, expected)
		})
	})
}

View File

@ -7,6 +7,8 @@ func NewLib() map[string]core.Function {
"CONCAT": Concat, "CONCAT": Concat,
"CONCAT_SEPARATOR": ConcatWithSeparator, "CONCAT_SEPARATOR": ConcatWithSeparator,
"CONTAINS": Contains, "CONTAINS": Contains,
"ESCAPE_HTML": EscapeHTML,
"DECODE_URI_COMPONENT": DecodeURIComponent,
"ENCODE_URI_COMPONENT": EncodeURIComponent, "ENCODE_URI_COMPONENT": EncodeURIComponent,
"FIND_FIRST": FindFirst, "FIND_FIRST": FindFirst,
"FIND_LAST": FindLast, "FIND_LAST": FindLast,
@ -34,5 +36,6 @@ func NewLib() map[string]core.Function {
"TRIM": Trim, "TRIM": Trim,
"UPPER": Upper, "UPPER": Upper,
"FMT": Fmt, "FMT": Fmt,
"UNESCAPE_HTML": UnescapeHTML,
} }
} }

View File

@ -0,0 +1,26 @@
package strings
import (
"context"
"html"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
)
// UnescapeHTML converts HTML entities in a string back to the characters they
// stand for, e.g. "&lt;" becomes "<". It recognises a wider set of entities
// than EscapeHTML produces: "&aacute;", "&#225;" and "&#xE1;" all unescape
// to "á". The work is delegated to html.UnescapeString.
// @param (String) - String to unescape.
// @returns String - Unescaped string.
func UnescapeHTML(_ context.Context, args ...core.Value) (core.Value, error) {
	if err := core.ValidateArgs(args, 1, 1); err != nil {
		return values.None, err
	}

	unescaped := html.UnescapeString(args[0].String())

	return values.NewString(unescaped), nil
}

View File

@ -0,0 +1,23 @@
package strings_test
import (
"context"
"github.com/MontFerret/ferret/pkg/runtime/values"
"testing"
"github.com/MontFerret/ferret/pkg/stdlib/strings"
. "github.com/smartystreets/goconvey/convey"
)
// TestUnescapeHTML checks that UnescapeHTML restores entity-escaped markup to
// its original form and reports no error.
func TestUnescapeHTML(t *testing.T) {
	Convey("UnescapeHTML", t, func() {
		Convey("Should unescape an string", func() {
			input := values.NewString("&lt;body&gt;&lt;span&gt;Foobar&lt;/span&gt;&lt;/body&gt;")
			want := values.NewString("<body><span>Foobar</span></body>")

			got, err := strings.UnescapeHTML(context.Background(), input)

			So(err, ShouldBeNil)
			So(got, ShouldEqual, want)
		})
	})
}