2018-12-22 06:14:41 +02:00
|
|
|
package http
|
2018-09-18 22:42:38 +02:00
|
|
|
|
|
|
|
import (
|
2019-02-20 01:10:18 +02:00
|
|
|
"context"
|
|
|
|
"hash/fnv"
|
|
|
|
|
|
|
|
"github.com/MontFerret/ferret/pkg/drivers"
|
|
|
|
"github.com/MontFerret/ferret/pkg/drivers/common"
|
2018-09-18 22:42:38 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/runtime/core"
|
2018-09-28 01:05:56 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/runtime/values"
|
2018-09-18 22:42:38 +02:00
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
)
|
|
|
|
|
2018-10-05 22:35:08 +02:00
|
|
|
type HTMLDocument struct {
|
2019-06-19 23:58:56 +02:00
|
|
|
doc *goquery.Document
|
|
|
|
element drivers.HTMLElement
|
|
|
|
url values.String
|
|
|
|
parent drivers.HTMLDocument
|
|
|
|
children *values.Array
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewRootHTMLDocument(
|
|
|
|
node *goquery.Document,
|
|
|
|
url string,
|
|
|
|
) (*HTMLDocument, error) {
|
|
|
|
return NewHTMLDocument(node, url, nil)
|
2018-09-18 22:42:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-05 22:35:08 +02:00
|
|
|
func NewHTMLDocument(
|
2018-09-18 22:42:38 +02:00
|
|
|
node *goquery.Document,
|
2019-03-16 01:59:05 +02:00
|
|
|
url string,
|
2019-06-19 23:58:56 +02:00
|
|
|
parent drivers.HTMLDocument,
|
|
|
|
) (*HTMLDocument, error) {
|
2018-09-18 22:42:38 +02:00
|
|
|
if url == "" {
|
|
|
|
return nil, core.Error(core.ErrMissedArgument, "document url")
|
|
|
|
}
|
|
|
|
|
|
|
|
if node == nil {
|
|
|
|
return nil, core.Error(core.ErrMissedArgument, "document root selection")
|
|
|
|
}
|
|
|
|
|
2018-10-06 01:40:09 +02:00
|
|
|
el, err := NewHTMLElement(node.Selection)
|
2018-09-18 22:42:38 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
doc := new(HTMLDocument)
|
|
|
|
doc.doc = node
|
|
|
|
doc.element = el
|
|
|
|
doc.parent = parent
|
|
|
|
doc.url = values.NewString(url)
|
|
|
|
doc.children = values.NewArray(10)
|
|
|
|
|
|
|
|
frames := node.Find("iframe")
|
|
|
|
frames.Each(func(i int, selection *goquery.Selection) {
|
|
|
|
child, _ := NewHTMLDocument(goquery.NewDocumentFromNode(selection.Nodes[0]), selection.AttrOr("src", url), doc)
|
|
|
|
|
|
|
|
doc.children.Push(child)
|
|
|
|
})
|
|
|
|
|
|
|
|
return doc, nil
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) MarshalJSON() ([]byte, error) {
|
|
|
|
return doc.element.MarshalJSON()
|
2018-09-18 22:42:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-07 04:33:39 +02:00
|
|
|
func (doc *HTMLDocument) Type() core.Type {
|
2019-02-20 01:10:18 +02:00
|
|
|
return drivers.HTMLDocumentType
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) String() string {
|
2019-06-19 23:58:56 +02:00
|
|
|
str, err := doc.doc.Html()
|
2019-02-20 01:10:18 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
return str
|
2018-09-18 22:42:38 +02:00
|
|
|
}
|
|
|
|
|
2019-02-13 19:31:18 +02:00
|
|
|
func (doc *HTMLDocument) Compare(other core.Value) int64 {
|
2019-02-20 01:10:18 +02:00
|
|
|
switch other.Type() {
|
|
|
|
case drivers.HTMLElementType:
|
|
|
|
otherDoc := other.(drivers.HTMLDocument)
|
2018-09-28 01:05:56 +02:00
|
|
|
|
2019-02-20 01:10:18 +02:00
|
|
|
return doc.url.Compare(otherDoc.GetURL())
|
|
|
|
default:
|
|
|
|
return drivers.Compare(doc.Type(), other.Type())
|
2018-09-18 22:42:38 +02:00
|
|
|
}
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) Unwrap() interface{} {
|
2019-06-19 23:58:56 +02:00
|
|
|
return doc.doc
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) Hash() uint64 {
|
|
|
|
h := fnv.New64a()
|
|
|
|
|
|
|
|
h.Write([]byte(doc.Type().String()))
|
|
|
|
h.Write([]byte(":"))
|
|
|
|
h.Write([]byte(doc.url))
|
|
|
|
|
|
|
|
return h.Sum64()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) Copy() core.Value {
|
2019-06-19 23:58:56 +02:00
|
|
|
cp, err := NewHTMLDocument(doc.doc, string(doc.url), doc.parent)
|
2019-02-20 01:10:18 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return values.None
|
|
|
|
}
|
|
|
|
|
|
|
|
return cp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) Clone() core.Value {
|
2019-06-19 23:58:56 +02:00
|
|
|
cloned, err := NewHTMLDocument(doc.doc, doc.url.String(), doc.parent)
|
2019-02-20 01:10:18 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return values.None
|
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
return cloned
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) Length() values.Int {
|
2019-06-19 23:58:56 +02:00
|
|
|
return values.NewInt(doc.doc.Length())
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) Iterate(_ context.Context) (core.Iterator, error) {
|
|
|
|
return common.NewIterator(doc.element)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) GetIn(ctx context.Context, path []core.Value) (core.Value, error) {
|
|
|
|
return common.GetInDocument(ctx, doc, path)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) SetIn(ctx context.Context, path []core.Value, value core.Value) error {
|
|
|
|
return common.SetInDocument(ctx, doc, path, value)
|
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetNodeType() values.Int {
|
2019-02-20 01:10:18 +02:00
|
|
|
return 9
|
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetNodeName() values.String {
|
2019-02-20 01:10:18 +02:00
|
|
|
return "#document"
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) GetChildNodes(ctx context.Context) core.Value {
|
|
|
|
return doc.element.GetChildNodes(ctx)
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) GetChildNode(ctx context.Context, idx values.Int) core.Value {
|
|
|
|
return doc.element.GetChildNode(ctx, idx)
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) QuerySelector(ctx context.Context, selector values.String) core.Value {
|
|
|
|
return doc.element.QuerySelector(ctx, selector)
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) QuerySelectorAll(ctx context.Context, selector values.String) core.Value {
|
|
|
|
return doc.element.QuerySelectorAll(ctx, selector)
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) CountBySelector(ctx context.Context, selector values.String) values.Int {
|
|
|
|
return doc.element.CountBySelector(ctx, selector)
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector values.String) values.Boolean {
|
|
|
|
return doc.element.ExistsBySelector(ctx, selector)
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
2019-02-13 19:31:18 +02:00
|
|
|
|
2019-07-03 20:05:02 +02:00
|
|
|
func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) {
|
|
|
|
return doc.element.XPath(ctx, expression)
|
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) IsDetached() values.Boolean {
|
|
|
|
return values.False
|
2018-09-18 22:42:38 +02:00
|
|
|
}
|
2018-09-28 01:05:56 +02:00
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetTitle() values.String {
|
|
|
|
title := doc.doc.Find("head > title")
|
2019-03-16 01:59:05 +02:00
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
return values.NewString(title.Text())
|
2019-03-16 01:59:05 +02:00
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetChildDocuments(_ context.Context) (*values.Array, error) {
|
|
|
|
return doc.children.Clone().(*values.Array), nil
|
2019-03-16 01:59:05 +02:00
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetURL() values.String {
|
|
|
|
return doc.url
|
2019-03-16 01:59:05 +02:00
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetElement() drivers.HTMLElement {
|
|
|
|
return doc.element
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetName() values.String {
|
|
|
|
return ""
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
func (doc *HTMLDocument) GetParentDocument() drivers.HTMLDocument {
|
|
|
|
return doc.parent
|
2019-02-20 01:10:18 +02:00
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) ClickBySelector(_ context.Context, _ values.String) (values.Boolean, error) {
|
2019-02-20 01:10:18 +02:00
|
|
|
return false, core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) ClickBySelectorAll(_ context.Context, _ values.String) (values.Boolean, error) {
|
2019-02-20 01:10:18 +02:00
|
|
|
return false, core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) InputBySelector(_ context.Context, _ values.String, _ core.Value, _ values.Int) (values.Boolean, error) {
|
2019-02-20 01:10:18 +02:00
|
|
|
return false, core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) SelectBySelector(_ context.Context, _ values.String, _ *values.Array) (*values.Array, error) {
|
2019-02-20 01:10:18 +02:00
|
|
|
return nil, core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) PrintToPDF(_ context.Context, _ drivers.PDFParams) (values.Binary, error) {
|
2019-02-20 01:10:18 +02:00
|
|
|
return nil, core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) CaptureScreenshot(_ context.Context, _ drivers.ScreenshotParams) (values.Binary, error) {
|
2019-02-20 01:10:18 +02:00
|
|
|
return nil, core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) ScrollTop(_ context.Context) error {
|
2019-02-20 01:10:18 +02:00
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) ScrollBottom(_ context.Context) error {
|
2019-02-20 01:10:18 +02:00
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) ScrollBySelector(_ context.Context, _ values.String) error {
|
2019-02-20 01:10:18 +02:00
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-24 00:52:01 +02:00
|
|
|
func (doc *HTMLDocument) ScrollByXY(_ context.Context, _, _ values.Float) error {
|
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) MoveMouseBySelector(_ context.Context, _ values.String) error {
|
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) MoveMouseByXY(_ context.Context, _, _ values.Float) error {
|
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (doc *HTMLDocument) WaitForNavigation(_ context.Context) error {
|
2019-02-20 01:10:18 +02:00
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-03-07 04:52:41 +02:00
|
|
|
func (doc *HTMLDocument) WaitForElement(_ context.Context, _ values.String, _ drivers.WaitEvent) error {
|
2019-02-20 01:10:18 +02:00
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-03-07 04:52:41 +02:00
|
|
|
func (doc *HTMLDocument) WaitForClassBySelector(_ context.Context, _, _ values.String, _ drivers.WaitEvent) error {
|
2019-02-20 01:10:18 +02:00
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-03-07 04:52:41 +02:00
|
|
|
func (doc *HTMLDocument) WaitForClassBySelectorAll(_ context.Context, _, _ values.String, _ drivers.WaitEvent) error {
|
2019-02-20 01:10:18 +02:00
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-03-14 04:50:29 +02:00
|
|
|
func (doc *HTMLDocument) WaitForAttributeBySelector(_ context.Context, _, _ values.String, _ core.Value, _ drivers.WaitEvent) error {
|
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) WaitForAttributeBySelectorAll(_ context.Context, _, _ values.String, _ core.Value, _ drivers.WaitEvent) error {
|
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-03-15 04:10:15 +02:00
|
|
|
func (doc *HTMLDocument) WaitForStyleBySelector(_ context.Context, _, _ values.String, _ core.Value, _ drivers.WaitEvent) error {
|
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
func (doc *HTMLDocument) WaitForStyleBySelectorAll(_ context.Context, _, _ values.String, _ core.Value, _ drivers.WaitEvent) error {
|
|
|
|
return core.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
2019-02-20 01:10:18 +02:00
|
|
|
func (doc *HTMLDocument) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|