mirror of
https://github.com/MontFerret/ferret.git
synced 2024-12-16 11:37:36 +02:00
310 lines
5.8 KiB
Go
310 lines
5.8 KiB
Go
|
package browser
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"context"
|
||
|
"crypto/sha512"
|
||
|
"encoding/json"
|
||
|
"github.com/MontFerret/ferret/pkg/runtime/core"
|
||
|
"github.com/MontFerret/ferret/pkg/runtime/values"
|
||
|
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
|
||
|
"github.com/PuerkitoBio/goquery"
|
||
|
"github.com/mafredri/cdp"
|
||
|
"github.com/mafredri/cdp/protocol/dom"
|
||
|
"log"
|
||
|
"strconv"
|
||
|
"time"
|
||
|
)
|
||
|
|
||
|
const DefaultTimeout = time.Second * 30
|
||
|
|
||
|
type HtmlElement struct {
|
||
|
client *cdp.Client
|
||
|
id dom.NodeID
|
||
|
node dom.Node
|
||
|
attributes *values.Object
|
||
|
}
|
||
|
|
||
|
func NewHtmlElement(
|
||
|
client *cdp.Client,
|
||
|
id dom.NodeID,
|
||
|
) (*HtmlElement, error) {
|
||
|
if client == nil {
|
||
|
return nil, core.Error(core.ErrMissedArgument, "client")
|
||
|
}
|
||
|
|
||
|
ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout)
|
||
|
|
||
|
defer cancelFn()
|
||
|
|
||
|
node, err := client.DOM.DescribeNode(
|
||
|
ctx,
|
||
|
dom.
|
||
|
NewDescribeNodeArgs().
|
||
|
SetNodeID(id).
|
||
|
SetDepth(-1),
|
||
|
)
|
||
|
|
||
|
if err != nil {
|
||
|
log.Println("ERROR:", err)
|
||
|
return nil, core.Error(err, strconv.Itoa(int(id)))
|
||
|
}
|
||
|
|
||
|
return &HtmlElement{client, id, node.Node, nil}, nil
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) Close() error {
|
||
|
// el.client = nil
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) Type() core.Type {
|
||
|
return core.HtmlElementType
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) MarshalJSON() ([]byte, error) {
|
||
|
ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout)
|
||
|
|
||
|
defer cancelFn()
|
||
|
|
||
|
args := dom.NewGetOuterHTMLArgs()
|
||
|
args.NodeID = &el.node.NodeID
|
||
|
|
||
|
reply, err := el.client.DOM.GetOuterHTML(ctx, args)
|
||
|
|
||
|
if err != nil {
|
||
|
return nil, core.Error(err, strconv.Itoa(int(el.node.NodeID)))
|
||
|
}
|
||
|
|
||
|
return json.Marshal(reply.OuterHTML)
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) String() string {
|
||
|
return *el.node.Value
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) Compare(other core.Value) int {
|
||
|
switch other.Type() {
|
||
|
case core.HtmlDocumentType:
|
||
|
other := other.(*HtmlElement)
|
||
|
|
||
|
id := int(el.node.NodeID)
|
||
|
otherId := int(other.node.NodeID)
|
||
|
|
||
|
if id == otherId {
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
if id > otherId {
|
||
|
return 1
|
||
|
}
|
||
|
|
||
|
return -1
|
||
|
default:
|
||
|
if other.Type() > core.HtmlElementType {
|
||
|
return -1
|
||
|
}
|
||
|
|
||
|
return 1
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) Unwrap() interface{} {
|
||
|
return el.node
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) Hash() int {
|
||
|
h := sha512.New()
|
||
|
|
||
|
out, err := h.Write([]byte(*el.node.Value))
|
||
|
|
||
|
if err != nil {
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
return out
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) Value() core.Value {
|
||
|
return values.None
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) Length() values.Int {
|
||
|
if el.node.ChildNodeCount == nil {
|
||
|
return values.ZeroInt
|
||
|
}
|
||
|
|
||
|
return values.NewInt(*el.node.ChildNodeCount)
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) NodeType() values.Int {
|
||
|
return values.NewInt(el.node.NodeType)
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) NodeName() values.String {
|
||
|
return values.NewString(el.node.NodeName)
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) GetAttributes() core.Value {
|
||
|
if el.attributes == nil {
|
||
|
el.attributes = el.parseAttrs()
|
||
|
}
|
||
|
|
||
|
return el.attributes
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) GetAttribute(name values.String) core.Value {
|
||
|
if el.attributes == nil {
|
||
|
el.attributes = el.parseAttrs()
|
||
|
}
|
||
|
|
||
|
val, found := el.attributes.Get(name)
|
||
|
|
||
|
if !found {
|
||
|
return values.None
|
||
|
}
|
||
|
|
||
|
return val
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) GetChildNodes() core.Value {
|
||
|
arr := values.NewArray(len(el.node.Children))
|
||
|
|
||
|
for idx := range el.node.Children {
|
||
|
el := el.GetChildNode(values.NewInt(idx))
|
||
|
|
||
|
if el != values.None {
|
||
|
arr.Push(el)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return arr
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) GetChildNode(idx values.Int) core.Value {
|
||
|
if el.Length() < idx {
|
||
|
return values.None
|
||
|
}
|
||
|
|
||
|
childNode := el.node.Children[idx]
|
||
|
|
||
|
return &HtmlElement{el.client, childNode.NodeID, childNode, nil}
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
|
||
|
ctx := context.Background()
|
||
|
|
||
|
selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String())
|
||
|
found, err := el.client.DOM.QuerySelector(ctx, selectorArgs)
|
||
|
|
||
|
if err != nil {
|
||
|
el.logErr(err, selector.String())
|
||
|
return values.None
|
||
|
}
|
||
|
|
||
|
res, err := NewHtmlElement(el.client, found.NodeID)
|
||
|
|
||
|
if err != nil {
|
||
|
return values.None
|
||
|
}
|
||
|
|
||
|
return res
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value {
|
||
|
ctx := context.Background()
|
||
|
|
||
|
selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String())
|
||
|
res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs)
|
||
|
|
||
|
if err != nil {
|
||
|
el.logErr(err, selector.String())
|
||
|
return values.None
|
||
|
}
|
||
|
|
||
|
arr := values.NewArray(len(res.NodeIDs))
|
||
|
|
||
|
for _, id := range res.NodeIDs {
|
||
|
childEl, err := NewHtmlElement(el.client, id)
|
||
|
|
||
|
if err != nil {
|
||
|
return values.None
|
||
|
}
|
||
|
|
||
|
arr.Push(childEl)
|
||
|
}
|
||
|
|
||
|
return arr
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) InnerText() values.String {
|
||
|
h := el.InnerHtml()
|
||
|
|
||
|
if h == values.EmptyString {
|
||
|
return h
|
||
|
}
|
||
|
|
||
|
buff := bytes.NewBuffer([]byte(h))
|
||
|
|
||
|
parsed, err := goquery.NewDocumentFromReader(buff)
|
||
|
|
||
|
if err != nil {
|
||
|
return values.EmptyString
|
||
|
}
|
||
|
|
||
|
return values.NewString(parsed.Text())
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) InnerHtml() values.String {
|
||
|
ctx, cancelFn := el.createCtx()
|
||
|
|
||
|
defer cancelFn()
|
||
|
|
||
|
res, err := el.client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(el.id))
|
||
|
|
||
|
if err != nil {
|
||
|
el.logErr(err)
|
||
|
|
||
|
return values.EmptyString
|
||
|
}
|
||
|
|
||
|
return values.NewString(res.OuterHTML)
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) createCtx() (context.Context, context.CancelFunc) {
|
||
|
return context.WithTimeout(context.Background(), DefaultTimeout)
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) parseAttrs() *values.Object {
|
||
|
var attr values.String
|
||
|
|
||
|
res := values.NewObject()
|
||
|
|
||
|
for _, el := range el.node.Attributes {
|
||
|
str := values.NewString(el)
|
||
|
|
||
|
if common.IsAttribute(el) {
|
||
|
attr = str
|
||
|
res.Set(str, values.EmptyString)
|
||
|
} else {
|
||
|
current, ok := res.Get(attr)
|
||
|
|
||
|
if ok {
|
||
|
res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str))
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return res
|
||
|
}
|
||
|
|
||
|
func (el *HtmlElement) logErr(values ...interface{}) {
|
||
|
args := make([]interface{}, 0, len(values)+1)
|
||
|
args = append(args, "ERROR:")
|
||
|
args = append(args, values...)
|
||
|
args = append(args, "id:", el.node.NodeID)
|
||
|
|
||
|
log.Println(args...)
|
||
|
}
|