1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-16 11:37:36 +02:00
ferret/pkg/stdlib/html/driver/dynamic/element.go

701 lines
13 KiB
Go
Raw Normal View History

2018-09-27 04:03:06 +02:00
package dynamic
2018-09-18 22:42:38 +02:00
import (
"bytes"
"context"
"crypto/sha512"
"encoding/json"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/common"
2018-09-27 23:19:55 +02:00
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/eval"
2018-09-27 04:03:06 +02:00
"github.com/MontFerret/ferret/pkg/stdlib/html/driver/dynamic/events"
2018-09-18 22:42:38 +02:00
"github.com/PuerkitoBio/goquery"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
2018-09-28 06:28:33 +02:00
"github.com/rs/zerolog"
2018-09-18 22:42:38 +02:00
"strconv"
2018-09-27 04:03:06 +02:00
"sync"
2018-09-18 22:42:38 +02:00
"time"
)
// DefaultTimeout bounds the protocol calls made while loading an element.
const DefaultTimeout = 30 * time.Second
type HtmlElement struct {
2018-09-27 04:03:06 +02:00
sync.Mutex
2018-09-28 06:28:33 +02:00
logger *zerolog.Logger
client *cdp.Client
2018-09-27 04:03:06 +02:00
broker *events.EventBroker
2018-09-27 06:45:07 +02:00
connected values.Boolean
id dom.NodeID
nodeType values.Int
nodeName values.String
2018-09-27 06:45:07 +02:00
innerHtml values.String
2018-09-27 04:03:06 +02:00
innerText *common.LazyValue
2018-09-27 23:19:55 +02:00
value core.Value
2018-09-28 06:28:33 +02:00
rawAttrs []string
2018-09-27 04:03:06 +02:00
attributes *common.LazyValue
children []dom.NodeID
2018-09-27 04:03:06 +02:00
loadedChildren *common.LazyValue
2018-09-18 22:42:38 +02:00
}
func LoadElement(
2018-09-28 06:28:33 +02:00
logger *zerolog.Logger,
2018-09-18 22:42:38 +02:00
client *cdp.Client,
2018-09-27 04:03:06 +02:00
broker *events.EventBroker,
2018-09-18 22:42:38 +02:00
id dom.NodeID,
) (*HtmlElement, error) {
if client == nil {
return nil, core.Error(core.ErrMissedArgument, "client")
}
ctx, cancelFn := context.WithTimeout(context.Background(), DefaultTimeout)
defer cancelFn()
node, err := client.DOM.DescribeNode(
ctx,
dom.
NewDescribeNodeArgs().
SetNodeID(id).
2018-09-27 06:26:56 +02:00
SetDepth(1),
2018-09-18 22:42:38 +02:00
)
if err != nil {
return nil, core.Error(err, strconv.Itoa(int(id)))
}
2018-09-27 06:26:56 +02:00
innerHtml, err := loadInnerHtml(client, id)
2018-09-27 04:03:06 +02:00
if err != nil {
return nil, core.Error(err, strconv.Itoa(int(id)))
}
return NewHtmlElement(
2018-09-28 06:28:33 +02:00
logger,
2018-09-27 04:03:06 +02:00
client,
broker,
id,
node.Node,
2018-09-27 06:26:56 +02:00
innerHtml,
2018-09-27 04:03:06 +02:00
), nil
}
func NewHtmlElement(
2018-09-28 06:28:33 +02:00
logger *zerolog.Logger,
client *cdp.Client,
2018-09-27 04:03:06 +02:00
broker *events.EventBroker,
id dom.NodeID,
node dom.Node,
2018-09-27 04:03:06 +02:00
innerHtml values.String,
) *HtmlElement {
el := new(HtmlElement)
2018-09-28 06:28:33 +02:00
el.logger = logger
el.client = client
2018-09-27 04:03:06 +02:00
el.broker = broker
2018-09-27 06:45:07 +02:00
el.connected = values.True
el.id = id
el.nodeType = values.NewInt(node.NodeType)
el.nodeName = values.NewString(node.NodeName)
2018-09-27 06:45:07 +02:00
el.innerHtml = innerHtml
2018-09-28 06:28:33 +02:00
el.innerText = common.NewLazyValue(el.loadInnerText)
el.rawAttrs = node.Attributes[:]
el.attributes = common.NewLazyValue(el.loadAttrs)
2018-09-27 06:45:07 +02:00
el.value = values.EmptyString
2018-09-28 06:28:33 +02:00
el.loadedChildren = common.NewLazyValue(el.loadChildren)
if node.Value != nil {
2018-09-27 06:45:07 +02:00
el.value = values.NewString(*node.Value)
}
2018-09-27 06:26:56 +02:00
el.children = createChildrenArray(node.Children)
2018-09-27 17:32:52 +02:00
broker.AddEventListener("reload", el.handlePageReload)
broker.AddEventListener("attr:modified", el.handleAttrModified)
broker.AddEventListener("attr:removed", el.handleAttrRemoved)
broker.AddEventListener("children:count", el.handleChildrenCountChanged)
broker.AddEventListener("children:inserted", el.handleChildInserted)
broker.AddEventListener("children:deleted", el.handleChildDeleted)
return el
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) Close() error {
2018-09-27 17:32:52 +02:00
el.Lock()
defer el.Unlock()
// already closed
if !el.connected {
return nil
}
el.connected = false
el.broker.RemoveEventListener("reload", el.handlePageReload)
el.broker.RemoveEventListener("attr:modified", el.handleAttrModified)
el.broker.RemoveEventListener("attr:removed", el.handleAttrRemoved)
el.broker.RemoveEventListener("children:count", el.handleChildrenCountChanged)
el.broker.RemoveEventListener("children:inserted", el.handleChildInserted)
el.broker.RemoveEventListener("children:deleted", el.handleChildDeleted)
2018-09-18 22:42:38 +02:00
return nil
}
// Type reports the runtime type of the value, which is always
// core.HtmlElementType.
func (el *HtmlElement) Type() core.Type {
	return core.HtmlElementType
}
func (el *HtmlElement) MarshalJSON() ([]byte, error) {
val, err := el.innerText.Value()
if err != nil {
return nil, err
}
return json.Marshal(val.String())
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) String() string {
return el.InnerHtml().String()
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) Compare(other core.Value) int {
switch other.Type() {
case core.HtmlDocumentType:
other := other.(*HtmlElement)
id := int(el.id)
otherId := int(other.id)
2018-09-18 22:42:38 +02:00
if id == otherId {
return 0
}
if id > otherId {
return 1
}
return -1
default:
if other.Type() > core.HtmlElementType {
return -1
}
return 1
}
}
func (el *HtmlElement) Unwrap() interface{} {
return el
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) Hash() int {
2018-09-28 06:28:33 +02:00
el.Lock()
defer el.Unlock()
2018-09-18 22:42:38 +02:00
h := sha512.New()
2018-09-28 06:28:33 +02:00
out, err := h.Write([]byte(el.innerHtml))
2018-09-18 22:42:38 +02:00
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Err(err).
Msg("failed to calculate hash value")
2018-09-18 22:42:38 +02:00
return 0
}
return out
}
func (el *HtmlElement) Value() core.Value {
2018-09-27 23:19:55 +02:00
if !el.IsConnected() {
return el.value
}
ctx, cancel := contextWithTimeout()
defer cancel()
val, err := eval.Property(ctx, el.client, el.id, "value")
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Err(err).
Msg("failed to get node value")
2018-09-27 23:19:55 +02:00
return el.value
}
el.value = val
return val
2018-09-18 22:42:38 +02:00
}
2018-09-27 17:53:26 +02:00
// Clone does not copy the element; it always returns values.None.
func (el *HtmlElement) Clone() core.Value {
	var none core.Value = values.None

	return none
}
2018-09-18 22:42:38 +02:00
func (el *HtmlElement) Length() values.Int {
return values.NewInt(len(el.children))
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) NodeType() values.Int {
return el.nodeType
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) NodeName() values.String {
return el.nodeName
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) GetAttributes() core.Value {
2018-09-27 04:03:06 +02:00
val, err := el.attributes.Value()
if err != nil {
return values.None
}
2018-09-27 17:53:26 +02:00
// returning shallow copy
return val.Clone()
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) GetAttribute(name values.String) core.Value {
2018-09-27 04:03:06 +02:00
attrs, err := el.attributes.Value()
if err != nil {
return values.None
}
val, found := attrs.(*values.Object).Get(name)
2018-09-18 22:42:38 +02:00
if !found {
return values.None
}
return val
}
func (el *HtmlElement) GetChildNodes() core.Value {
2018-09-27 04:03:06 +02:00
val, err := el.loadedChildren.Value()
if err != nil {
return values.NewArray(0)
2018-09-18 22:42:38 +02:00
}
2018-09-27 04:03:06 +02:00
return val
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) GetChildNode(idx values.Int) core.Value {
2018-09-27 04:03:06 +02:00
val, err := el.loadedChildren.Value()
if err != nil {
return values.None
2018-09-18 22:42:38 +02:00
}
2018-09-27 04:03:06 +02:00
return val.(*values.Array).Get(idx)
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) QuerySelector(selector values.String) core.Value {
2018-09-27 06:45:07 +02:00
if !el.IsConnected() {
return values.NewArray(0)
}
2018-09-18 22:42:38 +02:00
ctx := context.Background()
selectorArgs := dom.NewQuerySelectorArgs(el.id, selector.String())
found, err := el.client.DOM.QuerySelector(ctx, selectorArgs)
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Str("selector", selector.String()).
Err(err).
Msg("failed to retrieve a node by selector")
2018-09-18 22:42:38 +02:00
return values.None
}
2018-09-28 06:28:33 +02:00
res, err := LoadElement(el.logger, el.client, el.broker, found.NodeID)
2018-09-18 22:42:38 +02:00
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Str("selector", selector.String()).
Err(err).
Msg("failed to load a child node by selector")
2018-09-18 22:42:38 +02:00
return values.None
}
return res
}
func (el *HtmlElement) QuerySelectorAll(selector values.String) core.Value {
2018-09-27 06:45:07 +02:00
if !el.IsConnected() {
return values.NewArray(0)
}
2018-09-18 22:42:38 +02:00
ctx := context.Background()
selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String())
res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs)
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Str("selector", selector.String()).
Err(err).
Msg("failed to retrieve nodes by selector")
2018-09-18 22:42:38 +02:00
return values.None
}
arr := values.NewArray(len(res.NodeIDs))
for _, id := range res.NodeIDs {
2018-09-28 06:28:33 +02:00
childEl, err := LoadElement(el.logger, el.client, el.broker, id)
2018-09-18 22:42:38 +02:00
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Str("selector", selector.String()).
Err(err).
Msg("failed to load nodes by selector")
2018-09-18 22:42:38 +02:00
return values.None
}
arr.Push(childEl)
}
return arr
}
func (el *HtmlElement) InnerText() values.String {
2018-09-27 04:03:06 +02:00
val, err := el.innerText.Value()
2018-09-18 22:42:38 +02:00
if err != nil {
return values.EmptyString
}
2018-09-27 04:03:06 +02:00
return val.(values.String)
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) InnerHtml() values.String {
2018-09-27 06:45:07 +02:00
el.Lock()
defer el.Unlock()
return el.innerHtml
2018-09-18 22:42:38 +02:00
}
func (el *HtmlElement) Click() (values.Boolean, error) {
2018-09-27 04:03:06 +02:00
ctx, cancel := contextWithTimeout()
defer cancel()
2018-09-25 23:58:57 +02:00
return events.DispatchEvent(ctx, el.client, el.id, "click")
}
2018-09-27 06:26:56 +02:00
2018-09-28 03:41:41 +02:00
// Input sets the node's "value" attribute to the string form of value,
// bounded by the shared timeout.
func (el *HtmlElement) Input(value core.Value) error {
	ctx, cancel := contextWithTimeout()
	defer cancel()

	args := dom.NewSetAttributeValueArgs(el.id, "value", value.String())

	return el.client.DOM.SetAttributeValue(ctx, args)
}
2018-09-27 06:26:56 +02:00
func (el *HtmlElement) IsConnected() values.Boolean {
2018-09-27 06:45:07 +02:00
el.Lock()
defer el.Unlock()
return el.connected
2018-09-27 06:26:56 +02:00
}
2018-09-27 17:32:52 +02:00
2018-09-28 06:28:33 +02:00
// loadInnerText derives the text content from the cached inner HTML by
// parsing it with goquery. Empty HTML is returned as is; a parse failure is
// logged and returned together with an empty string.
func (el *HtmlElement) loadInnerText() (core.Value, error) {
	html := el.InnerHtml()
	if html == values.EmptyString {
		return html, nil
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(html)))
	if err != nil {
		el.logger.Error().
			Timestamp().
			Err(err).
			Int("id", int(el.id)).
			Msg("failed to parse inner html")

		return values.EmptyString, err
	}

	text := doc.Text()

	return values.NewString(text), nil
}
// loadAttrs parses the raw attribute list captured at construction time.
// It never fails.
func (el *HtmlElement) loadAttrs() (core.Value, error) {
	parsed := parseAttrs(el.rawAttrs)

	return parsed, nil
}
// loadChildren loads an element for every known child id. A disconnected
// element yields an empty array; a load failure is logged and returned
// together with values.None.
func (el *HtmlElement) loadChildren() (core.Value, error) {
	if !el.IsConnected() {
		return values.NewArray(0), nil
	}

	nodes, err := loadNodes(el.logger, el.client, el.broker, el.children)
	if err == nil {
		return nodes, nil
	}

	el.logger.Error().
		Timestamp().
		Err(err).
		Int("id", int(el.id)).
		Msg("failed to load child nodes")

	return values.None, err
}
2018-09-27 17:32:52 +02:00
// handlePageReload closes the element when a "reload" event arrives.
// The message payload is not used.
func (el *HtmlElement) handlePageReload(message interface{}) {
	// Close always returns nil, so its result is deliberately dropped.
	_ = el.Close()
}
func (el *HtmlElement) handleAttrModified(message interface{}) {
reply, ok := message.(*dom.AttributeModifiedReply)
// well....
if !ok {
return
}
// it's not for this element
if reply.NodeID != el.id {
return
}
// they are not event loaded
// just ignore the event
if !el.attributes.Ready() {
return
}
val, err := el.attributes.Value()
// failed to load
if err != nil {
return
}
2018-09-27 17:53:26 +02:00
el.Lock()
defer el.Unlock()
2018-09-27 17:32:52 +02:00
attrs, ok := val.(*values.Object)
if !ok {
return
}
attrs.Set(values.NewString(reply.Name), values.NewString(reply.Value))
}
func (el *HtmlElement) handleAttrRemoved(message interface{}) {
reply, ok := message.(*dom.AttributeRemovedReply)
// well....
if !ok {
return
}
// it's not for this element
if reply.NodeID != el.id {
return
}
// they are not event loaded
// just ignore the event
if !el.attributes.Ready() {
return
}
val, err := el.attributes.Value()
// failed to load
if err != nil {
return
}
2018-09-27 17:53:26 +02:00
el.Lock()
defer el.Unlock()
2018-09-27 17:32:52 +02:00
attrs, ok := val.(*values.Object)
if !ok {
return
}
// TODO: actually, we need to sync it too...
attrs.Remove(values.NewString(reply.Name))
}
func (el *HtmlElement) handleChildrenCountChanged(message interface{}) {
reply, ok := message.(*dom.ChildNodeCountUpdatedReply)
if !ok {
return
}
if reply.NodeID != el.id {
return
}
node, err := el.client.DOM.DescribeNode(context.Background(), dom.NewDescribeNodeArgs())
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to update node")
2018-09-27 17:32:52 +02:00
return
}
el.Lock()
defer el.Unlock()
el.children = createChildrenArray(node.Node.Children)
}
func (el *HtmlElement) handleChildInserted(message interface{}) {
reply, ok := message.(*dom.ChildNodeInsertedReply)
if !ok {
return
}
if reply.ParentNodeID != el.id {
return
}
targetIdx := -1
prevId := reply.PreviousNodeID
nextId := reply.Node.NodeID
el.Lock()
defer el.Unlock()
for idx, id := range el.children {
if id == prevId {
targetIdx = idx
break
}
}
if targetIdx == -1 {
return
}
arr := el.children
el.children = append(arr[:targetIdx], append([]dom.NodeID{nextId}, arr[targetIdx:]...)...)
if !el.loadedChildren.Ready() {
return
}
loaded, err := el.loadedChildren.Value()
if err != nil {
return
}
loadedArr := loaded.(*values.Array)
2018-09-28 06:28:33 +02:00
loadedEl, err := LoadElement(el.logger, el.client, el.broker, nextId)
2018-09-27 17:32:52 +02:00
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to load an inserted node")
2018-09-27 17:32:52 +02:00
return
}
loadedArr.Insert(values.NewInt(targetIdx), loadedEl)
newInnerHtml, err := loadInnerHtml(el.client, el.id)
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to update node")
2018-09-27 17:32:52 +02:00
return
}
el.innerHtml = newInnerHtml
}
func (el *HtmlElement) handleChildDeleted(message interface{}) {
reply, ok := message.(*dom.ChildNodeRemovedReply)
if !ok {
return
}
if reply.ParentNodeID != el.id {
return
}
targetIdx := -1
targetId := reply.NodeID
el.Lock()
defer el.Unlock()
for idx, id := range el.children {
if id == targetId {
targetIdx = idx
break
}
}
if targetIdx == -1 {
return
}
arr := el.children
el.children = append(arr[:targetIdx], arr[targetIdx+1:]...)
if !el.loadedChildren.Ready() {
return
}
loaded, err := el.loadedChildren.Value()
if err != nil {
return
}
loadedArr := loaded.(*values.Array)
loadedArr.RemoveAt(values.NewInt(targetIdx))
newInnerHtml, err := loadInnerHtml(el.client, el.id)
if err != nil {
2018-09-28 06:28:33 +02:00
el.logger.Error().
Timestamp().
Err(err).
Int("id", int(el.id)).
Msg("failed to update node")
2018-09-27 17:32:52 +02:00
return
}
el.innerHtml = newInnerHtml
}