| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | package dynamic | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2018-10-08 20:20:40 -04:00
										 |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2018-10-07 21:32:30 -04:00
										 |  |  | 	"github.com/MontFerret/ferret/pkg/html/common" | 
					
						
							|  |  |  | 	"github.com/MontFerret/ferret/pkg/html/dynamic/events" | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 	"github.com/MontFerret/ferret/pkg/runtime/values" | 
					
						
							| 
									
										
										
										
											2018-10-08 20:20:40 -04:00
										 |  |  | 	"github.com/PuerkitoBio/goquery" | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 	"github.com/mafredri/cdp" | 
					
						
							|  |  |  | 	"github.com/mafredri/cdp/protocol/dom" | 
					
						
							|  |  |  | 	"github.com/mafredri/cdp/protocol/page" | 
					
						
							| 
									
										
										
										
											2018-09-28 00:28:33 -04:00
										 |  |  | 	"github.com/rs/zerolog" | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 	"golang.org/x/sync/errgroup" | 
					
						
							| 
									
										
										
										
											2018-10-06 22:33:39 -04:00
										 |  |  | 	"strings" | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func pointerInt(input int) *int { | 
					
						
							|  |  |  | 	return &input | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type batchFunc = func() error | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func runBatch(funcs ...batchFunc) error { | 
					
						
							|  |  |  | 	eg := errgroup.Group{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, f := range funcs { | 
					
						
							|  |  |  | 		eg.Go(f) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return eg.Wait() | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-07 21:23:36 -04:00
										 |  |  | func getRootElement(client *cdp.Client) (dom.Node, values.String, error) { | 
					
						
							|  |  |  | 	args := dom.NewGetDocumentArgs() | 
					
						
							|  |  |  | 	args.Depth = pointerInt(1) // lets load the entire document | 
					
						
							|  |  |  | 	ctx := context.Background() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	d, err := client.DOM.GetDocument(ctx, args) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return dom.Node{}, values.EmptyString, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	innerHTML, err := client.DOM.GetOuterHTML(ctx, dom.NewGetOuterHTMLArgs().SetNodeID(d.Root.NodeID)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return dom.Node{}, values.EmptyString, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return d.Root, values.NewString(innerHTML.OuterHTML), nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | func parseAttrs(attrs []string) *values.Object { | 
					
						
							|  |  |  | 	var attr values.String | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	res := values.NewObject() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, el := range attrs { | 
					
						
							| 
									
										
										
										
											2018-10-06 22:33:39 -04:00
										 |  |  | 		el = strings.TrimSpace(el) | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 		str := values.NewString(el) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if common.IsAttribute(el) { | 
					
						
							|  |  |  | 			attr = str | 
					
						
							|  |  |  | 			res.Set(str, values.EmptyString) | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			current, ok := res.Get(attr) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if ok { | 
					
						
							| 
									
										
										
										
											2018-10-06 22:33:39 -04:00
										 |  |  | 				if current.String() != "" { | 
					
						
							|  |  |  | 					res.Set(attr, current.(values.String).Concat(values.SpaceString).Concat(str)) | 
					
						
							|  |  |  | 				} else { | 
					
						
							|  |  |  | 					res.Set(attr, str) | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return res | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-05 16:35:08 -04:00
										 |  |  | func loadInnerHTML(client *cdp.Client, id dom.NodeID) (values.String, error) { | 
					
						
							| 
									
										
										
										
											2018-09-27 00:26:56 -04:00
										 |  |  | 	res, err := client.DOM.GetOuterHTML(context.Background(), dom.NewGetOuterHTMLArgs().SetNodeID(id)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return "", err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return values.NewString(res.OuterHTML), err | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-08 20:20:40 -04:00
										 |  |  | func loadInnerText(client *cdp.Client, id dom.NodeID) (values.String, error) { | 
					
						
							|  |  |  | 	h, err := loadInnerHTML(client, id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return values.EmptyString, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if h == values.EmptyString { | 
					
						
							|  |  |  | 		return h, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return parseInnerText(h.String()) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func parseInnerText(innerHTML string) (values.String, error) { | 
					
						
							|  |  |  | 	buff := bytes.NewBuffer([]byte(innerHTML)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	parsed, err := goquery.NewDocumentFromReader(buff) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return values.EmptyString, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return values.NewString(parsed.Text()), nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-27 00:26:56 -04:00
										 |  |  | func createChildrenArray(nodes []dom.Node) []dom.NodeID { | 
					
						
							|  |  |  | 	children := make([]dom.NodeID, len(nodes)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for idx, child := range nodes { | 
					
						
							|  |  |  | 		children[idx] = child.NodeID | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return children | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-28 00:28:33 -04:00
										 |  |  | func loadNodes(logger *zerolog.Logger, client *cdp.Client, broker *events.EventBroker, nodes []dom.NodeID) (*values.Array, error) { | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 	arr := values.NewArray(len(nodes)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, id := range nodes { | 
					
						
							| 
									
										
										
										
											2018-09-28 00:28:33 -04:00
										 |  |  | 		child, err := LoadElement(logger, client, broker, id) | 
					
						
							| 
									
										
										
										
											2018-09-26 22:03:06 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return nil, err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		arr.Push(child) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return arr, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func contextWithTimeout() (context.Context, context.CancelFunc) { | 
					
						
							|  |  |  | 	return context.WithTimeout(context.Background(), DefaultTimeout) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func waitForLoadEvent(ctx context.Context, client *cdp.Client) error { | 
					
						
							|  |  |  | 	loadEventFired, err := client.Page.LoadEventFired(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	_, err = loadEventFired.Recv() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return loadEventFired.Close() | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func createEventBroker(client *cdp.Client) (*events.EventBroker, error) { | 
					
						
							|  |  |  | 	ctx := context.Background() | 
					
						
							|  |  |  | 	load, err := client.Page.LoadEventFired(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	broker := events.NewEventBroker() | 
					
						
							|  |  |  | 	broker.AddEventStream("load", load, func() interface{} { | 
					
						
							|  |  |  | 		return new(page.LoadEventFiredReply) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	err = broker.Start() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		broker.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	destroy, err := client.DOM.DocumentUpdated(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		broker.Close() | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	broker.AddEventStream("reload", destroy, func() interface{} { | 
					
						
							|  |  |  | 		return new(dom.DocumentUpdatedReply) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	attrModified, err := client.DOM.AttributeModified(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		broker.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	broker.AddEventStream("attr:modified", attrModified, func() interface{} { | 
					
						
							|  |  |  | 		return new(dom.AttributeModifiedReply) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	attrRemoved, err := client.DOM.AttributeRemoved(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		broker.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	broker.AddEventStream("attr:removed", attrRemoved, func() interface{} { | 
					
						
							|  |  |  | 		return new(dom.AttributeRemovedReply) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	childrenCount, err := client.DOM.ChildNodeCountUpdated(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		broker.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	broker.AddEventStream("children:count", childrenCount, func() interface{} { | 
					
						
							|  |  |  | 		return new(dom.ChildNodeCountUpdatedReply) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	childrenInsert, err := client.DOM.ChildNodeInserted(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		broker.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	broker.AddEventStream("children:inserted", childrenInsert, func() interface{} { | 
					
						
							|  |  |  | 		return new(dom.ChildNodeInsertedReply) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	childDeleted, err := client.DOM.ChildNodeRemoved(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		broker.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	broker.AddEventStream("children:deleted", childDeleted, func() interface{} { | 
					
						
							|  |  |  | 		return new(dom.ChildNodeRemovedReply) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return broker, nil | 
					
						
							|  |  |  | } |