1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-03-21 21:47:43 +02:00
ferret/pkg/drivers/http/xpath.go
Tim Voronov 90427cd537
Feature/new selector type (#657)
* Added remote type reference resolver

* Added support of XPath query selector

* Added CDP e2e tests covering XPath integration

* Added additional CDP e2e tests covering XPath integration

* Added type check to QuerySelector casting function

* Fixed XPath e2e tests

* Fixed vuln issue

* Added support of XPath selectors to http driver

* Added e2e tests for XPath
2021-09-16 21:40:20 -04:00

123 lines
2.7 KiB
Go

package http
import (
"github.com/PuerkitoBio/goquery"
"github.com/antchfx/htmlquery"
"github.com/antchfx/xpath"
"golang.org/x/net/html"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
)
// EvalXPathToNode evaluates the XPath expression against the node backing the
// given selection and converts the first match with parseXPathNode.
//
// It returns (nil, nil) when nothing matches, or when the match is neither a
// document nor an element node. An expression that cannot be compiled is
// reported as an error: htmlquery.Query is used rather than htmlquery.FindOne
// because the latter panics on an invalid expression.
func EvalXPathToNode(selection *goquery.Selection, expression string) (drivers.HTMLNode, error) {
	node, err := htmlquery.Query(fromSelectionToNode(selection), expression)
	if err != nil {
		return nil, err
	}

	if node == nil {
		return nil, nil
	}

	return parseXPathNode(node)
}
// EvalXPathToElement evaluates the XPath expression like EvalXPathToNode and
// converts the resulting node to a drivers.HTMLElement via drivers.ToElement.
//
// It returns (nil, nil) when EvalXPathToNode finds no node, and forwards any
// error from the evaluation or the conversion.
func EvalXPathToElement(selection *goquery.Selection, expression string) (drivers.HTMLElement, error) {
	found, err := EvalXPathToNode(selection, expression)
	// A failed evaluation and an empty result both produce a nil element;
	// err is nil in the latter case.
	if err != nil || found == nil {
		return nil, err
	}

	return drivers.ToElement(found)
}
// EvalXPathToNodes evaluates the XPath expression against the node backing the
// given selection and returns the matched nodes as an array, converting each
// one with parseXPathNode. It is EvalXPathToNodesWith with that default mapper.
func EvalXPathToNodes(selection *goquery.Selection, expression string) (*values.Array, error) {
	toValue := func(n *html.Node) (core.Value, error) {
		return parseXPathNode(n)
	}

	return EvalXPathToNodesWith(selection, expression, toValue)
}
// EvalXPathToNodesWith evaluates the XPath expression against the node backing
// the given selection and converts every matched node with mapper.
//
// When the expression yields a node iterator, the result is an array holding
// one entry per visited node, in iteration order. For any other kind of result
// an empty array is returned. An error from compiling the expression or from
// mapper aborts the evaluation and is returned as-is.
//
// A nil value returned by mapper is stored as values.None so that the
// resulting array never contains nil entries.
func EvalXPathToNodesWith(selection *goquery.Selection, expression string, mapper func(node *html.Node) (core.Value, error)) (*values.Array, error) {
	out, err := evalXPathToInternal(selection, expression)
	if err != nil {
		return nil, err
	}

	iter, ok := out.(*xpath.NodeIterator)
	if !ok {
		return values.EmptyArray(), nil
	}

	items := values.NewArray(10)

	for iter.MoveNext() {
		// The navigator passed to Evaluate is created by htmlquery, so the
		// iterator hands back *htmlquery.NodeNavigator values.
		nav := iter.Current().(*htmlquery.NodeNavigator)

		item, err := mapper(nav.Current())
		if err != nil {
			return nil, err
		}

		// Mappers such as parseXPathNode return nil for nodes they do not
		// support; keep the slot but never store a nil core.Value.
		if item == nil {
			item = values.None
		}

		items.Push(item)
	}

	return items, nil
}
// EvalXPathTo evaluates the XPath expression against the node backing the
// given selection and returns the result as a runtime value.
//
// When the expression yields a node iterator, the result is an array with one
// entry per visited node, each converted with parseXPathNode. Any other kind
// of result is converted with values.Parse. An error from compiling the
// expression or from converting a node is returned as-is.
//
// Nodes that parseXPathNode does not convert (it returns nil for anything
// other than document and element nodes) are stored as values.None so that
// the resulting array never contains nil entries.
func EvalXPathTo(selection *goquery.Selection, expression string) (core.Value, error) {
	out, err := evalXPathToInternal(selection, expression)
	if err != nil {
		return nil, err
	}

	iter, ok := out.(*xpath.NodeIterator)
	if !ok {
		return values.Parse(out), nil
	}

	items := values.NewArray(10)

	for iter.MoveNext() {
		// The navigator passed to Evaluate is created by htmlquery, so the
		// iterator hands back *htmlquery.NodeNavigator values.
		nav := iter.Current().(*htmlquery.NodeNavigator)

		node, err := parseXPathNode(nav.Current())
		if err != nil {
			return nil, err
		}

		if node == nil {
			// Keep the position of the unsupported node without storing nil.
			items.Push(values.None)

			continue
		}

		items.Push(node)
	}

	return items, nil
}
// evalXPathToInternal compiles the XPath expression and evaluates it against
// the node backing the given selection, returning the raw result of
// xpath.Expr.Evaluate. A compilation failure is returned as an error.
func evalXPathToInternal(selection *goquery.Selection, expression string) (interface{}, error) {
	compiled, compileErr := xpath.Compile(expression)
	if compileErr != nil {
		return nil, compileErr
	}

	root := fromSelectionToNode(selection)
	nav := htmlquery.CreateXPathNavigator(root)

	return compiled.Evaluate(nav), nil
}
// parseXPathNode wraps a raw HTML node into the matching driver type.
//
// Document nodes become an HTML document (its URL is read from the node's
// "url" attribute), element nodes become an HTML element. A nil node and any
// other node type yield (nil, nil).
func parseXPathNode(node *html.Node) (drivers.HTMLNode, error) {
	if node == nil {
		return nil, nil
	}

	if node.Type == html.DocumentNode {
		docURL := htmlquery.SelectAttr(node, "url")
		doc := goquery.NewDocumentFromNode(node)

		return NewHTMLDocument(doc, docURL, nil)
	}

	if node.Type == html.ElementNode {
		single := &goquery.Selection{Nodes: []*html.Node{node}}

		return NewHTMLElement(single)
	}

	return nil, nil
}