1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-03-23 21:51:08 +02:00
ferret/pkg/drivers/common/getter.go
Tim Voronov 90427cd537
Feature/new selector type (#657)
* Added remote type reference resolver

* Added support of XPath query selector

* Added CDP e2e tests covering XPath integration

* Added additional CDP e2e tests covering XPath integration

* Added type check to QuerySelector casting function

* Fixed XPath e2e tests

* Fixed vuln issue

* Added support of XPath selectors to http driver

* Added e2e tests for XPath
2021-09-16 21:40:20 -04:00

314 lines
6.7 KiB
Go

package common
import (
"context"
"github.com/pkg/errors"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/MontFerret/ferret/pkg/runtime/values/types"
)
// GetInPage resolves path against page and returns the value it points to.
//
// An empty path yields the page itself. When the first segment is a string it
// is matched against the page-level properties listed below; any other leading
// segment (including an unrecognized string) is forwarded, together with the
// whole path, to GetInDocument on the page's main frame.
//
//	response             page.GetResponse; remaining segments go to the response's GetIn
//	mainFrame, document  GetInDocument on the main frame with the remaining segments
//	frames               page.GetFrames, or a single frame selected by a numeric index
//	url, URL             page.GetURL
//	cookies              page.GetCookies; remaining segments go to the cookies' GetIn
//	title                title of the main frame
//	isClosed             page.IsClosed
//
// On failure the returned value is values.None and the PathError is built with
// the index of the segment that was being processed.
func GetInPage(ctx context.Context, path []core.Value, page drivers.HTMLPage) (core.Value, core.PathError) {
	if len(path) == 0 {
		return page, nil
	}

	segmentIdx := 0
	segment := path[segmentIdx]

	if segment.Type() == types.String {
		segment := segment.(values.String)

		switch segment {
		case "response":
			resp, err := page.GetResponse(ctx)
			if err != nil {
				return values.None, core.NewPathError(
					errors.Wrap(err, "get response"),
					segmentIdx,
				)
			}

			out, pathErr := resp.GetIn(ctx, path[segmentIdx+1:])
			if pathErr != nil {
				return values.None, core.NewPathErrorFrom(pathErr, segmentIdx)
			}

			return out, nil
		case "mainFrame", "document":
			out, pathErr := GetInDocument(ctx, path[segmentIdx+1:], page.GetMainFrame())
			if pathErr != nil {
				return values.None, core.NewPathErrorFrom(pathErr, segmentIdx)
			}

			return out, nil
		case "frames":
			if len(path) == 1 {
				out, err := page.GetFrames(ctx)
				if err != nil {
					return values.None, core.NewPathError(
						errors.Wrap(err, "get frames"),
						segmentIdx,
					)
				}

				return out, nil
			}

			// Advance to the segment that selects a single frame, e.g. frames[0].
			segmentIdx++
			idx := path[segmentIdx]

			if !values.IsNumber(idx) {
				return values.None, core.NewPathError(
					core.TypeError(idx.Type(), types.Int, types.Float),
					segmentIdx,
				)
			}

			value, err := page.GetFrame(ctx, values.ToInt(idx))
			if err != nil {
				return values.None, core.NewPathError(err, segmentIdx)
			}

			if len(path) == 2 {
				return value, nil
			}

			frame, err := drivers.ToDocument(value)
			if err != nil {
				return values.None, core.NewPathError(err, segmentIdx)
			}

			out, pathErr := GetInDocument(ctx, path[segmentIdx+1:], frame)
			// The nested lookup reports failures through pathErr; err is
			// already known to be nil here.
			if pathErr != nil {
				return values.None, core.NewPathErrorFrom(pathErr, segmentIdx)
			}

			return out, nil
		case "url", "URL":
			return page.GetURL(), nil
		case "cookies":
			cookies, err := page.GetCookies(ctx)
			if err != nil {
				return values.None, core.NewPathError(err, segmentIdx)
			}

			if len(path) == 1 {
				return cookies, nil
			}

			out, pathErr := cookies.GetIn(ctx, path[segmentIdx+1:])
			// The nested lookup reports failures through pathErr; err is
			// already known to be nil here.
			if pathErr != nil {
				return values.None, core.NewPathErrorFrom(pathErr, segmentIdx)
			}

			return out, nil
		case "title":
			return page.GetMainFrame().GetTitle(), nil
		case "isClosed":
			return page.IsClosed(), nil
		default:
			return GetInDocument(ctx, path, page.GetMainFrame())
		}
	}

	return GetInDocument(ctx, path, page.GetMainFrame())
}
// GetInDocument resolves path against doc and returns the value it points to.
//
// An empty path yields doc itself. A leading string segment is matched against
// the document-level properties "url"/"URL", "name", "title", "parent",
// "body"/"head", "innerHTML" and "innerText". Every other leading segment is
// handed, together with the full path, to GetInNode on the document's element.
func GetInDocument(ctx context.Context, path []core.Value, doc drivers.HTMLDocument) (core.Value, core.PathError) {
	if len(path) == 0 {
		return doc, nil
	}

	pos := 0
	head := path[pos]

	// Only a string segment can name a document property.
	if head.Type() != types.String {
		return GetInNode(ctx, path, doc.GetElement())
	}

	var (
		result core.Value
		err    error
	)

	// Segments that follow the property name.
	rest := path[pos+1:]

	switch prop := head.(values.String); prop {
	case "title":
		return doc.GetTitle(), nil
	case "name":
		return doc.GetName(), nil
	case "url", "URL":
		return doc.GetURL(), nil
	case "parent":
		owner, parentErr := doc.GetParentDocument(ctx)
		if parentErr != nil {
			return values.None, core.NewPathError(parentErr, pos)
		}

		if owner == nil {
			return values.None, nil
		}

		if len(path) == 1 {
			return owner, nil
		}

		nested, pathErr := GetInDocument(ctx, rest, owner)
		if pathErr != nil {
			return values.None, core.NewPathErrorFrom(pathErr, pos)
		}

		return nested, nil
	case "body", "head":
		// The property name doubles as the CSS selector.
		found, queryErr := doc.QuerySelector(ctx, drivers.NewCSSSelector(prop))
		if queryErr != nil {
			return values.None, core.NewPathError(queryErr, pos)
		}

		// A None result, or no further segments: return the result as is.
		if found == values.None || len(path) == 1 {
			return found, nil
		}

		el, castErr := drivers.ToElement(found)
		if castErr != nil {
			return values.None, core.NewPathError(castErr, pos)
		}

		nested, pathErr := GetInElement(ctx, rest, el)
		if pathErr != nil {
			return values.None, core.NewPathErrorFrom(pathErr, pos)
		}

		return nested, nil
	case "innerText":
		result, err = doc.GetElement().GetInnerText(ctx)
	case "innerHTML":
		result, err = doc.GetElement().GetInnerHTML(ctx)
	default:
		return GetInNode(ctx, path, doc.GetElement())
	}

	// Reached only by the innerText / innerHTML cases.
	return values.ReturnOrNext(ctx, path, pos, result, err)
}
// GetInElement resolves path against el and returns the value it points to.
//
// An empty path yields el itself. A leading string segment is matched against
// the element-level properties "innerText", "innerHTML", "value",
// "attributes", "style", "previousElementSibling", "nextElementSibling" and
// "parentElement". "attributes" and "style" consume one extra segment as the
// attribute or style name when it is present. Every other leading segment is
// handed, together with the full path, to GetInNode.
func GetInElement(ctx context.Context, path []core.Value, el drivers.HTMLElement) (core.Value, core.PathError) {
	if len(path) == 0 {
		return el, nil
	}

	pos := 0
	head := path[pos]

	// Only a string segment can name an element property.
	if head.Type() != types.String {
		return GetInNode(ctx, path, el)
	}

	var (
		result core.Value
		err    error
	)

	// True when a segment follows the property name.
	hasKey := len(path) > 1

	switch head.(values.String) {
	case "value":
		result, err = el.GetValue(ctx)
	case "innerHTML":
		result, err = el.GetInnerHTML(ctx)
	case "innerText":
		result, err = el.GetInnerText(ctx)
	case "parentElement":
		result, err = el.GetParentElement(ctx)
	case "nextElementSibling":
		result, err = el.GetNextElementSibling(ctx)
	case "previousElementSibling":
		result, err = el.GetPreviousElementSibling(ctx)
	case "attributes":
		if hasKey {
			// e.g. attributes.href
			pos++
			result, err = el.GetAttribute(ctx, values.ToString(path[pos]))
		} else {
			result, err = el.GetAttributes(ctx)
		}
	case "style":
		if hasKey {
			// e.g. style.color
			pos++
			result, err = el.GetStyle(ctx, values.ToString(path[pos]))
		} else {
			result, err = el.GetStyles(ctx)
		}
	default:
		return GetInNode(ctx, path, el)
	}

	// pos is the index of the last segment consumed above.
	return values.ReturnOrNext(ctx, path, pos, result, err)
}
// GetInNode resolves path against node and returns the value it points to.
//
// An empty path yields node itself. An integer leading segment selects a child
// node by index. A string leading segment is matched against "nodeType",
// "nodeName", "children" (optionally followed by a child index) and "length";
// any other string yields values.None without an error. A leading segment of
// any other type produces a type error.
func GetInNode(ctx context.Context, path []core.Value, node drivers.HTMLNode) (core.Value, core.PathError) {
	if len(path) == 0 {
		return node, nil
	}

	var (
		result core.Value
		err    error
	)

	pos := 0
	head := path[pos]

	switch head.Type() {
	case types.String:
		switch head.(values.String) {
		case "length":
			return node.Length(), nil
		case "nodeName":
			result, err = node.GetNodeName(ctx)
		case "nodeType":
			result, err = node.GetNodeType(ctx)
		case "children":
			if len(path) > 1 {
				// The segment after "children" is used as the child index.
				pos++
				result, err = node.GetChildNode(ctx, values.ToInt(path[pos]))
			} else {
				result, err = node.GetChildNodes(ctx)
			}
		default:
			return values.None, nil
		}
	case types.Int:
		result, err = node.GetChildNode(ctx, values.ToInt(head))
	default:
		return values.None, core.NewPathError(
			core.TypeError(head.Type(), types.Int, types.String),
			pos,
		)
	}

	// pos is the index of the last segment consumed above.
	return values.ReturnOrNext(ctx, path, pos, result, err)
}