1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-12 11:15:14 +02:00

Bugfix/#421 xpath (#435)

* Fixed attribute retrieval using XPath in the CDP driver

* Updated handling of single-node XPath results in the CDP driver (attribute nodes are unwrapped to their values)

* Added e2e test

* Fixed attribute retrieval using XPath in the HTTP driver

* Update Makefile

* Update attr.fql
This commit is contained in:
Tim Voronov 2019-12-29 12:46:46 -05:00 committed by GitHub
parent 2dd5457f8c
commit 145a16f97d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 53 additions and 13 deletions

View File

@ -0,0 +1,6 @@
LET url = @dynamic
LET page = DOCUMENT(url, { driver: "cdp" })
LET actual = XPATH(page, "//meta/@charset")
RETURN EXPECT(["utf-8"], actual)

View File

@ -1,5 +1,5 @@
LET url = @dynamic LET url = @dynamic
LET page = DOCUMENT(url, true) LET page = DOCUMENT(url, { driver: "cdp" })
LET actual = XPATH(page, "count(//body)") LET actual = XPATH(page, "count(//body)")

View File

@ -1,5 +1,5 @@
LET url = @dynamic + "?redirect=/forms" LET url = @dynamic + "?redirect=/forms"
LET page = DOCUMENT(url, true) LET page = DOCUMENT(url, { driver: "cdp" })
LET actual = XPATH(page, "//div[contains(@class, 'form-group')]") LET actual = XPATH(page, "//div[contains(@class, 'form-group')]")

View File

@ -0,0 +1,6 @@
LET url = @static + '/simple.html'
LET page = DOCUMENT(url)
LET actual = XPATH(page, "//meta/@charset")
RETURN EXPECT(["UTF-8"], actual)

View File

@ -627,6 +627,19 @@ func (el *HTMLElement) XPath(ctx context.Context, expression values.String) (res
continue continue
} }
// it's not a Node, it's an attr value
if descr.Value.ObjectID == nil {
var value interface{}
if err := json.Unmarshal(descr.Value.Value, &value); err != nil {
return values.None, err
}
result.Push(values.Parse(value))
continue
}
repl, err := el.client.DOM.RequestNode(ctx, dom.NewRequestNodeArgs(*descr.Value.ObjectID)) repl, err := el.client.DOM.RequestNode(ctx, dom.NewRequestNodeArgs(*descr.Value.ObjectID))
if err != nil { if err != nil {
@ -656,7 +669,13 @@ func (el *HTMLElement) XPath(ctx context.Context, expression values.String) (res
return result, nil return result, nil
case "object": case "object":
if out.ObjectID == nil { if out.ObjectID == nil {
return values.None, nil var value interface{}
if err := json.Unmarshal(out.Value, &value); err != nil {
return values.None, err
}
return values.Parse(value), nil
} }
repl, err := el.client.DOM.RequestNode(ctx, dom.NewRequestNodeArgs(*out.ObjectID)) repl, err := el.client.DOM.RequestNode(ctx, dom.NewRequestNodeArgs(*out.ObjectID))

View File

@ -2,6 +2,9 @@ package templates
const xPathTemplate = ` const xPathTemplate = `
(element, expression) => { (element, expression) => {
const unwrap = (item) => {
return item.nodeType != 2 ? item : item.nodeValue;
};
const out = document.evaluate( const out = document.evaluate(
expression, expression,
element, element,
@ -17,7 +20,7 @@ const xPathTemplate = `
let item; let item;
while ((item = out.iterateNext())) { while ((item = out.iterateNext())) {
result.push(item); result.push(unwrap(item));
} }
break; break;
@ -30,7 +33,7 @@ const xPathTemplate = `
const item = out.snapshotItem(i); const item = out.snapshotItem(i);
if (item != null) { if (item != null) {
result.push(item); result.push(unwrap(item));
} }
} }
break; break;
@ -49,7 +52,7 @@ const xPathTemplate = `
} }
case XPathResult.ANY_UNORDERED_NODE_TYPE: case XPathResult.ANY_UNORDERED_NODE_TYPE:
case XPathResult.FIRST_ORDERED_NODE_TYPE: { case XPathResult.FIRST_ORDERED_NODE_TYPE: {
result = out.singleNodeValue; result = unwrap(out.singleNodeValue);
break; break;
} }
default: { default: {

View File

@ -15,20 +15,26 @@ import (
) )
func parseXPathNode(nav *htmlquery.NodeNavigator) (core.Value, error) { func parseXPathNode(nav *htmlquery.NodeNavigator) (core.Value, error) {
node := nav.Current()
if node == nil {
return values.None, nil
}
switch nav.NodeType() { switch nav.NodeType() {
case xpath.ElementNode: case xpath.ElementNode:
node := nav.Current()
if node == nil {
return values.None, nil
}
return NewHTMLElement(&goquery.Selection{Nodes: []*html.Node{node}}) return NewHTMLElement(&goquery.Selection{Nodes: []*html.Node{node}})
case xpath.RootNode: case xpath.RootNode:
node := nav.Current()
if node == nil {
return values.None, nil
}
url := htmlquery.SelectAttr(node, "url") url := htmlquery.SelectAttr(node, "url")
return NewHTMLDocument(goquery.NewDocumentFromNode(node), url, nil) return NewHTMLDocument(goquery.NewDocumentFromNode(node), url, nil)
default: default:
return values.Parse(node.Data), nil return values.NewString(nav.Value()), nil
} }
} }