From 145a16f97daa59ce11f4687e35b4f73889ea120f Mon Sep 17 00:00:00 2001 From: Tim Voronov Date: Sun, 29 Dec 2019 12:46:46 -0500 Subject: [PATCH] Bugfix/#421 xpath (#435) * Fixed attr retrieval using XPATH in CDP * Updated single node value in CDP * Added e2e test * Fixed attr retrieval with XPATH for HTTP driver * Update Makefile * Update attr.fql --- e2e/tests/dynamic/doc/xpath/attr.fql | 6 ++++++ e2e/tests/dynamic/doc/xpath/count.fql | 2 +- e2e/tests/dynamic/doc/xpath/query.fql | 2 +- e2e/tests/static/doc/xpath/attr.fql | 6 ++++++ pkg/drivers/cdp/dom/element.go | 21 ++++++++++++++++++++- pkg/drivers/cdp/templates/xpath.go | 9 ++++++--- pkg/drivers/http/helpers.go | 20 +++++++++++++------- 7 files changed, 53 insertions(+), 13 deletions(-) create mode 100644 e2e/tests/dynamic/doc/xpath/attr.fql create mode 100644 e2e/tests/static/doc/xpath/attr.fql diff --git a/e2e/tests/dynamic/doc/xpath/attr.fql b/e2e/tests/dynamic/doc/xpath/attr.fql new file mode 100644 index 00000000..edb3029b --- /dev/null +++ b/e2e/tests/dynamic/doc/xpath/attr.fql @@ -0,0 +1,6 @@ +LET url = @dynamic +LET page = DOCUMENT(url, { driver: "cdp" }) + +LET actual = XPATH(page, "//meta/@charset") + +RETURN EXPECT(["utf-8"], actual) \ No newline at end of file diff --git a/e2e/tests/dynamic/doc/xpath/count.fql b/e2e/tests/dynamic/doc/xpath/count.fql index 4584589b..082dd7b7 100644 --- a/e2e/tests/dynamic/doc/xpath/count.fql +++ b/e2e/tests/dynamic/doc/xpath/count.fql @@ -1,5 +1,5 @@ LET url = @dynamic -LET page = DOCUMENT(url, true) +LET page = DOCUMENT(url, { driver: "cdp" }) LET actual = XPATH(page, "count(//body)") diff --git a/e2e/tests/dynamic/doc/xpath/query.fql b/e2e/tests/dynamic/doc/xpath/query.fql index c6e14244..2cc28211 100644 --- a/e2e/tests/dynamic/doc/xpath/query.fql +++ b/e2e/tests/dynamic/doc/xpath/query.fql @@ -1,5 +1,5 @@ LET url = @dynamic + "?redirect=/forms" -LET page = DOCUMENT(url, true) +LET page = DOCUMENT(url, { driver: "cdp" }) LET actual = XPATH(page, "//div[contains(@class, 'form-group')]") diff --git a/e2e/tests/static/doc/xpath/attr.fql b/e2e/tests/static/doc/xpath/attr.fql new file mode 100644 index 00000000..1813fbe3 --- /dev/null +++ b/e2e/tests/static/doc/xpath/attr.fql @@ -0,0 +1,6 @@ +LET url = @static + '/simple.html' +LET page = DOCUMENT(url) + +LET actual = XPATH(page, "//meta/@charset") + +RETURN EXPECT(["UTF-8"], actual) diff --git a/pkg/drivers/cdp/dom/element.go b/pkg/drivers/cdp/dom/element.go index 9ea16ca8..5601eb9b 100644 --- a/pkg/drivers/cdp/dom/element.go +++ b/pkg/drivers/cdp/dom/element.go @@ -627,6 +627,19 @@ func (el *HTMLElement) XPath(ctx context.Context, expression values.String) (res continue } + // it's not a Node, it's an attr value + if descr.Value.ObjectID == nil { + var value interface{} + + if err := json.Unmarshal(descr.Value.Value, &value); err != nil { + return values.None, err + } + + result.Push(values.Parse(value)) + + continue + } + repl, err := el.client.DOM.RequestNode(ctx, dom.NewRequestNodeArgs(*descr.Value.ObjectID)) if err != nil { @@ -656,7 +669,13 @@ func (el *HTMLElement) XPath(ctx context.Context, expression values.String) (res return result, nil case "object": if out.ObjectID == nil { - return values.None, nil + var value interface{} + + if err := json.Unmarshal(out.Value, &value); err != nil { + return values.None, err + } + + return values.Parse(value), nil } repl, err := el.client.DOM.RequestNode(ctx, dom.NewRequestNodeArgs(*out.ObjectID)) diff --git a/pkg/drivers/cdp/templates/xpath.go b/pkg/drivers/cdp/templates/xpath.go index 2b97b73f..965780f5 100644 --- a/pkg/drivers/cdp/templates/xpath.go +++ b/pkg/drivers/cdp/templates/xpath.go @@ -2,6 +2,9 @@ package templates const xPathTemplate = ` (element, expression) => { + const unwrap = (item) => { + return item.nodeType != 2 ? item : item.nodeValue; + }; const out = document.evaluate( expression, element, @@ -17,7 +20,7 @@ const xPathTemplate = ` let item; while ((item = out.iterateNext())) { - result.push(item); + result.push(unwrap(item)); } break; @@ -30,7 +33,7 @@ const xPathTemplate = ` const item = out.snapshotItem(i); if (item != null) { - result.push(item); + result.push(unwrap(item)); } } break; @@ -49,7 +52,7 @@ const xPathTemplate = ` } case XPathResult.ANY_UNORDERED_NODE_TYPE: case XPathResult.FIRST_ORDERED_NODE_TYPE: { - result = out.singleNodeValue; + result = unwrap(out.singleNodeValue); break; } default: { diff --git a/pkg/drivers/http/helpers.go b/pkg/drivers/http/helpers.go index dd169744..cc9d15e4 100644 --- a/pkg/drivers/http/helpers.go +++ b/pkg/drivers/http/helpers.go @@ -15,20 +15,26 @@ import ( ) func parseXPathNode(nav *htmlquery.NodeNavigator) (core.Value, error) { - node := nav.Current() - - if node == nil { - return values.None, nil - } - switch nav.NodeType() { case xpath.ElementNode: + node := nav.Current() + + if node == nil { + return values.None, nil + } + return NewHTMLElement(&goquery.Selection{Nodes: []*html.Node{node}}) case xpath.RootNode: + node := nav.Current() + + if node == nil { + return values.None, nil + } + url := htmlquery.SelectAttr(node, "url") return NewHTMLDocument(goquery.NewDocumentFromNode(node), url, nil) default: - return values.Parse(node.Data), nil + return values.NewString(nav.Value()), nil } }