1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-04-09 07:24:05 +02:00

Feature/inner html element child (#82)

* SOme wokrd

* Renamed example

* Updated example
This commit is contained in:
Tim Voronov 2018-10-08 20:20:40 -04:00 committed by GitHub
parent 0004667df6
commit 05a7582bba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 322 additions and 209 deletions

View File

@ -5,10 +5,7 @@ WAIT_ELEMENT(doc, '.chartTrack__details', 5000)
LET tracks = ELEMENTS(doc, '.chartTrack__details')
FOR track IN tracks
LET username = ELEMENT(track, '.chartTrack__username')
LET title = ELEMENT(track, '.chartTrack__title')
RETURN {
artist: TRIM(username.innerText),
track: TRIM(title.innerText)
artist: TRIM(INNER_TEXT(track, '.chartTrack__username')),
track: TRIM(INNER_TEXT(track, '.chartTrack__title'))
}

36
examples/pagination.fql Normal file
View File

@ -0,0 +1,36 @@
// Searches Amazon for @criteria and collects title, vendor and price for
// every result item across the first @pages result pages.
// @criteria and @pages are query parameters supplied by the caller.
// NOTE(review): the `true` flag presumably selects the browser-driven (dynamic) document — confirm.
LET amazon = DOCUMENT('https://www.amazon.com/', true)
// Type the search term into the search box and submit the form.
INPUT(amazon, '#twotabsearchtextbox', @criteria)
CLICK(amazon, '.nav-search-submit input[type="submit"]')
WAIT_NAVIGATION(amazon)
// Selectors used below; the item-level selectors are evaluated relative to each result item.
LET resultListSelector = '#s-results-list-atf'
LET resultItemSelector = '.s-result-item'
LET nextBtnSelector = '#pagnNextLink'
LET vendorSelector = 'div > div > div > div.a-fixed-left-grid-col.a-col-right > div.a-row.a-spacing-small > div:nth-child(2) > span:nth-child(2)'
LET priceSelector = 'div > div > div > div.a-fixed-left-grid-col.a-col-right > div:nth-child(4) > div.a-column.a-span7 > div:nth-child(1) > div:nth-child(3) > a > span.a-offscreen'
// Fallback for items whose price block sits in the second child row instead of the fourth.
LET altPriceSelector = 'div > div > div > div.a-fixed-left-grid-col.a-col-right > div:nth-child(2) > div.a-column.a-span7 > div:nth-child(1) > div:nth-child(3) > a > span.a-offscreen'
LET result = (
FOR pageNum IN 1..@pages
// The first page is already loaded by the search above, so "next" is only clicked from page 2 on.
LET clicked = pageNum == 1 ? false : CLICK(amazon, nextBtnSelector)
// Each step runs only if the previous one produced a truthy value.
LET wait = clicked ? WAIT_NAVIGATION(amazon) : false
LET waitSelector = wait ? WAIT_ELEMENT(amazon, resultListSelector) : false
LET items = (
FOR el IN ELEMENTS(amazon, resultItemSelector)
// Try the main price location first; use the alternative one when it yields an empty string.
LET priceTxtMain = INNER_TEXT(el, priceSelector)
LET priceTxt = priceTxtMain != "" ? priceTxtMain : INNER_TEXT(el, altPriceSelector)
RETURN {
title: INNER_TEXT(el, 'h2'),
vendor: INNER_TEXT(el, vendorSelector),
// Strip the dollar sign before converting the price text to a number.
price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", ""))
}
)
RETURN items
)
// result holds one array of items per page; merge them into a single list.
RETURN FLATTEN(result)

View File

@ -320,149 +320,38 @@ func (doc *HTMLDocument) QuerySelectorAll(selector values.String) core.Value {
}
func (doc *HTMLDocument) URL() core.Value {
doc.Lock()
defer doc.Unlock()
return doc.url
}
func (doc *HTMLDocument) InnerHTMLBySelector(selector values.String) values.String {
res, err := eval.Eval(
doc.client,
fmt.Sprintf(`
var el = document.querySelector(%s);
doc.Lock()
defer doc.Unlock()
if (el == null) {
return "";
}
return el.innerHTML;
`, eval.ParamString(selector.String())),
true,
false,
)
if err != nil {
doc.logger.Error().
Timestamp().
Err(err).
Str("selector", selector.String()).
Msg("failed to get inner HTML by selector")
return values.EmptyString
}
if res.Type() == core.StringType {
return res.(values.String)
}
return values.EmptyString
return doc.element.InnerHTMLBySelector(selector)
}
func (doc *HTMLDocument) InnerHTMLBySelectorAll(selector values.String) *values.Array {
res, err := eval.Eval(
doc.client,
fmt.Sprintf(`
var result = [];
var elements = document.querySelectorAll(%s);
doc.Lock()
defer doc.Unlock()
if (elements == null) {
return result;
}
elements.forEach((i) => {
result.push(i.innerHTML);
});
return result;
`, eval.ParamString(selector.String())),
true,
false,
)
if err != nil {
doc.logger.Error().
Timestamp().
Err(err).
Str("selector", selector.String()).
Msg("failed to get an array of inner HTML by selector")
return values.NewArray(0)
}
if res.Type() == core.ArrayType {
return res.(*values.Array)
}
return values.NewArray(0)
return doc.element.InnerHTMLBySelectorAll(selector)
}
// InnerTextBySelector returns the inner text that the document's root element
// resolves for selector.
//
// The document is locked for the duration of the call and the lookup itself
// is delegated to doc.element, so the result for a missing match or a failed
// lookup is whatever the element implementation returns.
func (doc *HTMLDocument) InnerTextBySelector(selector values.String) values.String {
	doc.Lock()
	defer doc.Unlock()

	// Must call the text variant: delegating to InnerHTMLBySelector here made
	// the inner-text API hand back markup instead of text.
	return doc.element.InnerTextBySelector(selector)
}
func (doc *HTMLDocument) InnerTextBySelectorAll(selector values.String) *values.Array {
res, err := eval.Eval(
doc.client,
fmt.Sprintf(`
var result = [];
var elements = document.querySelectorAll(%s);
doc.Lock()
defer doc.Unlock()
if (elements == null) {
return result;
}
elements.forEach((i) => {
result.push(i.innerText);
});
return result;
`, eval.ParamString(selector.String())),
true,
false,
)
if err != nil {
doc.logger.Error().
Timestamp().
Err(err).
Str("selector", selector.String()).
Msg("failed to get an array inner text by selector")
return values.NewArray(0)
}
if res.Type() == core.ArrayType {
return res.(*values.Array)
}
return values.NewArray(0)
return doc.element.InnerTextBySelectorAll(selector)
}
func (doc *HTMLDocument) ClickBySelector(selector values.String) (values.Boolean, error) {

View File

@ -1,7 +1,6 @@
package dynamic
import (
"bytes"
"context"
"encoding/json"
"github.com/MontFerret/ferret/pkg/html/common"
@ -9,7 +8,6 @@ import (
"github.com/MontFerret/ferret/pkg/html/dynamic/events"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/rs/zerolog"
@ -419,6 +417,83 @@ func (el *HTMLElement) InnerText() values.String {
return val.(values.String)
}
// InnerTextBySelector runs a DOM query for selector under this element's node
// id and returns the inner text loaded for the node that was found.
//
// An empty string is returned when the element is not connected, when the
// query fails, or when the text cannot be loaded; both failures are logged.
func (el *HTMLElement) InnerTextBySelector(selector values.String) values.String {
	if !el.IsConnected() {
		return values.EmptyString
	}

	node, err := el.client.DOM.QuerySelector(
		context.Background(),
		dom.NewQuerySelectorArgs(el.id, selector.String()),
	)

	if err != nil {
		el.logger.Error().
			Timestamp().
			Err(err).
			Int("id", int(el.id)).
			Str("selector", selector.String()).
			Msg("failed to retrieve nodes by selector")

		return values.EmptyString
	}

	innerText, err := loadInnerText(el.client, node.NodeID)

	if err == nil {
		return innerText
	}

	el.logger.Error().
		Timestamp().
		Err(err).
		Int("id", int(el.id)).
		Str("selector", selector.String()).
		Msg("failed to load inner text for found child element")

	return values.EmptyString
}
// InnerTextBySelectorAll runs a DOM query for selector under this element's
// node id and returns the inner text of every node that was found, in the
// order the query reported them.
//
// An empty array is returned when the element is not connected or when the
// query fails. If loading the text of one node fails, the error is logged and
// the texts collected so far are returned.
func (el *HTMLElement) InnerTextBySelectorAll(selector values.String) *values.Array {
	// Same guard as InnerTextBySelector: skip the protocol round trip for an
	// element that is no longer connected.
	if !el.IsConnected() {
		return values.NewArray(0)
	}

	ctx := context.Background()

	selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String())
	res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs)

	if err != nil {
		el.logger.Error().
			Timestamp().
			Err(err).
			Int("id", int(el.id)).
			Str("selector", selector.String()).
			Msg("failed to retrieve nodes by selector")

		return values.NewArray(0)
	}

	arr := values.NewArray(len(res.NodeIDs))

	for _, id := range res.NodeIDs {
		text, err := loadInnerText(el.client, id)

		if err != nil {
			// Record the child node as well as the parent so the failing
			// node can be identified from the log entry.
			el.logger.Error().
				Timestamp().
				Err(err).
				Int("id", int(el.id)).
				Int("childId", int(id)).
				Str("selector", selector.String()).
				Msg("failed to load inner text for found child element")

			// return what we have
			return arr
		}

		arr.Push(text)
	}

	return arr
}
func (el *HTMLElement) InnerHTML() values.String {
el.Lock()
defer el.Unlock()
@ -426,6 +501,83 @@ func (el *HTMLElement) InnerHTML() values.String {
return el.innerHTML
}
// InnerHTMLBySelector runs a DOM query for selector under this element's node
// id and returns the markup loaded for the node that was found.
//
// An empty string is returned when the element is not connected, when the
// query fails, or when the markup cannot be loaded; both failures are logged.
func (el *HTMLElement) InnerHTMLBySelector(selector values.String) values.String {
	if !el.IsConnected() {
		return values.EmptyString
	}

	node, err := el.client.DOM.QuerySelector(
		context.Background(),
		dom.NewQuerySelectorArgs(el.id, selector.String()),
	)

	if err != nil {
		el.logger.Error().
			Timestamp().
			Err(err).
			Int("id", int(el.id)).
			Str("selector", selector.String()).
			Msg("failed to retrieve nodes by selector")

		return values.EmptyString
	}

	markup, err := loadInnerHTML(el.client, node.NodeID)

	if err == nil {
		return markup
	}

	el.logger.Error().
		Timestamp().
		Err(err).
		Int("id", int(el.id)).
		Str("selector", selector.String()).
		Msg("failed to load inner HTML for found child element")

	return values.EmptyString
}
// InnerHTMLBySelectorAll runs a DOM query for selector under this element's
// node id and returns the markup of every node that was found, in the order
// the query reported them.
//
// An empty array is returned when the element is not connected or when the
// query fails. If loading the markup of one node fails, the error is logged
// and the entries collected so far are returned.
func (el *HTMLElement) InnerHTMLBySelectorAll(selector values.String) *values.Array {
	// Same guard as InnerHTMLBySelector: skip the protocol round trip for an
	// element that is no longer connected.
	if !el.IsConnected() {
		return values.NewArray(0)
	}

	ctx := context.Background()

	selectorArgs := dom.NewQuerySelectorAllArgs(el.id, selector.String())
	res, err := el.client.DOM.QuerySelectorAll(ctx, selectorArgs)

	if err != nil {
		el.logger.Error().
			Timestamp().
			Err(err).
			Int("id", int(el.id)).
			Str("selector", selector.String()).
			Msg("failed to retrieve nodes by selector")

		return values.NewArray(0)
	}

	arr := values.NewArray(len(res.NodeIDs))

	for _, id := range res.NodeIDs {
		text, err := loadInnerHTML(el.client, id)

		if err != nil {
			// Record the child node as well as the parent so the failing
			// node can be identified from the log entry.
			el.logger.Error().
				Timestamp().
				Err(err).
				Int("id", int(el.id)).
				Int("childId", int(id)).
				Str("selector", selector.String()).
				Msg("failed to load inner HTML for found child element")

			// return what we have
			return arr
		}

		arr.Push(text)
	}

	return arr
}
func (el *HTMLElement) Click() (values.Boolean, error) {
ctx, cancel := contextWithTimeout()
@ -455,9 +607,7 @@ func (el *HTMLElement) loadInnerText() (core.Value, error) {
return h, nil
}
buff := bytes.NewBuffer([]byte(h))
parsed, err := goquery.NewDocumentFromReader(buff)
parser, err := parseInnerText(h.String())
if err != nil {
el.logger.Error().
@ -469,7 +619,7 @@ func (el *HTMLElement) loadInnerText() (core.Value, error) {
return values.EmptyString, err
}
return values.NewString(parsed.Text()), nil
return parser, nil
}
func (el *HTMLElement) loadAttrs() (core.Value, error) {

View File

@ -1,10 +1,12 @@
package dynamic
import (
"bytes"
"context"
"github.com/MontFerret/ferret/pkg/html/common"
"github.com/MontFerret/ferret/pkg/html/dynamic/events"
"github.com/MontFerret/ferret/pkg/runtime/values"
"github.com/PuerkitoBio/goquery"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/page"
@ -87,6 +89,32 @@ func loadInnerHTML(client *cdp.Client, id dom.NodeID) (values.String, error) {
return values.NewString(res.OuterHTML), err
}
// loadInnerText fetches the markup of node id through loadInnerHTML and
// reduces it to plain text with parseInnerText.
//
// A load error is returned together with an empty string. Empty markup is
// returned as is, without being parsed.
func loadInnerText(client *cdp.Client, id dom.NodeID) (values.String, error) {
	markup, err := loadInnerHTML(client, id)

	switch {
	case err != nil:
		return values.EmptyString, err
	case markup == values.EmptyString:
		// Nothing to parse.
		return markup, nil
	}

	return parseInnerText(markup.String())
}
// parseInnerText parses innerHTML as an HTML document with goquery and
// returns the text of the parsed document. A parse error is returned together
// with an empty string.
func parseInnerText(innerHTML string) (values.String, error) {
	doc, err := goquery.NewDocumentFromReader(bytes.NewBufferString(innerHTML))

	if err != nil {
		return values.EmptyString, err
	}

	return values.NewString(doc.Text()), nil
}
func createChildrenArray(nodes []dom.Node) []dom.NodeID {
children := make([]dom.NodeID, len(nodes))

View File

@ -54,49 +54,3 @@ func (doc *HTMLDocument) Compare(other core.Value) int {
func (doc *HTMLDocument) URL() core.Value {
return doc.url
}
func (doc *HTMLDocument) InnerHTMLBySelector(selector values.String) values.String {
selection := doc.selection.Find(selector.String())
str, err := selection.Html()
// TODO: log error
if err != nil {
return values.EmptyString
}
return values.NewString(str)
}
func (doc *HTMLDocument) InnerHTMLBySelectorAll(selector values.String) *values.Array {
selection := doc.selection.Find(selector.String())
arr := values.NewArray(selection.Length())
selection.Each(func(_ int, selection *goquery.Selection) {
str, err := selection.Html()
// TODO: log error
if err == nil {
arr.Push(values.NewString(str))
}
})
return arr
}
func (doc *HTMLDocument) InnerTextBySelector(selector values.String) values.String {
selection := doc.selection.Find(selector.String())
return values.NewString(selection.Text())
}
func (doc *HTMLDocument) InnerTextBySelectorAll(selector values.String) *values.Array {
selection := doc.selection.Find(selector.String())
arr := values.NewArray(selection.Length())
selection.Each(func(_ int, selection *goquery.Selection) {
arr.Push(values.NewString(selection.Text()))
})
return arr
}

View File

@ -191,6 +191,52 @@ func (el *HTMLElement) QuerySelectorAll(selector values.String) core.Value {
return arr
}
// InnerHTMLBySelector searches this element's selection for selector and
// returns the HTML reported by goquery for the result. An empty string is
// returned when goquery reports an error.
func (el *HTMLElement) InnerHTMLBySelector(selector values.String) values.String {
	html, err := el.selection.Find(selector.String()).Html()

	// TODO: log error
	if err == nil {
		return values.NewString(html)
	}

	return values.EmptyString
}
// InnerHTMLBySelectorAll searches this element's selection for selector and
// returns an array holding the HTML of each match. Matches whose HTML cannot
// be produced are left out of the result.
func (el *HTMLElement) InnerHTMLBySelectorAll(selector values.String) *values.Array {
	matches := el.selection.Find(selector.String())
	out := values.NewArray(matches.Length())

	matches.Each(func(_ int, match *goquery.Selection) {
		html, err := match.Html()

		// TODO: log error
		if err != nil {
			return
		}

		out.Push(values.NewString(html))
	})

	return out
}
// InnerTextBySelector searches this element's selection for selector and
// returns the text reported by goquery for the result.
//
// NOTE(review): goquery's Text is documented as combining the text of all
// matched elements, while Html uses only the first one, so this may differ
// from InnerHTMLBySelector when several elements match — confirm intent.
func (el *HTMLElement) InnerTextBySelector(selector values.String) values.String {
	return values.NewString(el.selection.Find(selector.String()).Text())
}
// InnerTextBySelectorAll searches this element's selection for selector and
// returns an array holding the text of each match, one entry per match.
func (el *HTMLElement) InnerTextBySelectorAll(selector values.String) *values.Array {
	matches := el.selection.Find(selector.String())
	out := values.NewArray(matches.Length())

	matches.Each(func(_ int, match *goquery.Selection) {
		out.Push(values.NewString(match.Text()))
	})

	return out
}
func (el *HTMLElement) parseAttrs() *values.Object {
obj := values.NewObject()

View File

@ -54,24 +54,37 @@ func (operator *RangeOperator) Exec(ctx context.Context, scope *core.Scope) (cor
}
func (operator *RangeOperator) Eval(_ context.Context, left, right core.Value) (core.Value, error) {
err := core.ValidateType(left, core.IntType)
err := core.ValidateType(left, core.IntType, core.FloatType)
if err != nil {
return values.None, core.SourceError(operator.src, err)
}
err = core.ValidateType(right, core.IntType)
err = core.ValidateType(right, core.IntType, core.FloatType)
if err != nil {
return values.None, core.SourceError(operator.src, err)
}
var start int
var end int
if left.Type() == core.FloatType {
start = int(left.(values.Float))
} else {
start = int(left.(values.Int))
}
if right.Type() == core.FloatType {
end = int(right.(values.Float))
} else {
end = int(right.(values.Int))
}
arr := values.NewArray(10)
start := left.(values.Int)
end := right.(values.Int)
for i := start; i <= end; i++ {
arr.Push(i)
arr.Push(values.NewInt(i))
}
return arr, nil

View File

@ -29,12 +29,6 @@ type (
QuerySelector(selector String) core.Value
QuerySelectorAll(selector String) core.Value
}
HTMLDocument interface {
HTMLNode
URL() core.Value
InnerHTMLBySelector(selector String) String
@ -44,4 +38,10 @@ type (
InnerTextBySelectorAll(selector String) *Array
}
HTMLDocument interface {
HTMLNode
URL() core.Value
}
)

View File

@ -8,9 +8,9 @@ import (
/*
* Returns inner HTML string of a matched element
* @param doc (Document) - Parent document.
* @param doc (HTMLDocument|HTMLElement) - Parent document or element.
* @param selector (String) - String of CSS selector.
* @returns (String) - Inner HTML string if an element found, otherwise NONE.
* @returns (String) - Inner HTML string if an element found, otherwise empty string.
*/
func InnerHTML(_ context.Context, args ...core.Value) (core.Value, error) {
err := core.ValidateArgs(args, 2, 2)
@ -19,7 +19,7 @@ func InnerHTML(_ context.Context, args ...core.Value) (core.Value, error) {
return values.EmptyString, err
}
err = core.ValidateType(args[0], core.HTMLDocumentType)
err = core.ValidateType(args[0], core.HTMLDocumentType, core.HTMLElementType)
if err != nil {
return values.None, err
@ -31,8 +31,8 @@ func InnerHTML(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err
}
doc := args[0].(values.HTMLDocument)
node := args[0].(values.HTMLNode)
selector := args[1].(values.String)
return doc.InnerHTMLBySelector(selector), nil
return node.InnerHTMLBySelector(selector), nil
}

View File

@ -8,7 +8,7 @@ import (
/*
* Returns an array of inner HTML strings of matched elements.
* @param doc (Document) - Parent document.
* @param doc (HTMLDocument|HTMLElement) - Parent document or element.
* @param selector (String) - String of CSS selector.
* @returns (Array) - An array of inner HTML strings if any element found, otherwise empty array.
*/
@ -19,7 +19,7 @@ func InnerHTMLAll(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err
}
err = core.ValidateType(args[0], core.HTMLDocumentType)
err = core.ValidateType(args[0], core.HTMLDocumentType, core.HTMLElementType)
if err != nil {
return values.None, err
@ -31,7 +31,7 @@ func InnerHTMLAll(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err
}
doc := args[0].(values.HTMLDocument)
doc := args[0].(values.HTMLNode)
selector := args[1].(values.String)
return doc.InnerHTMLBySelectorAll(selector), nil

View File

@ -8,9 +8,9 @@ import (
/*
* Returns inner text of a matched element
* @param doc (Document) - Parent document.
* @param doc (HTMLDocument|HTMLElement) - Parent document or element.
* @param selector (String) - String of CSS selector.
* @returns (String) - Inner text if an element found, otherwise NONE.
* @returns (String) - Inner text if an element found, otherwise empty string.
*/
func InnerText(_ context.Context, args ...core.Value) (core.Value, error) {
err := core.ValidateArgs(args, 2, 2)
@ -19,7 +19,7 @@ func InnerText(_ context.Context, args ...core.Value) (core.Value, error) {
return values.EmptyString, err
}
err = core.ValidateType(args[0], core.HTMLDocumentType)
err = core.ValidateType(args[0], core.HTMLDocumentType, core.HTMLElementType)
if err != nil {
return values.None, err
@ -31,7 +31,7 @@ func InnerText(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err
}
doc := args[0].(values.HTMLDocument)
doc := args[0].(values.HTMLNode)
selector := args[1].(values.String)
return doc.InnerTextBySelector(selector), nil

View File

@ -8,7 +8,7 @@ import (
/*
* Returns an array of inner text of matched elements.
* @param doc (Document) - Parent document.
* @param doc (HTMLDocument|HTMLElement) - Parent document or element.
* @param selector (String) - String of CSS selector.
* @returns (Array) - An array of inner text if any element found, otherwise empty array.
*/
@ -19,7 +19,7 @@ func InnerTextAll(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err
}
err = core.ValidateType(args[0], core.HTMLDocumentType)
err = core.ValidateType(args[0], core.HTMLDocumentType, core.HTMLElementType)
if err != nil {
return values.None, err
@ -31,7 +31,7 @@ func InnerTextAll(_ context.Context, args ...core.Value) (core.Value, error) {
return values.None, err
}
doc := args[0].(values.HTMLDocument)
doc := args[0].(values.HTMLNode)
selector := args[1].(values.String)
return doc.InnerTextBySelectorAll(selector), nil