1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-03-19 21:28:32 +02:00

Feature/custom iterator ()

* Added CollectionIterator interface

* Added PAGINATION function

* Fixed LIMIT clause

* Fixed linting issues
This commit is contained in:
Tim Voronov 2018-11-12 19:58:12 -05:00 committed by GitHub
parent de774ba03e
commit 291d07cbef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 1275 additions and 720 deletions

@ -3,9 +3,11 @@
### 0.5.0
#### Added
- DateTime functions.
- ``PAGINATION`` function.
#### Fixed
- Unable to define variables and make function calls before FILTER, SORT, and other statements.
- Unable to use params in LIMIT clause
- ``INNER_HTML`` returns outer HTML instead of inner HTML for dynamic elements.
- ``INNER_TEXT`` returns HTML instead of text for dynamic elements.

@ -0,0 +1,36 @@
// Searches Amazon for @criteria and collects title, vendor and price of the
// listed items, walking through at most @limit result pages.
// NOTE(review): the second DOCUMENT argument (true) presumably requests a
// browser-driven (dynamic) document, which PAGINATION requires - confirm.
LET amazon = DOCUMENT('https://www.amazon.com/', true)
// Fill in the search box, submit the form and wait for the results page to load.
INPUT(amazon, '#twotabsearchtextbox', @criteria)
CLICK(amazon, '.nav-search-submit input[type="submit"]')
WAIT_NAVIGATION(amazon)
// Selectors for the result list, a single result item and the "next page" link.
LET resultListSelector = '#s-results-list-atf'
LET resultItemSelector = '.s-result-item'
LET nextBtnSelector = '#pagnNextLink'
// Selectors evaluated relative to a single result item.
LET vendorSelector = 'div > div > div > div.a-fixed-left-grid-col.a-col-right > div.a-row.a-spacing-small > div:nth-child(2) > span:nth-child(2)'
LET priceSelector = 'div > div > div > div.a-fixed-left-grid-col.a-col-right > div:nth-child(4) > div.a-column.a-span7 > div:nth-child(1) > div:nth-child(3) > a > span.a-offscreen'
// Fallback price location, used when priceSelector yields an empty string.
LET altPriceSelector = 'div > div > div > div.a-fixed-left-grid-col.a-col-right > div:nth-child(2) > div.a-column.a-span7 > div:nth-child(1) > div:nth-child(3) > a > span.a-offscreen'
LET result = (
// PAGINATION yields a page number per iteration; the first iteration stays on
// the current page, later ones click nextBtnSelector before yielding.
FOR pageNum IN PAGINATION(amazon, nextBtnSelector)
// Stop after @limit pages.
LIMIT @limit
// Only pages after the first one were reached by a click, so only those need
// to wait for the navigation and for the result list to appear.
LET wait = pageNum > 0 ? WAIT_NAVIGATION(amazon) : false
LET waitSelector = wait ? WAIT_ELEMENT(amazon, resultListSelector) : false
// Scrape every result item of the current page.
LET items = (
FOR el IN ELEMENTS(amazon, resultItemSelector)
LET priceTxtMain = INNER_TEXT(el, priceSelector)
LET priceTxt = priceTxtMain != "" ? priceTxtMain : INNER_TEXT(el, altPriceSelector)
RETURN {
title: INNER_TEXT(el, 'h2'),
vendor: INNER_TEXT(el, vendorSelector),
// Strip the dollar sign before converting the price text to a number.
price: TO_FLOAT(SUBSTITUTE(priceTxt, "$", ""))
}
)
RETURN items
)
// result holds one array of items per page; merge them into a single list.
RETURN FLATTEN(result)

@ -276,41 +276,57 @@ func (v *visitor) doVisitForExpression(ctx *fql.ForExpressionContext, scope *sco
return forExp, nil
}
func (v *visitor) createLimit(ctx *fql.LimitClauseContext) (int, int, error) {
func (v *visitor) doVisitLimitClause(ctx *fql.LimitClauseContext, scope *scope) (core.Expression, core.Expression, error) {
var err error
var count int
var offset int
var count core.Expression
var offset core.Expression
intLiterals := ctx.AllIntegerLiteral()
clauseValues := ctx.AllLimitClauseValue()
if len(intLiterals) > 1 {
offset, err = v.parseInt(intLiterals[0])
if len(clauseValues) > 1 {
offset, err = v.doVisitLimitClauseValue(clauseValues[0].(*fql.LimitClauseValueContext), scope)
if err != nil {
return 0, 0, err
return nil, nil, err
}
count, err = v.parseInt(intLiterals[1])
count, err = v.doVisitLimitClauseValue(clauseValues[1].(*fql.LimitClauseValueContext), scope)
if err != nil {
return 0, 0, err
return nil, nil, err
}
} else {
count, err = strconv.Atoi(intLiterals[0].GetText())
count, err = v.doVisitLimitClauseValue(clauseValues[0].(*fql.LimitClauseValueContext), scope)
if err != nil {
return 0, 0, err
return nil, nil, err
}
offset = literals.NewIntLiteral(0)
}
return count, offset, nil
}
func (v *visitor) parseInt(node antlr.TerminalNode) (int, error) {
return strconv.Atoi(node.GetText())
// doVisitLimitClauseValue compiles a single LIMIT operand into an expression.
// An integer literal becomes an int literal expression; otherwise the operand
// is compiled as a query parameter.
// It returns an ErrInvalidToken error when the context holds neither an
// integer literal nor a parameter, instead of panicking on a nil type assertion.
func (v *visitor) doVisitLimitClauseValue(ctx *fql.LimitClauseValueContext, scope *scope) (core.Expression, error) {
	if literalCtx := ctx.IntegerLiteral(); literalCtx != nil {
		i, err := strconv.Atoi(literalCtx.GetText())

		if err != nil {
			return nil, err
		}

		return literals.NewIntLiteral(i), nil
	}

	// The comma-ok form keeps a missing param node from panicking.
	paramCtx, ok := ctx.Param().(*fql.ParamContext)

	if !ok || paramCtx == nil {
		return nil, core.Error(ErrInvalidToken, ctx.GetText())
	}

	return v.doVisitParamContext(paramCtx, scope)
}
func (v *visitor) createFilter(ctx *fql.FilterClauseContext, scope *scope) (core.Expression, error) {
func (v *visitor) doVisitFilterClause(ctx *fql.FilterClauseContext, scope *scope) (core.Expression, error) {
exp := ctx.Expression().(*fql.ExpressionContext)
exps, err := v.doVisitAllExpressions(exp.AllExpression(), scope)
@ -342,7 +358,7 @@ func (v *visitor) createFilter(ctx *fql.FilterClauseContext, scope *scope) (core
return nil, core.Error(ErrInvalidToken, ctx.GetText())
}
func (v *visitor) createSort(ctx *fql.SortClauseContext, scope *scope) ([]*clauses.SorterExpression, error) {
func (v *visitor) doVisitSortClause(ctx *fql.SortClauseContext, scope *scope) ([]*clauses.SorterExpression, error) {
sortExpCtxs := ctx.AllSortClauseExpression()
res := make([]*clauses.SorterExpression, len(sortExpCtxs))
@ -377,7 +393,7 @@ func (v *visitor) createSort(ctx *fql.SortClauseContext, scope *scope) ([]*claus
return res, nil
}
func (v *visitor) createCollect(ctx *fql.CollectClauseContext, scope *scope, valVarName string) (*clauses.Collect, error) {
func (v *visitor) doVisitCollectClause(ctx *fql.CollectClauseContext, scope *scope, valVarName string) (*clauses.Collect, error) {
var err error
var selectors []*clauses.CollectSelector
var projection *clauses.CollectProjection
@ -396,7 +412,7 @@ func (v *visitor) createCollect(ctx *fql.CollectClauseContext, scope *scope, val
selectors = make([]*clauses.CollectSelector, 0, len(collectSelectors))
for _, cs := range collectSelectors {
selector, err := v.createCollectSelector(cs.(*fql.CollectSelectorContext), scope)
selector, err := v.doVisitCollectSelector(cs.(*fql.CollectSelectorContext), scope)
if err != nil {
return nil, err
@ -416,7 +432,7 @@ func (v *visitor) createCollect(ctx *fql.CollectClauseContext, scope *scope, val
// if projection expression is defined like WITH group = { foo: i.bar }
if projectionSelectorCtx != nil {
selector, err := v.createCollectSelector(projectionSelectorCtx.(*fql.CollectSelectorContext), scope)
selector, err := v.doVisitCollectSelector(projectionSelectorCtx.(*fql.CollectSelectorContext), scope)
if err != nil {
return nil, err
@ -495,7 +511,7 @@ func (v *visitor) createCollect(ctx *fql.CollectClauseContext, scope *scope, val
selectors := make([]*clauses.CollectAggregateSelector, 0, len(selectorCtxs))
for _, sc := range selectorCtxs {
selector, err := v.createCollectAggregateSelector(sc.(*fql.CollectAggregateSelectorContext), scope)
selector, err := v.doVisitCollectAggregateSelector(sc.(*fql.CollectAggregateSelectorContext), scope)
if err != nil {
return nil, err
@ -524,7 +540,7 @@ func (v *visitor) createCollect(ctx *fql.CollectClauseContext, scope *scope, val
return clauses.NewCollect(selectors, projection, count, aggregate)
}
func (v *visitor) createCollectSelector(ctx *fql.CollectSelectorContext, scope *scope) (*clauses.CollectSelector, error) {
func (v *visitor) doVisitCollectSelector(ctx *fql.CollectSelectorContext, scope *scope) (*clauses.CollectSelector, error) {
variable := ctx.Identifier().GetText()
exp, err := v.doVisitExpression(ctx.Expression().(*fql.ExpressionContext), scope)
@ -535,7 +551,7 @@ func (v *visitor) createCollectSelector(ctx *fql.CollectSelectorContext, scope *
return clauses.NewCollectSelector(variable, exp)
}
func (v *visitor) createCollectAggregateSelector(ctx *fql.CollectAggregateSelectorContext, scope *scope) (*clauses.CollectAggregateSelector, error) {
func (v *visitor) doVisitCollectAggregateSelector(ctx *fql.CollectAggregateSelectorContext, scope *scope) (*clauses.CollectAggregateSelector, error) {
variable := ctx.Identifier().GetText()
fnCtx := ctx.FunctionCallExpression()
@ -608,7 +624,7 @@ func (v *visitor) doVisitForExpressionClause(ctx *fql.ForExpressionClauseContext
limitCtx := ctx.LimitClause()
if limitCtx != nil {
limit, offset, err := v.createLimit(limitCtx.(*fql.LimitClauseContext))
limit, offset, err := v.doVisitLimitClause(limitCtx.(*fql.LimitClauseContext), scope)
if err != nil {
return nil, err
@ -622,7 +638,7 @@ func (v *visitor) doVisitForExpressionClause(ctx *fql.ForExpressionClauseContext
filterCtx := ctx.FilterClause()
if filterCtx != nil {
filterExp, err := v.createFilter(filterCtx.(*fql.FilterClauseContext), scope)
filterExp, err := v.doVisitFilterClause(filterCtx.(*fql.FilterClauseContext), scope)
if err != nil {
return nil, err
@ -637,7 +653,7 @@ func (v *visitor) doVisitForExpressionClause(ctx *fql.ForExpressionClauseContext
if sortCtx != nil {
sortCtx := sortCtx.(*fql.SortClauseContext)
sortExps, err := v.createSort(sortCtx, scope)
sortExps, err := v.doVisitSortClause(sortCtx, scope)
if err != nil {
return nil, err
@ -652,7 +668,7 @@ func (v *visitor) doVisitForExpressionClause(ctx *fql.ForExpressionClauseContext
if collectCtx != nil {
collectCtx := collectCtx.(*fql.CollectClauseContext)
params, err := v.createCollect(collectCtx, scope, valVarName)
params, err := v.doVisitCollectClause(collectCtx, scope, valVarName)
if err != nil {
return nil, err

@ -77,7 +77,12 @@ filterClause
;
limitClause
: Limit IntegerLiteral (Comma IntegerLiteral)?
: Limit limitClauseValue (Comma limitClauseValue)?
;
// A single LIMIT operand: either an integer literal or a query parameter.
limitClauseValue
: IntegerLiteral
| param
;
sortClause

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

@ -112,6 +112,12 @@ func (s *BaseFqlParserListener) EnterLimitClause(ctx *LimitClauseContext) {}
// ExitLimitClause is called when production limitClause is exited.
func (s *BaseFqlParserListener) ExitLimitClause(ctx *LimitClauseContext) {}
// EnterLimitClauseValue is called when production limitClauseValue is entered.
func (s *BaseFqlParserListener) EnterLimitClauseValue(ctx *LimitClauseValueContext) {}
// ExitLimitClauseValue is called when production limitClauseValue is exited.
func (s *BaseFqlParserListener) ExitLimitClauseValue(ctx *LimitClauseValueContext) {}
// EnterSortClause is called when production sortClause is entered.
func (s *BaseFqlParserListener) EnterSortClause(ctx *SortClauseContext) {}

@ -67,6 +67,10 @@ func (v *BaseFqlParserVisitor) VisitLimitClause(ctx *LimitClauseContext) interfa
return v.VisitChildren(ctx)
}
func (v *BaseFqlParserVisitor) VisitLimitClauseValue(ctx *LimitClauseValueContext) interface{} {
return v.VisitChildren(ctx)
}
func (v *BaseFqlParserVisitor) VisitSortClause(ctx *SortClauseContext) interface{} {
return v.VisitChildren(ctx)
}

@ -52,6 +52,9 @@ type FqlParserListener interface {
// EnterLimitClause is called when entering the limitClause production.
EnterLimitClause(c *LimitClauseContext)
// EnterLimitClauseValue is called when entering the limitClauseValue production.
EnterLimitClauseValue(c *LimitClauseValueContext)
// EnterSortClause is called when entering the sortClause production.
EnterSortClause(c *SortClauseContext)
@ -208,6 +211,9 @@ type FqlParserListener interface {
// ExitLimitClause is called when exiting the limitClause production.
ExitLimitClause(c *LimitClauseContext)
// ExitLimitClauseValue is called when exiting the limitClauseValue production.
ExitLimitClauseValue(c *LimitClauseValueContext)
// ExitSortClause is called when exiting the sortClause production.
ExitSortClause(c *SortClauseContext)

@ -52,6 +52,9 @@ type FqlParserVisitor interface {
// Visit a parse tree produced by FqlParser#limitClause.
VisitLimitClause(ctx *LimitClauseContext) interface{}
// Visit a parse tree produced by FqlParser#limitClauseValue.
VisitLimitClauseValue(ctx *LimitClauseValueContext) interface{}
// Visit a parse tree produced by FqlParser#sortClause.
VisitSortClause(ctx *SortClauseContext) interface{}

@ -1,12 +1,14 @@
package collections
import (
"context"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
)
type (
Collection interface {
core.Value
Length() values.Int
}
@ -22,4 +24,54 @@ type (
Get(key values.String) (core.Value, values.Boolean)
Set(key values.String, value core.Value)
}
IterableCollection interface {
core.Value
Iterate(ctx context.Context) (CollectionIterator, error)
}
CollectionIterator interface {
Next(ctx context.Context) (value core.Value, key core.Value, err error)
}
collectionIteratorWrapper struct {
valVar string
keyVar string
values CollectionIterator
}
)
// NewCollectionIterator wraps a CollectionIterator into an Iterator.
// Each value produced by the wrapped iterator is stored under valVar and,
// when keyVar is not empty, its key is stored under keyVar.
// The returned error is always nil.
func NewCollectionIterator(
	valVar,
	keyVar string,
	values CollectionIterator,
) (Iterator, error) {
	wrapper := &collectionIteratorWrapper{
		valVar: valVar,
		keyVar: keyVar,
		values: values,
	}

	return wrapper, nil
}
// Next pulls the next value/key pair from the wrapped iterator and returns
// a fork of the given scope with the pair bound to the configured variables.
// It returns a nil scope and a nil error once the wrapped iterator yields
// values.None, which marks the end of the iteration.
func (iterator *collectionIteratorWrapper) Next(ctx context.Context, scope *core.Scope) (*core.Scope, error) {
	value, key, err := iterator.values.Next(ctx)

	if err != nil {
		return nil, err
	}

	// values.None is the end-of-iteration marker
	if value == values.None {
		return nil, nil
	}

	child := scope.Fork()

	err = child.SetVariable(iterator.valVar, value)

	// the key variable is optional and only bound when a name was given
	if err == nil && iterator.keyVar != "" {
		err = child.SetVariable(iterator.keyVar, key)
	}

	if err != nil {
		return nil, err
	}

	return child, nil
}

@ -21,6 +21,7 @@ const (
HTMLElementType Type = 8
HTMLDocumentType Type = 9
BinaryType Type = 10
CustomType Type = 99
)
var typestr = map[Type]string{
@ -35,6 +36,7 @@ var typestr = map[Type]string{
HTMLElementType: "HTMLElement",
HTMLDocumentType: "HTMLDocument",
BinaryType: "BinaryType",
CustomType: "CustomType",
}
func (t Type) String() string {

@ -9,15 +9,15 @@ import (
type LimitClause struct {
src core.SourceMap
dataSource collections.Iterable
count int
offset int
count core.Expression
offset core.Expression
}
func NewLimitClause(
src core.SourceMap,
dataSource collections.Iterable,
count int,
offset int,
count core.Expression,
offset core.Expression,
) (collections.Iterable, error) {
if dataSource == nil {
return nil, core.Error(core.ErrMissedArgument, "dataSource source")
@ -33,10 +33,34 @@ func (clause *LimitClause) Iterate(ctx context.Context, scope *core.Scope) (coll
return nil, core.SourceError(clause.src, err)
}
count, err := clause.count.Exec(ctx, scope)
if err != nil {
return nil, core.SourceError(clause.src, err)
}
offset, err := clause.offset.Exec(ctx, scope)
if err != nil {
return nil, core.SourceError(clause.src, err)
}
countInt, err := clause.parseValue(count)
if err != nil {
return nil, err
}
offsetInt, err := clause.parseValue(offset)
if err != nil {
return nil, err
}
iterator, err := collections.NewLimitIterator(
src,
clause.count,
clause.offset,
int(countInt),
int(offsetInt),
)
if err != nil {
@ -45,3 +69,15 @@ func (clause *LimitClause) Iterate(ctx context.Context, scope *core.Scope) (coll
return iterator, nil
}
// parseValue converts an evaluated LIMIT operand into a native int.
// Int values are unwrapped as they are, Float values are converted with int(),
// which drops the fractional part. Any other type yields -1 and a type error.
func (clause *LimitClause) parseValue(val core.Value) (int, error) {
	switch val.Type() {
	case core.IntType:
		return val.Unwrap().(int), nil
	case core.FloatType:
		return int(val.Unwrap().(float64)), nil
	default:
		return -1, core.TypeError(val.Type(), core.IntType, core.FloatType)
	}
}

@ -49,6 +49,15 @@ func (ds *DataSource) Iterate(ctx context.Context, scope *core.Scope) (collectio
default:
// fallback to user defined types
switch data.(type) {
case collections.IterableCollection:
collection := data.(collections.IterableCollection)
iterator, err := collection.Iterate(ctx)
if err != nil {
return nil, err
}
return collections.NewCollectionIterator(ds.valVariable, ds.keyVariable, iterator)
case collections.KeyedCollection:
return collections.NewIndexedIterator(ds.valVariable, ds.keyVariable, data.(collections.IndexedCollection))
case collections.IndexedCollection:

@ -0,0 +1,126 @@
package expressions_test
import (
"context"
"github.com/MontFerret/ferret/pkg/runtime/expressions"
"github.com/MontFerret/ferret/pkg/runtime/values"
"testing"
"github.com/MontFerret/ferret/pkg/runtime/collections"
"github.com/MontFerret/ferret/pkg/runtime/core"
. "github.com/smartystreets/goconvey/convey"
)
type (
// testIterableCollection is a test double that exposes an indexed collection
// through the collections.IterableCollection interface.
testIterableCollection struct {
values collections.IndexedCollection
}
// testCollectionIterator walks over an indexed collection,
// keeping the index of the most recently returned element in position.
testCollectionIterator struct {
values collections.IndexedCollection
position values.Int
}
// TestDataSourceExpression adapts a plain function to an expression
// by providing an Exec method that calls it.
TestDataSourceExpression func(ctx context.Context, scope *core.Scope) (core.Value, error)
)
// Exec calls the underlying function with the given context and scope
// and returns its result unchanged.
func (ds TestDataSourceExpression) Exec(ctx context.Context, scope *core.Scope) (core.Value, error) {
return ds(ctx, scope)
}
// MarshalJSON is not supported and always fails with core.ErrInvalidOperation.
func (c *testIterableCollection) MarshalJSON() ([]byte, error) {
return nil, core.ErrInvalidOperation
}
// Type returns a raw type id of 11.
// NOTE(review): presumably chosen so that it matches none of the built-in types - confirm.
func (c *testIterableCollection) Type() core.Type {
return core.Type(11)
}
// String returns an empty string.
func (c *testIterableCollection) String() string {
return ""
}
// Compare always returns 1, whatever the other value is.
func (c *testIterableCollection) Compare(other core.Value) int {
return 1
}
// Unwrap returns nil; the collection has no native representation.
func (c *testIterableCollection) Unwrap() interface{} {
return nil
}
// Hash always returns 0.
func (c *testIterableCollection) Hash() uint64 {
return 0
}
// Copy returns the receiver itself, not a new collection.
func (c *testIterableCollection) Copy() core.Value {
return c
}
// Iterate returns a new iterator over the wrapped collection.
// The position starts at -1 so that the first Next call moves it to index 0.
func (c *testIterableCollection) Iterate(ctx context.Context) (collections.CollectionIterator, error) {
return &testCollectionIterator{c.values, -1}, nil
}
// Next advances the iterator by one element and returns the element together
// with its index as the key. Once every element has been returned it yields
// values.None for both the value and the key, which signals the end of the iteration.
func (i *testCollectionIterator) Next(ctx context.Context) (core.Value, core.Value, error) {
	i.position++

	// Valid indexes are 0..Length()-1, so the iteration is over as soon as
	// position reaches Length(). Comparing with ">" would hand the index
	// Length() to Get and rely on its out-of-range behaviour to stop.
	if i.position >= i.values.Length() {
		return values.None, values.None, nil
	}

	return i.values.Get(i.position), i.position, nil
}
// TestDataSource verifies that a data source whose expression evaluates to a
// custom iterable collection yields every element with its index as the key.
func TestDataSource(t *testing.T) {
	Convey(".Iterate", t, func() {
		Convey("Should return custom iterable collection", func() {
			source := values.NewArrayWith(
				values.NewInt(1),
				values.NewInt(2),
				values.NewInt(3),
				values.NewInt(4),
				values.NewInt(5),
				values.NewInt(6),
				values.NewInt(7),
				values.NewInt(8),
				values.NewInt(9),
				values.NewInt(10),
			)

			// expression that hands the array out wrapped into the custom collection
			produce := TestDataSourceExpression(func(ctx context.Context, scope *core.Scope) (core.Value, error) {
				return &testIterableCollection{source}, nil
			})

			dataSource, err := expressions.NewDataSource(
				core.SourceMap{},
				collections.DefaultValueVar,
				collections.DefaultKeyVar,
				produce,
			)

			So(err, ShouldBeNil)

			rootScope, _ := core.NewRootScope()
			ctx := context.Background()
			scope := rootScope.Fork()

			iter, err := dataSource.Iterate(ctx, scope)

			So(err, ShouldBeNil)

			// visited counts the Next calls made so far, minus one
			visited := -1
			current := scope

			for {
				visited++

				current, err = iter.Next(ctx, current.Fork())

				So(err, ShouldBeNil)

				// a nil scope means the iterator is exhausted
				if current == nil {
					break
				}

				gotValue, _ := current.GetVariable(collections.DefaultValueVar)
				gotKey, _ := current.GetVariable(collections.DefaultKeyVar)

				wantValue := source.Get(values.Int(visited))

				So(gotValue, ShouldEqual, wantValue)
				So(gotKey, ShouldEqual, values.Int(visited))
			}

			// the terminating call is not counted as an element
			So(visited, ShouldEqual, int(source.Length()))
		})
	})
}

@ -40,7 +40,7 @@ func NewForExpression(
}, nil
}
func (e *ForExpression) AddLimit(src core.SourceMap, size, count int) error {
func (e *ForExpression) AddLimit(src core.SourceMap, size, count core.Expression) error {
limit, err := clauses.NewLimitClause(src, e.dataSource, size, count)
if err != nil {

@ -34,6 +34,7 @@ func NewLib() map[string]core.Function {
"INNER_TEXT_ALL": InnerTextAll,
"SELECT": Select,
"SCREENSHOT": Screenshot,
"PAGINATION": Pagination,
"PDF": PDF,
"DOWNLOAD": Download,
}

@ -0,0 +1,104 @@
package html
import (
"context"
"github.com/MontFerret/ferret/pkg/html/dynamic"
"github.com/MontFerret/ferret/pkg/runtime/collections"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/values"
)
// Pagination creates an iterator that goes through pages using CSS selector.
// The iterator starts from the current page i.e. it does not change the page on 1st iteration.
// That allows you to keep scraping logic inside FOR loop.
// @param doc (Document) - Target document.
// @param selector (String) - CSS selector for a pagination on the page.
// @returns (Paging) - Iterable value that can be used as a FOR loop source.
// On any validation failure it returns values.None together with the error.
func Pagination(_ context.Context, args ...core.Value) (core.Value, error) {
	if err := core.ValidateArgs(args, 2, 2); err != nil {
		return values.None, err
	}

	doc, ok := args[0].(*dynamic.HTMLDocument)

	if !ok {
		// values.None keeps this error path consistent with the others
		return values.None, core.Errors(core.ErrInvalidType, ErrNotDynamic)
	}

	if err := core.ValidateType(args[1], core.StringType); err != nil {
		return values.None, err
	}

	selector := args[1].(values.String)

	return &Paging{doc, selector}, nil
}
type (
// Paging is the value returned by Pagination.
// It holds the target document and the CSS selector used to move between pages.
Paging struct {
document *dynamic.HTMLDocument
selector values.String
}
// PagingIterator walks through pages of a document by clicking the selector.
// pos is the zero-based number of the page reached by the latest Next call.
PagingIterator struct {
document *dynamic.HTMLDocument
selector values.String
pos values.Int
}
)
// MarshalJSON is not supported and always fails with core.ErrInvalidOperation.
func (p *Paging) MarshalJSON() ([]byte, error) {
return nil, core.ErrInvalidOperation
}
// Type returns core.CustomType.
func (p *Paging) Type() core.Type {
return core.CustomType
}
// String returns the string form of core.CustomType.
func (p *Paging) String() string {
return core.CustomType.String()
}
// Compare always returns 1, whatever the other value is.
func (p *Paging) Compare(_ core.Value) int {
return 1
}
// Unwrap returns nil; the value has no native representation.
func (p *Paging) Unwrap() interface{} {
return nil
}
// Hash always returns 0.
func (p *Paging) Hash() uint64 {
return 0
}
// Copy returns values.None rather than a copy of the receiver.
// NOTE(review): presumably intentional because the underlying document is not copyable - confirm.
func (p *Paging) Copy() core.Value {
return values.None
}
// Iterate returns a new PagingIterator over the same document and selector.
// The position starts at -1 so that the first Next call reports page 0.
func (p *Paging) Iterate(_ context.Context) (collections.CollectionIterator, error) {
return &PagingIterator{p.document, p.selector, -1}, nil
}
// Next moves to the next page and returns its zero-based number as both
// the value and the key.
// The first call does not touch the document and reports page 0. Every later
// call clicks the selector: a failed click returns the error, an unsuccessful
// click ends the iteration by returning values.None.
func (i *PagingIterator) Next(_ context.Context) (core.Value, core.Value, error) {
	i.pos++

	// the current page is the first one to be yielded, no click needed
	if i.pos == 0 {
		return values.ZeroInt, values.ZeroInt, nil
	}

	clicked, err := i.document.ClickBySelector(i.selector)

	switch {
	case err != nil:
		return values.None, values.None, err
	case !clicked:
		// nothing was clicked, so terminate the iteration
		return values.None, values.None, nil
	default:
		return i.pos, i.pos, nil
	}
}