2018-11-13 02:58:12 +02:00
|
|
|
package html
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2019-02-13 19:31:18 +02:00
|
|
|
|
2019-02-20 01:10:18 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/drivers"
|
2018-11-13 02:58:12 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/runtime/core"
|
|
|
|
"github.com/MontFerret/ferret/pkg/runtime/values"
|
2019-02-13 19:31:18 +02:00
|
|
|
"github.com/MontFerret/ferret/pkg/runtime/values/types"
|
2018-11-13 02:58:12 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
// Pagination creates an iterator that goes through pages using CSS selector.
|
|
|
|
// The iterator starts from the current page i.e. it does not change the page on 1st iteration.
|
|
|
|
// That allows you to keep scraping logic inside FOR loop.
|
2019-06-19 23:58:56 +02:00
|
|
|
// @param doc (Open) - Target document.
|
2018-11-13 02:58:12 +02:00
|
|
|
// @param selector (String) - CSS selector for a pagination on the page.
|
|
|
|
func Pagination(_ context.Context, args ...core.Value) (core.Value, error) {
|
|
|
|
err := core.ValidateArgs(args, 2, 2)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return values.None, err
|
|
|
|
}
|
|
|
|
|
2019-06-19 23:58:56 +02:00
|
|
|
doc, err := drivers.ToDocument(args[0])
|
2018-11-13 02:58:12 +02:00
|
|
|
|
2019-02-20 01:10:18 +02:00
|
|
|
if err != nil {
|
|
|
|
return values.None, err
|
2018-11-13 02:58:12 +02:00
|
|
|
}
|
|
|
|
|
2019-02-13 19:31:18 +02:00
|
|
|
err = core.ValidateType(args[1], types.String)
|
2018-11-13 02:58:12 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return values.None, err
|
|
|
|
}
|
|
|
|
|
|
|
|
selector := args[1].(values.String)
|
|
|
|
|
|
|
|
return &Paging{doc, selector}, nil
|
|
|
|
}
|
|
|
|
|
2019-02-20 01:10:18 +02:00
|
|
|
// PagingType is the core.Type of Paging values, created with the name "paging".
var PagingType = core.NewType("paging")
|
2019-02-13 19:31:18 +02:00
|
|
|
|
2018-11-13 02:58:12 +02:00
|
|
|
type (
|
|
|
|
Paging struct {
|
2019-02-20 01:10:18 +02:00
|
|
|
document drivers.HTMLDocument
|
2018-11-13 02:58:12 +02:00
|
|
|
selector values.String
|
|
|
|
}
|
|
|
|
|
|
|
|
PagingIterator struct {
|
2019-02-20 01:10:18 +02:00
|
|
|
document drivers.HTMLDocument
|
2018-11-13 02:58:12 +02:00
|
|
|
selector values.String
|
|
|
|
pos values.Int
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
func (p *Paging) MarshalJSON() ([]byte, error) {
|
|
|
|
return nil, core.ErrInvalidOperation
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *Paging) Type() core.Type {
|
2019-02-13 19:31:18 +02:00
|
|
|
return PagingType
|
2018-11-13 02:58:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p *Paging) String() string {
|
2019-02-13 19:31:18 +02:00
|
|
|
return PagingType.String()
|
2018-11-13 02:58:12 +02:00
|
|
|
}
|
|
|
|
|
2019-02-13 19:31:18 +02:00
|
|
|
func (p *Paging) Compare(_ core.Value) int64 {
|
2018-11-13 02:58:12 +02:00
|
|
|
return 1
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *Paging) Unwrap() interface{} {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *Paging) Hash() uint64 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *Paging) Copy() core.Value {
|
|
|
|
return values.None
|
|
|
|
}
|
|
|
|
|
2019-02-15 14:41:08 +02:00
|
|
|
func (p *Paging) Iterate(_ context.Context) (core.Iterator, error) {
|
2018-11-13 02:58:12 +02:00
|
|
|
return &PagingIterator{p.document, p.selector, -1}, nil
|
|
|
|
}
|
|
|
|
|
2019-02-21 04:24:05 +02:00
|
|
|
func (i *PagingIterator) Next(ctx context.Context) (core.Value, core.Value, error) {
|
2018-11-13 02:58:12 +02:00
|
|
|
i.pos++
|
|
|
|
|
|
|
|
if i.pos == 0 {
|
|
|
|
return values.ZeroInt, values.ZeroInt, nil
|
|
|
|
}
|
|
|
|
|
2019-07-26 19:22:06 +02:00
|
|
|
exists, err := i.document.ExistsBySelector(ctx, i.selector)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return values.None, values.None, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if !exists {
|
|
|
|
return values.None, values.None, core.ErrNoMoreData
|
2019-07-23 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2019-09-07 07:59:32 +02:00
|
|
|
err = i.document.GetElement().ClickBySelector(ctx, i.selector, 1)
|
2018-11-13 02:58:12 +02:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return values.None, values.None, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// terminate
|
2019-07-23 18:44:30 +02:00
|
|
|
return i.pos, i.pos, nil
|
2018-11-13 02:58:12 +02:00
|
|
|
}
|