1
0
mirror of https://github.com/MontFerret/ferret.git synced 2025-01-24 03:49:29 +02:00
ferret/examples/pagination.fql

85 lines
2.8 KiB
Plaintext
Raw Normal View History

LET baseURL = 'https://www.amazon.com/'
LET amazon = DOCUMENT(baseURL, { driver: "cdp" })
WAIT_ELEMENT(amazon, '#nav-search-submit-button')
INPUT(amazon, '#twotabsearchtextbox', @criteria)
CLICK(amazon, '#nav-search-submit-button')
WAITFOR EVENT "navigation" IN amazon OPTIONS { target: "www\.amazon\.com\/s/ref"} 50000
WAIT_ELEMENT(amazon, '[class*="template=PAGINATION"]')
LET paginator = ELEMENT(amazon, '[class*="-pagination"]')
LET foundPrefixes = (FOR cn IN SPLIT(paginator.attributes.class, " ")
FILTER cn LIKE "*-pagination*"
LIMIT 1
RETURN FIRST(SPLIT(cn, "-"))
)
LET prefix = FIRST(foundPrefixes)
T::NOT::EMPTY(prefix, "CSS prefix should not be empty")
PRINT("CSS Prefix is:", prefix)
LET paginationItems = paginator.length
LET variants = {
"s": {
nextBtnSelector: ".s-pagination-next",
pagersSelector: ".s-pagination-item:not(.s-pagination-next, .s-pagination-previous):last-of-type"
},
"a": {
nextBtnSelector: ".a-pagination .a-last",
pagersSelector: FMT("ul.a-pagination li:nth-of-type({})", paginator.length - 1)
}
}
LET selectors = variants[prefix]
T::NOT::NONE(selectors, "Supported CSS selectors not found")
LET spinner = FMT('[data-component-type="{0}-search-results"] .{0}-result-list-placeholder', prefix)
LET resultListSelector = FMT('[data-component-type="{}-search-results"]', prefix)
LET resultItemSelector = FMT('[data-component-type="{}-search-result"]', prefix)
LET pagersSelector = FMT('.{0}-pagination :not(.{0}-last)', prefix)
LET priceWholeSelector = '.a-price-whole'
LET priceFracSelector = '.a-price-fraction'
LET pagers = ELEMENTS(amazon, pagersSelector)
LET pages = LENGTH(pagers) > 0 ? TO_INT(INNER_TEXT(LAST(pagers))) : 0
2019-01-11 21:25:58 -05:00
PRINT("Found pages:", pages)
LET result = (
2019-01-11 21:08:59 -05:00
FOR pageNum IN 1..pages
LIMIT @pages
LET clicked = pageNum == 1 ? false : CLICK(amazon, selectors.nextBtnSelector)
LET waitSelector = clicked ? WAIT_NO_CLASS(amazon, spinner, 'aok-hidden') && WAIT_ELEMENT(amazon, resultItemSelector) : false
PRINT("page:", pageNum, "clicked", clicked)
LET found = ELEMENTS(amazon, resultItemSelector)
LET items = (
FOR el IN found
LET hasPrice = ELEMENT_EXISTS(el, priceWholeSelector)
LET priceWholeTxt = hasPrice ? FIRST(REGEX_MATCH(INNER_TEXT(el, priceWholeSelector), "[0-9]+")) : "0"
LET priceFracTxt = hasPrice ? FIRST(REGEX_MATCH(INNER_TEXT(el, priceFracSelector), "[0-9]+")) : "00"
LET price = TO_FLOAT(priceWholeTxt + "." + priceFracTxt)
LET anchor = ELEMENT(el, "a")
RETURN {
page: pageNum,
url: baseURL + anchor.attributes.href,
title: INNER_TEXT(el, 'h2'),
2019-01-12 12:21:38 -05:00
price
}
)
RETURN items
)
2019-01-11 21:08:59 -05:00
RETURN FLATTEN(result)