mirror of
https://github.com/MontFerret/ferret.git
synced 2025-01-10 03:16:58 +02:00
a17a0a1819
* Added support of FILTER clause in WAITFOR EVENT expression * Fixed example query * Fixed inability of using keywords as an object property
88 lines
2.8 KiB
Plaintext
88 lines
2.8 KiB
Plaintext
// Open the Amazon landing page with the "cdp" driver, submit the search
// term supplied through the @criteria query parameter, and wait for the
// results page (including its pagination block) to appear.
LET baseURL = 'https://www.amazon.com/'
LET amazon = DOCUMENT(baseURL, {
    driver: "cdp"
})

// Selectors of the search form controls used below.
LET searchSubmitSelector = '#nav-search-submit-button'
LET searchInputSelector = '#twotabsearchtextbox'

// The form is only usable once the submit button is present.
WAIT_ELEMENT(amazon, searchSubmitSelector)
INPUT(amazon, searchInputSelector, @criteria)
CLICK(amazon, searchSubmitSelector)

// Block until a "navigation" event matching the target pattern is emitted
// by the page, or the timeout elapses (presumably milliseconds - confirm).
WAITFOR EVENT "navigation" IN amazon
    OPTIONS { target: "www\.amazon\.com\/s/ref"}
    TIMEOUT 50000

// Pagination must be rendered before the rest of the script inspects it.
WAIT_ELEMENT(amazon, '[class*="template=PAGINATION"]')
|
|
|
|
// Locate the pagination element and derive the CSS class prefix from it:
// take the first class name containing "-pagination" and keep the part
// before the first "-".
LET paginator = ELEMENT(amazon, '[class*="-pagination"]')
LET paginatorClasses = SPLIT(paginator.attributes.class, " ")

LET foundPrefixes = (
    FOR className IN paginatorClasses
        FILTER className LIKE "*-pagination*"
        LIMIT 1
        RETURN FIRST(SPLIT(className, "-"))
)
LET prefix = FIRST(foundPrefixes)

// Fail early when no matching class was found.
T::NOT::EMPTY(prefix, "CSS prefix should not be empty")
PRINT("CSS Prefix is:", prefix)
|
|
|
|
// Per-layout selector table, keyed by the CSS prefix detected above.
LET paginationItems = paginator.length

// Selectors for the layout whose classes start with "s-".
LET sVariant = {
    nextBtnSelector: ".s-pagination-next",
    pagersSelector: ".s-pagination-item:not(.s-pagination-next, .s-pagination-previous):last-of-type"
}

// Selectors for the layout whose classes start with "a-"; the pager
// selector targets the list item at position (paginator length - 1).
LET aVariant = {
    nextBtnSelector: ".a-pagination .a-last",
    pagersSelector: FMT("ul.a-pagination li:nth-of-type({})", paginationItems - 1)
}

LET variants = {
    "s": sVariant,
    "a": aVariant
}

// Pick the selector set for the current layout; abort when the prefix
// is not one of the known variants.
LET selectors = variants[prefix]

T::NOT::NONE(selectors, "Supported CSS selectors not found")
|
|
|
|
// Selectors for price parts inside a single search result.
LET priceWholeSelector = '.a-price-whole'
LET priceFracSelector = '.a-price-fraction'

// Prefix-dependent selectors for the result list, its items and the
// placeholder element that is watched while a page is loading.
LET spinner = FMT('[data-component-type="{0}-search-results"] .{0}-result-list-placeholder', prefix)
LET resultListSelector = FMT('[data-component-type="{}-search-results"]', prefix)
LET resultItemSelector = FMT('[data-component-type="{}-search-result"]', prefix)

// NOTE(review): selectors.pagersSelector is defined per variant but this
// generic selector is the one actually used - confirm which is intended.
LET pagersSelector = FMT('.{0}-pagination :not(.{0}-last)', prefix)

// The page count is read from the text of the last matched pager element;
// it is 0 when nothing matches.
LET pagers = ELEMENTS(amazon, pagersSelector)
LET pagerCount = LENGTH(pagers)
LET lastPager = LAST(pagers)
LET pages = pagerCount > 0 ? TO_INT(INNER_TEXT(lastPager)) : 0

PRINT("Found pages:", pages)
|
|
|
|
// Walk through the result pages (capped by the @pages query parameter),
// collect every search result on each page, and return them as one flat
// list of { page, url, title, price } objects.
LET result = (
    FOR pageNum IN 1..pages
        LIMIT @pages

        // The first page is already loaded; every later page is reached by
        // clicking the "next" button of the detected layout.
        LET clicked = pageNum == 1 ? false : CLICK(amazon, selectors.nextBtnSelector)

        // The value is never read; the LET exists to run the waits after a
        // click: first on the placeholder's 'aok-hidden' class, then for
        // result items to be present.
        LET waitSelector = clicked ? WAIT_NO_CLASS(amazon, spinner, 'aok-hidden') && WAIT_ELEMENT(amazon, resultItemSelector) : false

        PRINT("page:", pageNum, "clicked", clicked)

        LET found = ELEMENTS(amazon, resultItemSelector)

        LET items = (
            FOR el IN found
                // Items without a price element fall back to "0" and "00".
                LET hasPrice = ELEMENT_EXISTS(el, priceWholeSelector)
                LET priceWholeTxt = hasPrice ? FIRST(REGEX_MATCH(INNER_TEXT(el, priceWholeSelector), "[0-9]+")) : "0"
                LET priceFracTxt = hasPrice ? FIRST(REGEX_MATCH(INNER_TEXT(el, priceFracSelector), "[0-9]+")) : "00"
                LET price = TO_FLOAT(priceWholeTxt + "." + priceFracTxt)
                LET anchor = ELEMENT(el, "a")

                RETURN {
                    page: pageNum,
                    // baseURL already ends with "/", so leading slashes are
                    // stripped from the href to avoid "...com//path" URLs.
                    url: baseURL + LTRIM(anchor.attributes.href, "/"),
                    title: INNER_TEXT(el, 'h2'),
                    price
                }
        )

        RETURN items
)

// result is a list of per-page lists; flatten it into a single list.
RETURN FLATTEN(result)
|
|
|
|
|