2020-06-17 23:37:01 +02:00
|
|
|
// Load the Amazon landing page through the "cdp" driver.
LET baseURL = "https://www.amazon.com/"
LET amazon = DOCUMENT(baseURL, { driver: "cdp" })

// Submit the search: wait for the submit button, type the @criteria query
// parameter into the search box, then click the button.
WAIT_ELEMENT(amazon, "#nav-search-submit-button")
INPUT(amazon, "#twotabsearchtextbox", @criteria)
CLICK(amazon, "#nav-search-submit-button")

// Block until a navigation event whose URL matches the search-results
// pattern is seen; gives up after 50000 (presumably milliseconds - confirm).
WAITFOR EVENT "navigation" IN amazon
    FILTER current.url =~ "www\.amazon\.com\/s\?k="
    TIMEOUT 50000
|
|
|
|
|
2021-09-07 22:33:30 +02:00
|
|
|
// Wait for the pagination widget to be rendered before inspecting it.
WAIT_ELEMENT(amazon, '[class*="template=PAGINATION"]')

// The paginator's class names carry a layout prefix ("<prefix>-pagination...").
// Take the first class containing "-pagination" and keep the text before the
// first dash as the prefix.
LET paginator = ELEMENT(amazon, '[class*="-pagination"]')
LET paginatorClasses = SPLIT(paginator.attributes.class, " ")
LET prefixCandidates = (
    FOR className IN paginatorClasses
        FILTER className LIKE "*-pagination*"
        LIMIT 1
        RETURN FIRST(SPLIT(className, "-"))
)
LET prefix = FIRST(prefixCandidates)

// Fail early when no prefix could be derived; later selectors depend on it.
T::NOT::EMPTY(prefix, "CSS prefix should not be empty")
PRINT("CSS Prefix is:", prefix)
|
|
|
|
|
|
|
|
// Length of the paginator element (presumably its child count - confirm).
LET paginationItems = paginator.length
// Position used by the "a" layout to address its last numbered pager.
LET lastPagerPosition = paginationItems - 1

// Selector sets for each known pagination layout, keyed by CSS prefix.
LET variants = {
    "s": {
        nextBtnSelector: ".s-pagination-next",
        pagersSelector: ".s-pagination-item:not(.s-pagination-next, .s-pagination-previous):last-of-type"
    },
    "a": {
        nextBtnSelector: ".a-pagination .a-last",
        pagersSelector: FMT("ul.a-pagination li:nth-of-type({})", lastPagerPosition)
    }
}

// Pick the selector set for the detected prefix; abort on an unknown layout.
LET selectors = variants[prefix]
T::NOT::NONE(selectors, "Supported CSS selectors not found")
|
|
|
|
|
|
|
|
// Selectors derived from the detected CSS prefix.
// spinner: placeholder element inside the search-results container.
LET spinner = FMT('[data-component-type="{0}-search-results"] .{0}-result-list-placeholder', prefix)
// NOTE(review): resultListSelector is not referenced anywhere else in this script.
LET resultListSelector = FMT('[data-component-type="{}-search-results"]', prefix)
LET resultItemSelector = FMT('[data-component-type="{}-search-result"]', prefix)

// Use the pager selector prepared for the detected layout variant. The
// previous generic '.{prefix}-pagination :not(.{prefix}-last)' pattern ignored
// the variant table and does not follow the class names the "s" variant uses
// (".s-pagination-item", ".s-pagination-next").
LET pagersSelector = selectors.pagersSelector

// Price parts live in separate elements inside each result item.
LET priceWholeSelector = '.a-price-whole'
LET priceFracSelector = '.a-price-fraction'

// The text of the last matched pager is taken as the total page count;
// 0 when no pager matched.
LET pagers = ELEMENTS(amazon, pagersSelector)
LET pages = LENGTH(pagers) > 0 ? TO_INT(INNER_TEXT(LAST(pagers))) : 0

PRINT("Found pages:", pages)
|
|
|
|
|
2018-10-09 02:20:40 +02:00
|
|
|
// Walk the result pages (at most @pages of them) and collect one record per
// search result: { page, url, title, price }.
LET result = (
    FOR pageNum IN 1..pages
        LIMIT @pages

        // The first page is already loaded; every later page is reached by
        // clicking the layout-specific "next" button.
        LET clicked = pageNum == 1 ? false : CLICK(amazon, selectors.nextBtnSelector)

        // After a click, wait on the spinner's 'aok-hidden' class and then for
        // result items. The two waits are separate statements so the second one
        // runs regardless of what the first one returns (chaining them with &&
        // would skip it on a falsy return value).
        LET spinnerWaited = clicked ? WAIT_NO_CLASS(amazon, spinner, 'aok-hidden') : false
        LET itemsWaited = clicked ? WAIT_ELEMENT(amazon, resultItemSelector) : false

        PRINT("page:", pageNum, "clicked", clicked)

        LET found = ELEMENTS(amazon, resultItemSelector)

        LET items = (
            FOR el IN found
                // Items without a price element are reported as 0.00; a missing
                // fraction element falls back to "00" instead of failing the lookup.
                LET hasPrice = ELEMENT_EXISTS(el, priceWholeSelector)
                LET hasFraction = hasPrice && ELEMENT_EXISTS(el, priceFracSelector)

                // Strip every non-digit so thousands separators ("1,299") do not
                // truncate the whole part to its first digit group.
                LET priceWholeTxt = hasPrice ? REGEX_REPLACE(INNER_TEXT(el, priceWholeSelector), "[^0-9]", "") : "0"
                LET priceFracTxt = hasFraction ? REGEX_REPLACE(INNER_TEXT(el, priceFracSelector), "[^0-9]", "") : "00"
                LET price = TO_FLOAT(priceWholeTxt + "." + priceFracTxt)

                // baseURL ends with "/" and result hrefs are expected to start with
                // "/"; join them with exactly one slash.
                LET anchor = ELEMENT(el, "a")
                LET productUrl = RTRIM(baseURL, "/") + "/" + LTRIM(anchor.attributes.href, "/")

                RETURN {
                    page: pageNum,
                    url: productUrl,
                    title: INNER_TEXT(el, 'h2'),
                    price
                }
        )

        RETURN items
)

// result is a list of per-page lists; flatten it into a single list of items.
RETURN FLATTEN(result)
|
2021-09-07 22:33:30 +02:00
|
|
|
|
|
|
|
|