1
0
mirror of https://github.com/MontFerret/ferret.git synced 2024-12-12 11:15:14 +02:00
ferret/examples/crawler.fql
Tim Voronov e6dd5689b4
Bugfix/e2e tests (#648)
* Fixed logger level

* Fixed WAITFOR EVENT parser

* Added tracing to Network Manager

* Updated logging

* Swtitched to value type of logger

* Added tracing

* Increased websocket maximum buffer size

* Ignore unimportant error message

* Added support of new CDP API for layouts

* Switched to value type of logger

* Added log level

* Fixed early context cancellation

* Updated example of 'click' action

* Switched to val for elements lookup

* Fixed unit tests

* Refactored 'eval' module

* Fixed SetStyle eval expression

* Fixed style deletion

* Updated logic of setting multiple styles
2021-09-02 11:09:48 -04:00

26 lines
749 B
Plaintext

LET doc = DOCUMENT('https://www.theverge.com/tech', {
driver: "cdp",
ignore: {
resources: [
{
url: "*",
type: "image"
}
]
}
})
WAIT_ELEMENT(doc, '.c-compact-river__entry', 5000)
LET articles = ELEMENTS(doc, '.c-entry-box--compact__image-wrapper')
LET links = (
FOR article IN articles
FILTER article.attributes?.href LIKE 'https://www.theverge.com/*'
RETURN article.attributes.href
)
FOR link IN links
// The Verge has pretty heavy pages, so let's increase the navigation wait time
NAVIGATE(doc, link, 20000)
WAIT_ELEMENT(doc, '.c-entry-content', 15000)
LET texter = ELEMENT(doc, '.c-entry-content')
RETURN texter.innerText