2015-07-17 10:37:55 +02:00
|
|
|
splash
|
|
|
|
======
|
|
|
|
|
2017-01-06 12:13:03 +02:00
|
|
|
[Splash][1] is a JavaScript rendering service with an HTTP API. It's a
|
2015-07-17 10:37:55 +02:00
|
|
|
lightweight browser with an HTTP API, implemented in Python using Twisted and
|
|
|
|
Qt.
|
|
|
|
|
|
|
|
It's fast, lightweight and stateless, which makes it easy to distribute.
|
|
|
|
|
|
|
|
## docker-compose.yml
|
|
|
|
|
2017-01-06 12:13:03 +02:00
|
|
|
```yaml
|
2015-07-17 10:37:55 +02:00
|
|
|
# Single "splash" service definition.
splash:
  image: scrapinghub/splash
  # Extra command-line options passed to the container's entrypoint.
  command: --maxrss 4096 --max-timeout 300
  ports:
    - "8050:8050"
    - "8051:8051"
    - "5023:5023"
  # Host directories under ./data mounted over the paths in /etc/splash.
  volumes:
    - ./data/filters:/etc/splash/filters
    - ./data/js-profiles:/etc/splash/js-profiles
    - ./data/lua_modules:/etc/splash/lua_modules
    - ./data/proxy-profiles:/etc/splash/proxy-profiles
  # Container memory cap; numerically above the --maxrss value given above
  # (NOTE(review): --maxrss is presumably in MB -- confirm in the Splash docs).
  mem_limit: 4608M
  restart: always
|
|
|
|
```
|
|
|
|
|
|
|
|
## server
|
|
|
|
|
|
|
|
```
|
|
|
|
$ cd ~/fig/splash
|
|
|
|
|
|
|
|
$ tree
|
|
|
|
.
|
|
|
|
├── docker-compose.yml
|
2017-01-06 12:13:03 +02:00
|
|
|
└── data
|
2015-07-17 10:37:55 +02:00
|
|
|
├── filters
|
|
|
|
│ ├── easylist.txt
|
|
|
|
│ └── default.txt
|
|
|
|
├── js-profiles
|
|
|
|
└── proxy-profiles
|
|
|
|
|
2017-01-06 12:13:03 +02:00
|
|
|
$ cat data/filters/default.txt
|
2015-07-17 10:37:55 +02:00
|
|
|
||fonts.googleapis.com^
|
|
|
|
||ajax.googleapis.com^
|
|
|
|
|
|
|
|
$ docker-compose up -d
|
|
|
|
```
|
|
|
|
|
|
|
|
> If a `default.txt` file is present in the `--filters-path` folder, it is used by default
|
|
|
|
> when the `filters` argument is not specified. Pass `filters=none` if you don’t want
|
|
|
|
> default filters to be applied.
|
|
|
|
|
|
|
|
## client
|
|
|
|
|
2018-02-07 05:06:05 +02:00
|
|
|
```lua
|
|
|
|
-- http-proxy.lua
|
|
|
|
|
|
|
|
--- Load http://ifconfig.co through an HTTP proxy and return a screenshot.
-- Every outgoing request is routed through the proxy by calling
-- `request:set_proxy` from an `on_request` callback.
-- @param splash scripting object providing on_request/go/wait/png.
-- @param args optional table of script arguments; `proxy_host` and
--   `proxy_port` override the default proxy address when present.
-- @return whatever `splash:png()` returns.
function main(splash, args)
  -- Fall back to an empty table so the defaults apply when no arguments
  -- are passed.
  args = args or {}
  local proxy_host = args.proxy_host or "61.91.251.235"
  -- The port may arrive as a string, so coerce it to a number.
  local proxy_port = tonumber(args.proxy_port) or 8080

  splash:on_request(function(request)
    request:set_proxy{
      host = proxy_host,
      port = proxy_port,
    }
  end)

  -- Abort with the reported reason if navigation or the wait fails.
  assert(splash:go("http://ifconfig.co"))
  assert(splash:wait(0.5))
  return splash:png()
end
|
|
|
|
```
|
|
|
|
|
2018-02-05 10:01:39 +02:00
|
|
|
```lua
|
|
|
|
-- baidu-search.lua
|
|
|
|
|
|
|
|
--- Search Baidu for "google" and return a PNG of the results column.
-- Types the query into `#kw`, clicks `#su`, removes the padding of
-- `#content_left` and renders that element.
-- @param splash scripting object providing go/wait/select/runjs.
-- @return whatever `:png()` returns for the `#content_left` element.
function main(splash)
  -- Look an element up and stop with a readable message when the selector
  -- matches nothing, instead of failing later on a nil index.
  local function must_select(css)
    local element = assert(splash:select(css), "element not found: " .. css)
    return element
  end

  -- Abort if the page cannot be loaded, as the other example scripts do.
  assert(splash:go('https://www.baidu.com/'))
  splash:wait(1)
  must_select('#kw'):send_text('google')
  splash:wait(1)
  must_select('#su'):click()
  splash:wait(1)
  -- The snippet is run only for its side effect, so use runjs: no result
  -- has to be converted back to Lua.
  assert(splash:runjs([[
    $('#content_left').css('padding', '0');
  ]]))
  splash:set_viewport_full()
  splash:wait(1)
  return must_select('#content_left'):png()
end
|
|
|
|
```
|
|
|
|
|
2017-01-06 12:13:03 +02:00
|
|
|
```lua
|
|
|
|
-- youtube-logo.lua
|
|
|
|
|
|
|
|
--- Return a PNG of the element matching `.logo` on the YouTube home page.
-- @param splash scripting object providing go/wait/select.
-- @return whatever `:png()` returns for the selected element.
function main(splash)
  -- Abort if the page cannot be loaded, as the other example scripts do.
  assert(splash:go('https://www.youtube.com/'))
  splash:wait(0.5)
  -- Stop with a readable message when the selector matches nothing,
  -- instead of failing on a nil index below.
  local logo = assert(splash:select('.logo'), "element not found: .logo")
  return logo:png()
end
|
2015-07-17 10:37:55 +02:00
|
|
|
```
|
|
|
|
|
2017-01-21 21:05:30 +02:00
|
|
|
```lua
|
|
|
|
-- crop.lua
|
|
|
|
|
|
|
|
--- Screenshot the smallest rectangle that encloses two page elements.
-- Reads `url`, `css1` and `css2` from `splash.args`, loads the page and
-- renders the region spanning the bounds of both selected elements.
-- @param splash scripting object providing args/go/select/png.
-- @return whatever `splash:png{region=...}` returns.
function main(splash)
  local args = splash.args
  local url, css1, css2 = args.url, args.css1, args.css2
  -- Report missing arguments up front rather than failing deeper in.
  assert(url and css1 and css2, "url, css1 and css2 arguments are required")

  -- Return the bounds of the element matching `css`, stopping with a
  -- readable message when the selector matches nothing.
  local function bounds_of(css)
    local element = assert(splash:select(css), "element not found: " .. css)
    return element:bounds()
  end

  assert(splash:go(url))
  splash:set_viewport_full()
  local box1 = bounds_of(css1)
  local box2 = bounds_of(css2)

  -- Region order is left, top, right, bottom: the union of both boxes.
  return splash:png{
    region = {
      math.min(box1.left, box2.left),
      math.min(box1.top, box2.top),
      math.max(box1.right, box2.right),
      math.max(box1.bottom, box2.bottom),
    }
  }
end
|
|
|
|
```
|
|
|
|
|
2018-02-09 20:46:34 +02:00
|
|
|
View more [examples][2].
|
|
|
|
|
2017-01-06 12:13:03 +02:00
|
|
|
```bash
|
|
|
|
# whole page
|
2017-01-21 21:05:30 +02:00
|
|
|
$ http http://server:8050/render.png url==https://www.youtube.com/ > youtube.png
|
2017-01-06 12:13:03 +02:00
|
|
|
|
|
|
|
# only logo
|
|
|
|
$ http http://server:8050/execute lua_source=@youtube-logo.lua > youtube-logo.png
|
2017-01-21 21:05:30 +02:00
|
|
|
|
|
|
|
# only form
|
|
|
|
$ http http://server:8050/execute lua_source=@crop.lua \
|
|
|
|
url=https://www.facebook.com/ \
|
|
|
|
css1='#u_0_1' \
|
|
|
|
css2='#u_0_b' > facebook-form.png
|
2015-07-17 10:37:55 +02:00
|
|
|
```
|
|
|
|
|
|
|
|
[1]: https://splash.readthedocs.io/en/latest/
|
2018-02-09 20:46:34 +02:00
|
|
|
[2]: https://github.com/scrapinghub/splash/tree/master/splash/examples
|