mirror of
https://github.com/vimagick/dockerfiles.git
synced 2025-05-13 22:06:55 +02:00
add scrapyd
This commit is contained in:
parent
99a91fff79
commit
43dd686dd5
2
.dockerignore
Normal file
2
.dockerignore
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.git
|
||||||
|
.gitignore
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*~
|
||||||
|
*.swp
|
2
scrapyd/001-scrapyd
Normal file
2
scrapyd/001-scrapyd
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
[scrapyd]
|
||||||
|
items_dir =
|
42
scrapyd/Dockerfile
Normal file
42
scrapyd/Dockerfile
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#
|
||||||
|
# Dockerfile for scrapyd
|
||||||
|
#
|
||||||
|
# References:
|
||||||
|
# - http://docs.docker.com/reference/builder/
|
||||||
|
# - http://doc.scrapy.org/en/latest/topics/ubuntu.html#topics-ubuntu
|
||||||
|
# - https://github.com/scrapy/scrapyd/blob/master/debian/scrapyd.upstart#L9-L11
|
||||||
|
# - http://pip.readthedocs.org/en/latest/installing.html
|
||||||
|
# - http://supervisord.org/index.html
|
||||||
|
#
|
||||||
|
|
||||||
|
FROM ubuntu:14.04
|
||||||
|
MAINTAINER kev
|
||||||
|
|
||||||
|
RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 627220E7
|
||||||
|
RUN echo 'deb http://archive.scrapy.org/ubuntu scrapy main' >/etc/apt/sources.list.d/scrapy.list
|
||||||
|
RUN apt-get update &&\
|
||||||
|
apt-get install -y\
|
||||||
|
git\
|
||||||
|
libpq-dev\
|
||||||
|
build-essential\
|
||||||
|
python-dev\
|
||||||
|
python-pip\
|
||||||
|
python-numpy\
|
||||||
|
python-txzmq\
|
||||||
|
scrapy-0.24\
|
||||||
|
scrapyd &&\
|
||||||
|
apt-get clean
|
||||||
|
RUN rm -rf /var/lib/apt/lists/*
|
||||||
|
RUN mkdir /var/log/supervisor/
|
||||||
|
|
||||||
|
ADD ./001-scrapyd /etc/scrapyd/conf.d/
|
||||||
|
ADD ./requirements.txt /tmp/
|
||||||
|
ADD ./supervisord.conf /etc/supervisor/supervisord.conf
|
||||||
|
|
||||||
|
WORKDIR /tmp
|
||||||
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
|
EXPOSE 6800 9001
|
||||||
|
|
||||||
|
CMD supervisord -c /etc/supervisor/supervisord.conf
|
||||||
|
|
17
scrapyd/README.md
Normal file
17
scrapyd/README.md
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
docker-scrapyd
|
||||||
|
==============
|
||||||
|
|
||||||
|
Dockerfile for building an image that runs [scrapyd][1].
|
||||||
|
You can then monitor it with [supervisor][2].
|
||||||
|
|
||||||
|
## Building
|
||||||
|
|
||||||
|
$ docker build -t scrapyd .
|
||||||
|
|
||||||
|
## Running
|
||||||
|
|
||||||
|
$ docker run -p 6800:6800 -p 9001:9001 scrapyd
|
||||||
|
|
||||||
|
[1]: https://github.com/scrapy/scrapyd
|
||||||
|
[2]: http://admin:admin@localhost:9001
|
||||||
|
|
21
scrapyd/requirements.txt
Normal file
21
scrapyd/requirements.txt
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
--allow-all-external
|
||||||
|
--allow-unverified jsonpath
|
||||||
|
|
||||||
|
# parser
|
||||||
|
jsonpath
|
||||||
|
jsonschema
|
||||||
|
pyquery
|
||||||
|
pyparsing
|
||||||
|
git+https://github.com/scrapy/scrapely
|
||||||
|
|
||||||
|
# database
|
||||||
|
redis
|
||||||
|
pymongo
|
||||||
|
psycopg2
|
||||||
|
|
||||||
|
# others
|
||||||
|
requests
|
||||||
|
chardet
|
||||||
|
toolz
|
||||||
|
supervisor
|
||||||
|
|
20
scrapyd/supervisord.conf
Normal file
20
scrapyd/supervisord.conf
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
[inet_http_server]
|
||||||
|
port = :9001
|
||||||
|
username = admin
|
||||||
|
password = {SHA}d033e22ae348aeb5660fc2140aec35850c4da997
|
||||||
|
|
||||||
|
[rpcinterface:supervisor]
|
||||||
|
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
||||||
|
|
||||||
|
[supervisord]
|
||||||
|
logfile=/var/log/supervisor/supervisord.log
|
||||||
|
pidfile=/var/run/supervisord.pid
|
||||||
|
childlogdir=/var/log/supervisor
|
||||||
|
nodaemon=true
|
||||||
|
|
||||||
|
[supervisorctl]
|
||||||
|
serverurl=http://localhost:9001
|
||||||
|
|
||||||
|
[program:scrapyd]
|
||||||
|
command = /usr/bin/scrapyd -u scrapy -g nogroup --pidfile /var/run/scrapyd.pid -l /var/log/scrapyd/scrapyd.log
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user