add scrapyd

Commit 43dd686dd5, parent 99a91fff79
Mirror of https://github.com/vimagick/dockerfiles.git
.dockerignore (Normal file, 2 lines added)
@@ -0,0 +1,2 @@
.git
.gitignore
.gitignore (Normal file, vendored, 2 lines added)
@@ -0,0 +1,2 @@
*~
*.swp
scrapyd/001-scrapyd (Normal file, 2 lines added)
@@ -0,0 +1,2 @@
[scrapyd]
items_dir =
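scrapyd merges every file under /etc/scrapyd/conf.d/ on top of its built-in defaults, so this drop-in only overrides items_dir; leaving it empty disables scrapyd's on-disk item feed storage. If further options were needed, the same drop-in could be extended, as in this hypothetical sketch (bind_address and http_port are standard scrapyd settings, but these lines are not part of the commit):

    # Hypothetical: append extra scrapyd options to the same conf.d drop-in.
    printf '%s\n' 'bind_address = 0.0.0.0' 'http_port = 6800' >> scrapyd/001-scrapyd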
scrapyd/Dockerfile (Normal file, 42 lines added)
@@ -0,0 +1,42 @@
#
# Dockerfile for scrapyd
#
# References:
# - http://docs.docker.com/reference/builder/
# - http://doc.scrapy.org/en/latest/topics/ubuntu.html#topics-ubuntu
# - https://github.com/scrapy/scrapyd/blob/master/debian/scrapyd.upstart#L9-L11
# - http://pip.readthedocs.org/en/latest/installing.html
# - http://supervisord.org/index.html
#

FROM ubuntu:14.04
MAINTAINER kev

RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 627220E7
RUN echo 'deb http://archive.scrapy.org/ubuntu scrapy main' >/etc/apt/sources.list.d/scrapy.list
RUN apt-get update && \
    apt-get install -y \
        git \
        libpq-dev \
        build-essential \
        python-dev \
        python-pip \
        python-numpy \
        python-txzmq \
        scrapy-0.24 \
        scrapyd && \
    apt-get clean
RUN rm -rf /var/lib/apt/lists/*
RUN mkdir /var/log/supervisor/

ADD ./001-scrapyd /etc/scrapyd/conf.d/
ADD ./requirements.txt /tmp/
ADD ./supervisord.conf /etc/supervisor/supervisord.conf

WORKDIR /tmp
RUN pip install -r requirements.txt

EXPOSE 6800 9001

CMD supervisord -c /etc/supervisor/supervisord.conf
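A minimal smoke test for an image built from this Dockerfile, run from the repository root; the `scrapyd` tag follows the README below, `listprojects.json` is a standard scrapyd endpoint, and the admin:admin credentials come from the supervisord.conf added later in this commit:

    # Build the image and start a throwaway container.
    docker build -t scrapyd scrapyd/
    CID=$(docker run -d -p 6800:6800 -p 9001:9001 scrapyd)

    # scrapyd's JSON API should answer on 6800 ...
    curl -s http://localhost:6800/listprojects.json

    # ... and supervisor's web UI on 9001.
    curl -s -u admin:admin http://localhost:9001/

    docker rm -f "$CID"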
scrapyd/README.md (Normal file, 17 lines added)
@@ -0,0 +1,17 @@
docker-scrapyd
==============

Dockerfile for building an image that runs [scrapyd][1],
monitored with [supervisor][2].

## Building

    $ docker build -t scrapyd .

## Running

    $ docker run -p 6800:6800 -p 9001:9001 scrapyd

[1]: https://github.com/scrapy/scrapyd
[2]: http://admin:admin@localhost:9001
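Once a container from this image is running, Scrapy projects are deployed and crawls scheduled through scrapyd's HTTP JSON API. A sketch, where `myproject`, `somespider`, and `myproject.egg` are placeholders (the egg can be produced with scrapyd-client's `scrapyd-deploy` or any other packaging of a Scrapy project):

    # Upload a packaged Scrapy project (egg) as version r1 of "myproject".
    curl http://localhost:6800/addversion.json -F project=myproject -F version=r1 -F egg=@myproject.egg

    # Schedule a crawl, then poll its status.
    curl http://localhost:6800/schedule.json -d project=myproject -d spider=somespider
    curl "http://localhost:6800/listjobs.json?project=myproject"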
scrapyd/requirements.txt (Normal file, 21 lines added)
@@ -0,0 +1,21 @@
--allow-all-external
--allow-unverified jsonpath

# parser
jsonpath
jsonschema
pyquery
pyparsing
git+https://github.com/scrapy/scrapely

# database
redis
pymongo
psycopg2

# others
requests
chardet
toolz
supervisor
scrapyd/supervisord.conf (Normal file, 20 lines added)
@@ -0,0 +1,20 @@
[inet_http_server]
port = :9001
username = admin
password = {SHA}d033e22ae348aeb5660fc2140aec35850c4da997

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisord]
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
childlogdir=/var/log/supervisor
nodaemon=true

[supervisorctl]
serverurl=http://localhost:9001

[program:scrapyd]
command = /usr/bin/scrapyd -u scrapy -g nogroup --pidfile /var/run/scrapyd.pid -l /var/log/scrapyd/scrapyd.log
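The {SHA} value is the SHA-1 hash of "admin", matching the admin:admin URL in the README. One way to poke at this supervisor instance from outside a running container; the container name is a placeholder and -s/-u/-p are standard supervisorctl options:

    # Check the state of the managed scrapyd process.
    docker exec <container> supervisorctl -s http://localhost:9001 -u admin -p admin status

    # Tail scrapyd's stdout log through supervisor.
    docker exec <container> supervisorctl -s http://localhost:9001 -u admin -p admin tail scrapyd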