Patch summary for the scrapy image.
Dockerfile: installs vim-tiny, pins scrapy to 0.24.6 instead of 1.0.0rc1, and appends a line to /root/.bashrc that sources the scrapy bash completion script.
README.md: states the base image and image size, updates the documented scrapy version, and expands the HOW-TO session.
NOTE review: leading whitespace on context lines and blank-line placement were reconstructed and may not match the target files byte for byte. TODO confirm before applying, or apply with whitespace differences ignored.

diff --git a/scrapy/Dockerfile b/scrapy/Dockerfile
index e5298b7..15d54e8 100644
--- a/scrapy/Dockerfile
+++ b/scrapy/Dockerfile
@@ -14,6 +14,7 @@ RUN apt-get update \
                           libtool \
                           python \
                           python-dev \
+                          vim-tiny \
     && mkdir libxml2 \
     && curl -sSL ftp://xmlsoft.org/libxml2/libxml2-2.9.2.tar.gz | tar xz --strip 1 -C libxml2 \
     && cd libxml2 \
@@ -33,8 +34,9 @@ RUN apt-get update \
     && cd .. \
     && rm -rf libxslt \
     && curl -sSL https://bootstrap.pypa.io/get-pip.py | python \
-    && pip install scrapy==1.0.0rc1 \
+    && pip install scrapy==0.24.6 \
     && curl -sSL https://github.com/scrapy/scrapy/raw/master/extras/scrapy_bash_completion -o /etc/bash_completion.d/scrapy_bash_completion \
+    && echo 'source /etc/bash_completion.d/scrapy_bash_completion' >> /root/.bashrc \
     && apt-get remove -y autoconf \
                          build-essential \
                          libffi-dev \
diff --git a/scrapy/README.md b/scrapy/README.md
index 3021e0c..179da75 100644
--- a/scrapy/README.md
+++ b/scrapy/README.md
@@ -3,12 +3,18 @@
 
 `Scrapy`: An open source and collaborative framework for extracting the data you need from websites. In a fast, simple, yet extensible way.
 
-You can create a scrapy (v1.0.0rc1) project on top of this image.
+This image is based on `debian:jessie`, and it only takes 278.6 MB.
+You can create a scrapy (v0.24.6) project on top of this image.
 
 ## HOW-TO
 
 ```
-docker run --name scrapy -it vimagick/scrapy
+$ docker run --name scrapy -it vimagick/scrapy
+>>> scrapy startproject demo
+>>> cd demo
+>>> scrapy genspider example example.com
+>>> scrapy edit example
+>>> scrapy crawl example
 ```
 
 ## TODO-LIST