Remove the scrapy image: deletes scrapy/Dockerfile, scrapy/Dockerfile.old and scrapy/README.md.
NOTE(review): line breaks were restored from a whitespace-collapsed copy of this patch; record
counts match the hunk headers (49, 12, 27), but leading indentation inside the multi-line RUN
was reconstructed -- verify against blobs 15d54e8, d100de7, 179da75 before applying strictly.

diff --git a/scrapy/Dockerfile b/scrapy/Dockerfile
deleted file mode 100644
index 15d54e8..0000000
--- a/scrapy/Dockerfile
+++ /dev/null
@@ -1,49 +0,0 @@
-#
-# Dockerfile for scrapy
-#
-
-FROM debian:jessie
-MAINTAINER kev
-
-RUN apt-get update \
-    && apt-get install -y autoconf \
-                          build-essential \
-                          curl \
-                          libffi-dev \
-                          libssl-dev \
-                          libtool \
-                          python \
-                          python-dev \
-                          vim-tiny \
-    && mkdir libxml2 \
-        && curl -sSL ftp://xmlsoft.org/libxml2/libxml2-2.9.2.tar.gz | tar xz --strip 1 -C libxml2 \
-        && cd libxml2 \
-        && ./configure --prefix=/usr \
-        && make \
-        && make install \
-        && ldconfig \
-        && cd .. \
-        && rm -rf libxml2 \
-    && mkdir libxslt \
-        && curl -sSL https://git.gnome.org/browse/libxslt/snapshot/libxslt-1.1.28.tar.xz | tar xJ --strip 1 -C libxslt \
-        && cd libxslt \
-        && ./autogen.sh --prefix=/usr \
-        && make \
-        && make install \
-        && ldconfig \
-        && cd .. \
-        && rm -rf libxslt \
-    && curl -sSL https://bootstrap.pypa.io/get-pip.py | python \
-    && pip install scrapy==0.24.6 \
-    && curl -sSL https://github.com/scrapy/scrapy/raw/master/extras/scrapy_bash_completion -o /etc/bash_completion.d/scrapy_bash_completion \
-    && echo 'source /etc/bash_completion.d/scrapy_bash_completion' >> /root/.bashrc \
-    && apt-get remove -y autoconf \
-                         build-essential \
-                         libffi-dev \
-                         libssl-dev \
-                         libtool \
-                         python-dev \
-    && apt-get autoremove -y \
-    && rm -rf /var/lib/apt/lists/*
-
-CMD ["/bin/bash"]
diff --git a/scrapy/Dockerfile.old b/scrapy/Dockerfile.old
deleted file mode 100644
index d100de7..0000000
--- a/scrapy/Dockerfile.old
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# Dockerfile for scrapy
-#
-
-FROM ubuntu:14.04
-MAINTAINER kev
-
-RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 627220E7
-RUN echo 'deb http://archive.scrapy.org/ubuntu scrapy main' >/etc/apt/sources.list.d/scrapy.list
-RUN apt-get update && apt-get install -y python-pip scrapy-0.24 scrapyd
-
-CMD bash
diff --git a/scrapy/README.md b/scrapy/README.md
deleted file mode 100644
index 179da75..0000000
--- a/scrapy/README.md
+++ /dev/null
@@ -1,27 +0,0 @@
-## WHAT-IS
-
-`Scrapy`: An open source and collaborative framework for extracting the data
-you need from websites. In a fast, simple, yet extensible way.
-
-This image is based on `debian:jessie`, and it only takes 278.6 MB.
-You can create a scrapy (v0.24.6) project on top of this image.
-
-## HOW-TO
-
-```
-$ docker run --name scrapy -it vimagick/scrapy
->>> scrapy startproject demo
->>> cd demo
->>> scrapy genspider example example.com
->>> scrapy edit example
->>> scrapy crawl example
-```
-
-## TODO-LIST
-
-- [x] build [libxml2][1]/[libxslt][2] from source
-- [x] add [scrapy_bash_completion][3] script
-
-[1]: http://www.xmlsoft.org/downloads.html
-[2]: http://git.gnome.org/browse/libxslt/
-[3]: https://github.com/scrapy/scrapy/raw/master/extras/scrapy_bash_completion