2
0
mirror of https://github.com/vimagick/dockerfiles.git synced 2025-12-24 06:28:23 +01:00

update tesseract

This commit is contained in:
kev
2019-12-07 13:00:54 +08:00
parent 0b1a12ca76
commit e0327b03be
2 changed files with 14 additions and 40 deletions

View File

@@ -1,45 +1,19 @@
#
# Dockerfile for tesseract
# Dockerfile for tesseract CJK
#
FROM debian:jessie
FROM alpine
MAINTAINER kev <noreply@easypi.pro>
RUN set -xe \
&& apt-get update \
&& apt-get install -y autoconf \
build-essential \
git \
libcairo2 \
libcairo2-dev \
libgomp1 \
libicu52 \
libicu-dev \
liblept4 \
libleptonica-dev \
libpango1.0-0 \
libpango1.0-dev \
libtool \
&& git clone https://github.com/tesseract-ocr/tesseract.git \
&& cd tesseract \
&& ./autogen.sh \
&& ./configure \
&& make install \
&& cd .. \
&& git clone https://github.com/tesseract-ocr/tessdata.git \
&& cd tessdata \
&& mv * /usr/local/share/tessdata/ \
&& cd .. \
&& apt-get purge --auto-remove -y autoconf \
build-essential \
git \
libcairo2-dev \
libicu-dev \
libleptonica-dev \
libpango1.0-dev \
libtool \
&& rm -rf tesseract tessdata /var/cache/apk/*
&& apk add --no-cache \
tesseract-ocr \
tesseract-ocr-data-chi_sim \
tesseract-ocr-data-chi_tra \
tesseract-ocr-data-jpn \
tesseract-ocr-data-kor \
&& tesseract --version \
&& tesseract --list-langs
ENTRYPOINT ["tesseract"]
CMD ["-h"]
CMD ["--help"]

View File

@@ -14,9 +14,9 @@ Quick Start
-----------
```
$ alias tesseract='docker run --rm -v `pwd`:/work -w /work vimagick/tesseract'
$ tesseract myscan.png out
$ cat out.txt
$ alias tesseract='docker run --rm -v `pwd`:/data -w /data vimagick/tesseract'
$ tesseract input.png output -l eng --psm 3
$ cat output.txt
```
[1]: https://github.com/tesseract-ocr/tesseract