diff --git a/Dockerfile b/Dockerfile index f498a09..79a3e16 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,50 +5,18 @@ # Developed by Mark Peng (markpeng.ntu at gmail) # -FROM ubuntu:12.04 +FROM debian:sid MAINTAINER guitarmind RUN apt-get update && apt-get install -y \ - autoconf \ - automake \ - autotools-dev \ - build-essential \ - checkinstall \ - libjpeg-dev \ - libpng-dev \ - libtiff-dev \ - libtool \ python \ python-imaging \ python-tornado \ - wget \ - zlib1g-dev + tesseract-ocr \ + tesseract-ocr-eng -RUN mkdir ~/temp \ - && cd ~/temp/ \ - && wget http://www.leptonica.org/source/leptonica-1.69.tar.gz \ - && tar -zxvf leptonica-1.69.tar.gz \ - && cd leptonica-1.69 \ - && ./configure \ - && make \ - && checkinstall \ - && ldconfig - -RUN cd ~/temp/ \ - && wget https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz \ - && tar xvf tesseract-ocr-3.02.02.tar.gz \ - && cd tesseract-ocr \ - && ./autogen.sh \ - && mkdir ~/local \ - && ./configure --prefix=$HOME/local/ \ - && make \ - && make install \ - && cd ~/local/share \ - && wget https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz \ - && tar xvf tesseract-ocr-3.02.eng.tar.gz - -ENV TESSDATA_PREFIX /root/local/share/tesseract-ocr +ENV TESSDATA_PREFIX /usr/share/tesseract-ocr RUN mkdir -p /opt/ocr/static @@ -61,5 +29,4 @@ EXPOSE 1688 WORKDIR /opt/ocr -CMD ["python", "/opt/ocr/tesseractserver.py", "-p", "1688", "-b", "/root/local/lib", "-d", "/root/local/share/tesseract-ocr" ] - +CMD ["python", "/opt/ocr/tesseractserver.py", "-p", "1688", "-b", "/usr/lib", "-d", "/usr/share/tesseract-ocr" ] diff --git a/tesseractcapi.py b/tesseractcapi.py index 2681d8d..52392bd 100644 --- a/tesseractcapi.py +++ b/tesseractcapi.py @@ -32,23 +32,8 @@ """ class TesseactWrapper: def __init__(self, lang, libpath, tessdata): - libname_302 = libpath + "/libtesseract.so.3.0.2" - libname_303 = libpath + "/libtesseract.so.3.0.3" - libname_alt = "/libtesseract.so.3" - - try: - self.tesseract = ctypes.cdll.LoadLibrary(libname_302) - except: - try: - self.tesseract = ctypes.cdll.LoadLibrary(libname_303) - except: - try: - self.tesseract = ctypes.cdll.LoadLibrary(libname_alt) - except: - print("Trying to load '%s'..." % libname) - print("Trying to load '%s'..." % libname_alt) - print("Loading failed from the locations above.") - exit(1) + libname = libpath + '/libtesseract.so.3' + self.tesseract = ctypes.cdll.LoadLibrary(libname) self.tesseract.TessVersion.restype = ctypes.c_char_p tesseract_version = self.tesseract.TessVersion() @@ -59,10 +44,10 @@ def __init__(self, lang, libpath, tessdata): trimmed_version = tesseract_version[:(tesseract_version.index('.') + 3)] # We need to check library version because libtesseract.so.3 is symlink - # and can point to other version than 3.02 + # and can point to other version before 3.02 if float(trimmed_version) < 3.02: print("Found tesseract-ocr library version %s." % tesseract_version) - print("C-API is present only in version 3.02!") + print("C-API is present only in version above 3.02!") exit(2) self.api = self.tesseract.TessBaseAPICreate()