From 4968051b6dab4b9b2347633cb1b02cc64bd5f8c0 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 30 Sep 2014 15:41:00 -0400 Subject: [PATCH] Don't silence OCR errors even if Tesseract is optional, otherwise the user won't know what happened. Catch the OSError generic exception and return a friendlier "Tesseract not found" exception --- mayan/apps/ocr/backends/tesseract.py | 38 +++++++++++++++++----------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/mayan/apps/ocr/backends/tesseract.py b/mayan/apps/ocr/backends/tesseract.py index ba679c9b02..51cef200fb 100644 --- a/mayan/apps/ocr/backends/tesseract.py +++ b/mayan/apps/ocr/backends/tesseract.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import codecs +import errno import os import subprocess import tempfile @@ -25,23 +26,30 @@ class Tesseract(BackendBase): if language is not None: command.extend([u'-l', language]) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - error_text = proc.stderr.read() - fs_cleanup(filepath) - fs_cleanup(ocr_output) - if language: - # If tesseract gives an error with a language parameter - # re-run it with no parameter again - return self.execute(input_filename, language=None) + try: + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + except OSError as exception: + if exception.errno == errno.ENOENT: + raise OCRError('Tesseract not found at %s' % TESSERACT_PATH) else: - raise OCRError(error_text) + raise + else: + return_code = proc.wait() + if return_code != 0: + error_text = proc.stderr.read() + fs_cleanup(filepath) + fs_cleanup(ocr_output) + if language: + # If tesseract gives an error with a language parameter + # re-run it with no parameter again + return self.execute(input_filename, language=None) + else: + raise OCRError(error_text) - fd = codecs.open(ocr_output, 'r', 'utf-8') - text = fd.read().strip() - 
fd.close() + fd = codecs.open(ocr_output, 'r', 'utf-8') + text = fd.read().strip() + fd.close() - os.unlink(filepath) + os.unlink(filepath) return text