From 4968051b6dab4b9b2347633cb1b02cc64bd5f8c0 Mon Sep 17 00:00:00 2001
From: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
Date: Tue, 30 Sep 2014 15:41:00 -0400
Subject: [PATCH] Don't silence OCR errors even if Tesseract is optional,
 otherwise the user won't know what happened. Catch the generic OSError
 exception and raise a friendlier "Tesseract not found" exception

---
 mayan/apps/ocr/backends/tesseract.py | 38 +++++++++++++++++-----------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/mayan/apps/ocr/backends/tesseract.py b/mayan/apps/ocr/backends/tesseract.py
index ba679c9b02..51cef200fb 100644
--- a/mayan/apps/ocr/backends/tesseract.py
+++ b/mayan/apps/ocr/backends/tesseract.py
@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 
 import codecs
+import errno
 import os
 import subprocess
 import tempfile
@@ -25,23 +26,30 @@ class Tesseract(BackendBase):
         if language is not None:
             command.extend([u'-l', language])
 
-        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-        return_code = proc.wait()
-        if return_code != 0:
-            error_text = proc.stderr.read()
-            fs_cleanup(filepath)
-            fs_cleanup(ocr_output)
-            if language:
-                # If tesseract gives an error with a language parameter
-                # re-run it with no parameter again
-                return self.execute(input_filename, language=None)
+        try:
+            proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        except OSError as exception:
+            if exception.errno == errno.ENOENT:
+                raise OCRError('Tesseract not found at %s' % TESSERACT_PATH)
             else:
-                raise OCRError(error_text)
+                raise
+        else:
+            return_code = proc.wait()
+            if return_code != 0:
+                error_text = proc.stderr.read()
+                fs_cleanup(filepath)
+                fs_cleanup(ocr_output)
+                if language:
+                    # If tesseract gives an error with a language parameter
+                    # re-run it with no parameter again
+                    return self.execute(input_filename, language=None)
+                else:
+                    raise OCRError(error_text)
 
-        fd = codecs.open(ocr_output, 'r', 'utf-8')
-        text = fd.read().strip()
-        fd.close()
+            fd = codecs.open(ocr_output, 'r', 'utf-8')
+            text = fd.read().strip()
+            fd.close()
 
-        os.unlink(filepath)
+            os.unlink(filepath)
 
         return text