Merge branch 'feature/polish_PR_52' into development

This commit is contained in:
Roberto Rosario
2014-09-30 15:48:42 -04:00
2 changed files with 32 additions and 21 deletions

View File

@@ -74,29 +74,32 @@ class Installation(SingletonModel):
def binary_dependencies(self):
    """
    Report the version of each external binary in the 'bins' namespace.

    One property is added per binary (tesseract, unpaper, pdftotext). Its
    value is the raw output of the binary's version flag, or a translated
    placeholder when the version cannot be obtained:

    * 'not found' when sh cannot locate the executable.
    * 'error getting version' for any other failure, including a failure
      while actually running the binary.

    Nothing is returned here; the properties are attached to the
    PropertyNamespace created at the top of the method.
    """
    namespace = PropertyNamespace('bins', _(u'Binary dependencies'))

    # (property name, label, executable path, version flag, output stream).
    # The labels stay as literal _() calls so the translation tooling can
    # still extract them. tesseract and pdftotext print their version on
    # stderr, unpaper prints it on stdout.
    binaries = (
        ('tesseract', _(u'tesseract version'), TESSERACT_PATH, '-v', 'stderr'),
        ('unpaper', _(u'unpaper version'), UNPAPER_PATH, '-V', 'stdout'),
        ('pdftotext', _(u'pdftotext version'), PDFTOTEXT_PATH, '-v', 'stderr'),
    )

    for name, label, path, version_flag, stream in binaries:
        try:
            # Both resolving the executable and running it stay inside the
            # try block: either step can fail and neither failure should
            # break the whole report.
            version = getattr(sh.Command(path)(version_flag), stream)
        except sh.CommandNotFound:
            version = _(u'not found')
        except Exception:
            # Best-effort reporting: a broken binary must not prevent the
            # remaining properties from being collected.
            version = _(u'error getting version')

        namespace.add_property(name, label, version, report=True)
def mayan_properties(self):
namespace = PropertyNamespace('mayan', _(u'Mayan EDMS'))

View File

@@ -1,6 +1,7 @@
from __future__ import absolute_import
import codecs
import errno
import os
import subprocess
import tempfile
@@ -25,23 +26,30 @@ class Tesseract(BackendBase):
if language is not None:
command.extend([u'-l', language])
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
error_text = proc.stderr.read()
fs_cleanup(filepath)
fs_cleanup(ocr_output)
if language:
# If tesseract gives an error with a language parameter
# re-run it with no parameter again
return self.execute(input_filename, language=None)
try:
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
except OSError as exception:
if exception.errno == errno.ENOENT:
raise OCRError('Tesseract not found at %s' % TESSERACT_PATH)
else:
raise OCRError(error_text)
raise
else:
return_code = proc.wait()
if return_code != 0:
error_text = proc.stderr.read()
fs_cleanup(filepath)
fs_cleanup(ocr_output)
if language:
# If tesseract gives an error with a language parameter
# re-run it with no parameter again
return self.execute(input_filename, language=None)
else:
raise OCRError(error_text)
fd = codecs.open(ocr_output, 'r', 'utf-8')
text = fd.read().strip()
fd.close()
fd = codecs.open(ocr_output, 'r', 'utf-8')
text = fd.read().strip()
fd.close()
os.unlink(filepath)
os.unlink(filepath)
return text