PEP8 cleanups, simple syntax error fixes

This commit is contained in:
Roberto Rosario
2014-06-25 02:53:12 -04:00
parent 0209d13150
commit ac061f2203
33 changed files with 124 additions and 150 deletions

View File

@@ -4,14 +4,14 @@ import codecs
import os
import subprocess
import tempfile
import sys
from . import BackendBase
from ..conf.settings import TESSERACT_PATH
from ..exceptions import OCRError
def Tesseract(BackendBase):
def execute(input_filename, language=None):
class Tesseract(BackendBase):
def execute(self, input_filename, language=None):
"""
Execute the command line binary of tesseract
"""
@@ -20,7 +20,7 @@ def Tesseract(BackendBase):
ocr_output = os.extsep.join([filepath, u'txt'])
command = [unicode(TESSERACT_PATH), unicode(input_filename), unicode(filepath)]
if lang is not None:
if language is not None:
command.extend([u'-l', language])
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
@@ -29,12 +29,12 @@ def Tesseract(BackendBase):
error_text = proc.stderr.read()
cleanup(filepath)
cleanup(ocr_output)
if lang:
if language:
# If tesseract gives an error with a language parameter
# re-run it with no parameter again
return run_tesseract(input_filename, language=None)
return self.execute(input_filename, language=None)
else:
raise TesseractError(error_text)
raise OCRError(error_text)
fd = codecs.open(ocr_output, 'r', 'utf-8')
text = fd.read().strip()
@@ -43,3 +43,14 @@ def Tesseract(BackendBase):
os.unlink(filepath)
return text
# TODO: Redundant, also in api.py
def cleanup(filename):
    """
    Try to remove the given filename on a best-effort basis.

    Any OSError raised by the removal is swallowed, so a non-existent
    file is ignored, and so is every other OS-level failure to delete.
    Nothing is returned.
    """
    try:
        os.remove(filename)
    except OSError:
        # Deliberately silent: a failed removal must not raise out of here.
        pass