Add a new OCR backend using PyOCR. Remove the current backend that calls Tesseract directly.
This commit is contained in:
@@ -1,59 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
import sh
|
||||
|
||||
from PIL import Image
|
||||
import pytesseract
|
||||
|
||||
from ..classes import OCRBackendBase
|
||||
from ..exceptions import OCRError
|
||||
from ..settings import setting_tesseract_path
|
||||
|
||||
# Module-level logger, named after this module; the Tesseract backend uses it
# to report OCR failures before raising OCRError.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Tesseract(OCRBackendBase):
    """
    OCR backend that recognizes text with pytesseract and uses the
    Tesseract command line binary to find out which language packs are
    installed.
    """

    def __init__(self, *args, **kwargs):
        """
        Initialize the base backend and locate the Tesseract binary.

        ``self.binary`` is an ``sh.Command`` for the path configured in
        ``setting_tesseract_path`` or ``None`` when that command cannot be
        found.
        """
        super(Tesseract, self).__init__(*args, **kwargs)
        try:
            self.binary = sh.Command(setting_tesseract_path.value)
        except sh.CommandNotFound:
            # Without the binary the backend can still attempt OCR through
            # pytesseract; only the language listing becomes unavailable.
            self.binary = None

    def get_languages(self):
        """
        Return the language codes reported by ``tesseract --list-langs``.

        Returns an empty tuple when the binary is not available, otherwise
        a list with one entry per reported language.
        """
        if not self.binary:
            return ()

        result = self.binary(list_langs=True)

        languages = []
        # Tesseract 3.x prints the list to stderr while newer releases
        # print it to stdout, so both streams are inspected.
        for stream in (result.stdout, result.stderr):
            # sh exposes the raw streams as bytes; decode them explicitly
            # so the text handling behaves the same on Python 2 and 3.
            if isinstance(stream, bytes):
                stream = stream.decode('utf-8', 'replace')

            for line in stream.splitlines():
                line = line.strip()
                # Skip blank lines and the "List of available languages"
                # banner, which is not a language code.
                if line and not line.startswith('List of available languages'):
                    languages.append(line)

        return languages

    def execute(self, *args, **kwargs):
        """
        Run OCR on the current page image and return the recognized text.

        The page image is read from ``self.converter.get_page()`` and
        processed by pytesseract using ``self.language``.

        Raises OCRError when pytesseract fails. If the requested language
        is not among the installed ones, the error message also explains
        how to install it.
        """
        super(Tesseract, self).execute(*args, **kwargs)

        # TODO: pass tesseract binary path to the pytesseract
        image = Image.open(self.converter.get_page())
        try:
            result = pytesseract.image_to_string(
                image=image, lang=self.language
            )
        except Exception as exception:
            # Any pytesseract failure is reported as an OCRError; no retry
            # without the language option is attempted.
            error_message = 'Exception calling pytesseract with language option: {}; {}'.format(self.language, exception)

            if self.binary:
                try:
                    language_missing = (
                        self.language not in self.get_languages()
                    )
                except sh.ErrorReturnCode:
                    # Listing languages is only a diagnostic aid; if it
                    # fails too, keep the original OCR error instead of
                    # masking it with the listing failure.
                    language_missing = False

                if language_missing:
                    error_message = '{}\nThe requested Tesseract language file for "{}" is not available and needs to be installed.\nIf using Debian or Ubuntu run: apt-get install tesseract-ocr-{}'.format(error_message, self.language, self.language)

            logger.error(error_message)
            raise OCRError(error_message)

        return result
|
||||
Reference in New Issue
Block a user