Add workaround for Tesseract bug 1670

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2019-04-12 05:27:27 -04:00
parent 93c4814a89
commit 72311c73b5
4 changed files with 28 additions and 20 deletions

View File

@@ -32,7 +32,10 @@
65535.
* New default value for setting MIMETYPE_FILE_READ_SIZE is
1024.
* Add workaround for Tesseract bug 1670
https://github.com/tesseract-ocr/tesseract/issues/1670
https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104
3.1.11 (2019-04-XX)
===================

View File

@@ -56,6 +56,11 @@ Other changes
safe_dump to load and dump using the CSafeLoader and SafeLoader as fallback.
* Add SilenceLoggerTestCaseMixin to lower level of loggers
during tests.
* Add workaround for Tesseract bug 1670
https://github.com/tesseract-ocr/tesseract/issues/1670
https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104
Removals
--------

View File

@@ -2,19 +2,6 @@
Troubleshooting
###############
***********
Starting up
***********
Error !strcmp(locale, "C"):Error:Assert failed:in file baseapi.cpp, line 201
============================================================================
Caused by an issue with Tesseract 4.0 under Python 3.
Solution::
export LC_ALL=C
********
Database

View File

@@ -1,5 +1,7 @@
from __future__ import absolute_import, unicode_literals
from contextlib import contextmanager
import locale
import logging
from PIL import Image
@@ -12,6 +14,14 @@ from ..exceptions import OCRError
logger = logging.getLogger(__name__)
@contextmanager
def c_locale():
    """
    Run the enclosed block with every locale category set to 'C'.

    Workaround for Tesseract bug 1670: Tesseract 4.0 asserts that the
    active locale is "C" (``!strcmp(locale, "C")`` in baseapi.cpp) and
    aborts otherwise. Wrap calls into the OCR tool with this context
    manager instead of requiring ``LC_ALL=C`` in the environment.

    The previous locale is restored when the block exits, including when
    the block raises.

    NOTE: ``locale.setlocale`` changes process-wide state; other threads
    will observe the 'C' locale while the block is running.
    """
    # Calling setlocale() without a value only queries the current
    # setting. Unlike getlocale(), which reports LC_CTYPE alone, the
    # returned string describes every category and can be passed back
    # to setlocale() unchanged to restore them all.
    locale_current = locale.setlocale(locale.LC_ALL)
    locale.setlocale(locale.LC_ALL, 'C')
    try:
        yield
    finally:
        # Always undo the override so a failing OCR call does not leave
        # the whole process stuck in the 'C' locale.
        locale.setlocale(locale.LC_ALL, locale_current)
class PyOCR(OCRBackendBase):
def __init__(self, *args, **kwargs):
super(PyOCR, self).__init__(*args, **kwargs)
@@ -31,7 +41,9 @@ class PyOCR(OCRBackendBase):
logger.debug('Will use tool \'%s\'', self.tool.get_name())
self.languages = self.tool.get_available_languages()
with c_locale():
self.languages = self.tool.get_available_languages()
logger.debug('Available languages: %s', ', '.join(self.languages))
def execute(self, *args, **kwargs):
@@ -42,11 +54,12 @@ class PyOCR(OCRBackendBase):
image = Image.open(self.converter.get_page())
try:
result = self.tool.image_to_string(
image,
lang=self.language,
builder=pyocr.builders.TextBuilder()
)
with c_locale():
result = self.tool.image_to_string(
image,
lang=self.language,
builder=pyocr.builders.TextBuilder()
)
except Exception as exception:
error_message = (
'Exception calling pyocr with language option: {}; {}'