Add workaround for Tesseract bug 1670
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -32,7 +32,10 @@
|
||||
65535.
|
||||
* New default value for setting MIMETYPE_FILE_READ_SIZE is
|
||||
1024.
|
||||
|
||||
* Add workaround for Tesseract bug 1670
|
||||
https://github.com/tesseract-ocr/tesseract/issues/1670
|
||||
https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
|
||||
https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104
|
||||
|
||||
3.1.11 (2019-04-XX)
|
||||
===================
|
||||
|
||||
@@ -56,6 +56,11 @@ Other changes
|
||||
safe_dump to load and dump using the CSafeLoader and SafeLoader as fallback.
|
||||
* Add SilenceLoggerTestCaseMixin to lower level of loggers
|
||||
during tests.
|
||||
* Add workaround for Tesseract bug 1670
|
||||
https://github.com/tesseract-ocr/tesseract/issues/1670
|
||||
https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
|
||||
https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104
|
||||
|
||||
|
||||
Removals
|
||||
--------
|
||||
|
||||
@@ -2,19 +2,6 @@
|
||||
Troubleshooting
|
||||
###############
|
||||
|
||||
***********
|
||||
Starting up
|
||||
***********
|
||||
|
||||
Error !strcmp(locale, "C"):Error:Assert failed:in file baseapi.cpp, line 201
|
||||
============================================================================
|
||||
Caused by an issue with Tesseract 4.0 under Python 3.
|
||||
|
||||
Solution::
|
||||
|
||||
export LC_ALL=C
|
||||
|
||||
|
||||
|
||||
********
|
||||
Database
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from contextlib import contextmanager
|
||||
import locale
|
||||
import logging
|
||||
|
||||
from PIL import Image
|
||||
@@ -12,6 +14,14 @@ from ..exceptions import OCRError
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@contextmanager
def c_locale():
    """Temporarily switch the process locale to ``C``.

    Workaround for Tesseract bug 1670: Tesseract 4.0 asserts
    ``!strcmp(locale, "C")`` when called from Python 3, so calls into the
    OCR tool are wrapped in this context manager.

    The previous locale is restored on exit, even when the wrapped code
    raises an exception.
    """
    # Calling setlocale() without a new value only queries the current
    # setting. Unlike getlocale(), which reports LC_CTYPE alone, this
    # captures every category so the restore below is exact.
    locale_current = locale.setlocale(locale.LC_ALL)
    locale.setlocale(locale.LC_ALL, 'C')
    try:
        yield
    finally:
        # Always undo the change; the locale is process-wide state.
        locale.setlocale(locale.LC_ALL, locale_current)
|
||||
|
||||
|
||||
class PyOCR(OCRBackendBase):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(PyOCR, self).__init__(*args, **kwargs)
|
||||
@@ -31,7 +41,9 @@ class PyOCR(OCRBackendBase):
|
||||
|
||||
logger.debug('Will use tool \'%s\'', self.tool.get_name())
|
||||
|
||||
self.languages = self.tool.get_available_languages()
|
||||
with c_locale():
|
||||
self.languages = self.tool.get_available_languages()
|
||||
|
||||
logger.debug('Available languages: %s', ', '.join(self.languages))
|
||||
|
||||
def execute(self, *args, **kwargs):
|
||||
@@ -42,11 +54,12 @@ class PyOCR(OCRBackendBase):
|
||||
|
||||
image = Image.open(self.converter.get_page())
|
||||
try:
|
||||
result = self.tool.image_to_string(
|
||||
image,
|
||||
lang=self.language,
|
||||
builder=pyocr.builders.TextBuilder()
|
||||
)
|
||||
with c_locale():
|
||||
result = self.tool.image_to_string(
|
||||
image,
|
||||
lang=self.language,
|
||||
builder=pyocr.builders.TextBuilder()
|
||||
)
|
||||
except Exception as exception:
|
||||
error_message = (
|
||||
'Exception calling pyocr with language option: {}; {}'
|
||||
|
||||
Reference in New Issue
Block a user