Add workaround for Tesseract bug 1670
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -32,7 +32,10 @@
|
|||||||
65535.
|
65535.
|
||||||
* New default value for setting MIMETYPE_FILE_READ_SIZE is
|
* New default value for setting MIMETYPE_FILE_READ_SIZE is
|
||||||
1024.
|
1024.
|
||||||
|
* Add workaround for Tesseract bug 1670
|
||||||
|
https://github.com/tesseract-ocr/tesseract/issues/1670
|
||||||
|
https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
|
||||||
|
https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104
|
||||||
|
|
||||||
3.1.11 (2019-04-XX)
|
3.1.11 (2019-04-XX)
|
||||||
===================
|
===================
|
||||||
|
|||||||
@@ -56,6 +56,11 @@ Other changes
|
|||||||
safe_dump to load and dump using the CSafeLoader and SafeLoader as fallback.
|
safe_dump to load and dump using the CSafeLoader and SafeLoader as fallback.
|
||||||
* Add SilenceLoggerTestCaseMixin to lower level of loggers
|
* Add SilenceLoggerTestCaseMixin to lower level of loggers
|
||||||
during tests.
|
during tests.
|
||||||
|
* Add workaround for Tesseract bug 1670
|
||||||
|
https://github.com/tesseract-ocr/tesseract/issues/1670
|
||||||
|
https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
|
||||||
|
https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104
|
||||||
|
|
||||||
|
|
||||||
Removals
|
Removals
|
||||||
--------
|
--------
|
||||||
|
|||||||
@@ -2,19 +2,6 @@
|
|||||||
Troubleshooting
|
Troubleshooting
|
||||||
###############
|
###############
|
||||||
|
|
||||||
***********
|
|
||||||
Starting up
|
|
||||||
***********
|
|
||||||
|
|
||||||
Error !strcmp(locale, "C"):Error:Assert failed:in file baseapi.cpp, line 201
|
|
||||||
============================================================================
|
|
||||||
Caused by an issue with Tesseract 4.0 under Python 3.
|
|
||||||
|
|
||||||
Solution::
|
|
||||||
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
********
|
********
|
||||||
Database
|
Database
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import absolute_import, unicode_literals
|
from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
|
import locale
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@@ -12,6 +14,14 @@ from ..exceptions import OCRError
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def c_locale():
    """Temporarily switch the process locale to "C".

    Workaround for Tesseract bug 1670, where Tesseract asserts that the
    active locale is "C". On entry every locale category is set to "C";
    on exit the previous setting is restored, even when the body of the
    ``with`` block raises.

    NOTE: the locale is a process-wide setting and ``locale.setlocale``
    is documented as not thread-safe on most systems, so other threads
    will observe the "C" locale while this context is active.
    """
    # Calling setlocale() without a value only queries the current
    # setting. The returned string describes every category and can be
    # passed straight back to setlocale(), unlike getlocale(), which
    # only reports LC_CTYPE as a (language, encoding) tuple.
    locale_current = locale.setlocale(locale.LC_ALL)
    locale.setlocale(locale.LC_ALL, 'C')
    try:
        yield
    finally:
        # Always restore, otherwise a failed OCR call would leave the
        # whole process stuck in the "C" locale.
        locale.setlocale(locale.LC_ALL, locale_current)
|
||||||
|
|
||||||
|
|
||||||
class PyOCR(OCRBackendBase):
|
class PyOCR(OCRBackendBase):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(PyOCR, self).__init__(*args, **kwargs)
|
super(PyOCR, self).__init__(*args, **kwargs)
|
||||||
@@ -31,7 +41,9 @@ class PyOCR(OCRBackendBase):
|
|||||||
|
|
||||||
logger.debug('Will use tool \'%s\'', self.tool.get_name())
|
logger.debug('Will use tool \'%s\'', self.tool.get_name())
|
||||||
|
|
||||||
self.languages = self.tool.get_available_languages()
|
with c_locale():
|
||||||
|
self.languages = self.tool.get_available_languages()
|
||||||
|
|
||||||
logger.debug('Available languages: %s', ', '.join(self.languages))
|
logger.debug('Available languages: %s', ', '.join(self.languages))
|
||||||
|
|
||||||
def execute(self, *args, **kwargs):
|
def execute(self, *args, **kwargs):
|
||||||
@@ -42,11 +54,12 @@ class PyOCR(OCRBackendBase):
|
|||||||
|
|
||||||
image = Image.open(self.converter.get_page())
|
image = Image.open(self.converter.get_page())
|
||||||
try:
|
try:
|
||||||
result = self.tool.image_to_string(
|
with c_locale():
|
||||||
image,
|
result = self.tool.image_to_string(
|
||||||
lang=self.language,
|
image,
|
||||||
builder=pyocr.builders.TextBuilder()
|
lang=self.language,
|
||||||
)
|
builder=pyocr.builders.TextBuilder()
|
||||||
|
)
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
error_message = (
|
error_message = (
|
||||||
'Exception calling pyocr with language option: {}; {}'
|
'Exception calling pyocr with language option: {}; {}'
|
||||||
|
|||||||
Reference in New Issue
Block a user