diff --git a/HISTORY.rst b/HISTORY.rst index d4bfd1b893..0d04e872b8 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -32,7 +32,10 @@ 65535. * New default value for setting MIMETYPE_FILE_READ_SIZE is 1024. - +* Add workaround for Tesseract bug 1670 + https://github.com/tesseract-ocr/tesseract/issues/1670 + https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291 + https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104 3.1.11 (2019-04-XX) =================== diff --git a/docs/releases/3.2.rst b/docs/releases/3.2.rst index 565e89f8d8..78ffed157b 100644 --- a/docs/releases/3.2.rst +++ b/docs/releases/3.2.rst @@ -56,6 +56,11 @@ Other changes safe_dump to load and dump using the CSafeLoader and SafeLoader as fallback. * Add SilenceLoggerTestCaseMixin to lower level of loggers during tests. +* Add workaround for Tesseract bug 1670 + https://github.com/tesseract-ocr/tesseract/issues/1670 + https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291 + https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104 + Removals -------- diff --git a/docs/topics/troubleshooting.rst b/docs/topics/troubleshooting.rst index 34d72421de..fe7d4a8ce7 100644 --- a/docs/topics/troubleshooting.rst +++ b/docs/topics/troubleshooting.rst @@ -2,19 +2,6 @@ Troubleshooting ############### -*********** -Starting up -*********** - -Error !strcmp(locale, "C"):Error:Assert failed:in file baseapi.cpp, line 201 -============================================================================ -Cause by an issue with Tesseract 4.0 under Python 3. - -Solution:: - - export LC_ALL=C - - ******** Database diff --git a/mayan/apps/ocr/backends/pyocr.py b/mayan/apps/ocr/backends/pyocr.py index 9224d95293..d4e051e97b 100644 --- a/mayan/apps/ocr/backends/pyocr.py +++ b/mayan/apps/ocr/backends/pyocr.py @@ -1,5 +1,7 @@ from __future__ import absolute_import, unicode_literals +from contextlib import contextmanager +import locale import logging from PIL import Image @@ -12,6 +14,14 @@ from ..exceptions import OCRError logger = logging.getLogger(__name__) +@contextmanager +def c_locale(): + locale_current = locale.getlocale() + locale.setlocale(locale.LC_ALL, 'C') + yield + locale.setlocale(locale.LC_ALL, locale_current) + + class PyOCR(OCRBackendBase): def __init__(self, *args, **kwargs): super(PyOCR, self).__init__(*args, **kwargs) @@ -31,7 +41,9 @@ class PyOCR(OCRBackendBase): logger.debug('Will use tool \'%s\'', self.tool.get_name()) - self.languages = self.tool.get_available_languages() + with c_locale(): + self.languages = self.tool.get_available_languages() + logger.debug('Available languages: %s', ', '.join(self.languages)) def execute(self, *args, **kwargs): @@ -42,11 +54,12 @@ class PyOCR(OCRBackendBase): image = Image.open(self.converter.get_page()) try: - result = self.tool.image_to_string( - image, - lang=self.language, - builder=pyocr.builders.TextBuilder() - ) + with c_locale(): + result = self.tool.image_to_string( + image, + lang=self.language, + builder=pyocr.builders.TextBuilder() + ) except Exception as exception: error_message = ( 'Exception calling pyocr with language option: {}; {}'