Add workaround for Tesseract bug 1670

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
2019-04-12 05:27:27 -04:00
parent 93c4814a89
commit 72311c73b5
4 changed files with 28 additions and 20 deletions
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -32,7 +32,10 @@
  65535.
 * New default value for setting MIMETYPE_FILE_READ_SIZE is
  1024.
-
+* Add workaround for Tesseract bug 1670
+  https://github.com/tesseract-ocr/tesseract/issues/1670
+  https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
+  https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104

 3.1.11 (2019-04-XX)
 ===================
--- a/docs/releases/3.2.rst
+++ b/docs/releases/3.2.rst
@@ -56,6 +56,11 @@ Other changes
  safe_dump to load and dump using the CSafeLoader and SafeLoader as fallback.
 * Add SilenceLoggerTestCaseMixin to lower level of loggers
  during tests.
+* Add workaround for Tesseract bug 1670
+  https://github.com/tesseract-ocr/tesseract/issues/1670
+  https://github.com/tesseract-ocr/tesseract/commit/3292484f67af8bdda23aa5e510918d0115785291
+  https://gitlab.gnome.org/World/OpenPaperwork/pyocr/issues/104
+

 Removals
 --------
--- a/docs/topics/troubleshooting.rst
+++ b/docs/topics/troubleshooting.rst
@@ -2,19 +2,6 @@
 Troubleshooting
 ###############

-***********
-Starting up
-***********
-
-Error !strcmp(locale, "C"):Error:Assert failed:in file baseapi.cpp, line 201
-============================================================================
-Cause by an issue with Tesseract 4.0 under Python 3.
-
-Solution::
-
-    export LC_ALL=C
-
-

 ********
 Database
--- a/mayan/apps/ocr/backends/pyocr.py
+++ b/mayan/apps/ocr/backends/pyocr.py
@@ -1,5 +1,7 @@
 from __future__ import absolute_import, unicode_literals

+from contextlib import contextmanager
+import locale
 import logging

 from PIL import Image
@@ -12,6 +14,14 @@ from ..exceptions import OCRError
 logger = logging.getLogger(__name__)


+@contextmanager
+def c_locale():
+    """
+    Context manager that switches every locale category to 'C' for the
+    duration of the ``with`` block and then puts the previous setting back.
+
+    Workaround for Tesseract bug 1670: Tesseract 4.0 asserts that the
+    process locale is "C" when its API is initialized.
+    """
+    # Calling setlocale() without a value only queries the category. For
+    # LC_ALL the returned string describes every category, unlike
+    # getlocale() which reports LC_CTYPE alone, so it can be handed back
+    # to setlocale() unchanged to restore the exact prior state.
+    locale_current = locale.setlocale(locale.LC_ALL)
+    locale.setlocale(locale.LC_ALL, 'C')
+    try:
+        yield
+    finally:
+        # Restore even when the wrapped call raises; otherwise a failed
+        # OCR call would leave the process in the 'C' locale.
+        locale.setlocale(locale.LC_ALL, locale_current)
+
+
 class PyOCR(OCRBackendBase):
    def __init__(self, *args, **kwargs):
        super(PyOCR, self).__init__(*args, **kwargs)
@@ -31,7 +41,9 @@ class PyOCR(OCRBackendBase):

        logger.debug('Will use tool \'%s\'', self.tool.get_name())

-        self.languages = self.tool.get_available_languages()
+        with c_locale():
+            self.languages = self.tool.get_available_languages()
+
        logger.debug('Available languages: %s', ', '.join(self.languages))

    def execute(self, *args, **kwargs):
@@ -42,11 +54,12 @@ class PyOCR(OCRBackendBase):

        image = Image.open(self.converter.get_page())
        try:
-            result = self.tool.image_to_string(
-                image,
-                lang=self.language,
-                builder=pyocr.builders.TextBuilder()
-            )
+            with c_locale():
+                result = self.tool.image_to_string(
+                    image,
+                    lang=self.language,
+                    builder=pyocr.builders.TextBuilder()
+                )
        except Exception as exception:
            error_message = (
                'Exception calling pyocr with language option: {}; {}'