diff --git a/mayan/apps/ocr/tests.py b/mayan/apps/ocr/tests.py index 5db2166e10..dbe628b012 100644 --- a/mayan/apps/ocr/tests.py +++ b/mayan/apps/ocr/tests.py @@ -9,7 +9,7 @@ from django.utils import unittest from documents.models import Document, DocumentType from .api import do_document_ocr -from .models import DocumentQueue +from .models import DocumentQueue, QueueDocument TEST_DOCUMENT_PATH = os.path.join(settings.BASE_DIR, 'contrib', 'sample_documents', 'title_page.png') @@ -18,11 +18,7 @@ class DocumentSearchTestCase(unittest.TestCase): def setUp(self): # Start the OCR queue self.default_queue = DocumentQueue.objects.get(name='default') - - def test_do_document_ocr(self): - self.document_type = DocumentType(name='test doc type') - self.document_type.save() - + self.document_type = DocumentType.objects.create(name='test doc type') self.document = Document( document_type=self.document_type, description='description', @@ -32,11 +28,57 @@ class DocumentSearchTestCase(unittest.TestCase): with open(TEST_DOCUMENT_PATH) as file_object: self.document.new_version(file=File(file_object, name='title_page.png')) + # Clear OCR queue + QueueDocument.objects.all().delete() + + def reload_ocr_runtime(self, language): + """ + Forces the reloading of the language_backend for different languages + """ + + from ocr.conf import settings + from ocr import runtime + + setattr(settings, 'LANGUAGE', language) + + reload(runtime) + from .runtime import language_backend + self.assertEqual(unicode(language_backend.__class__), u"".format(language)) + + def _test_ocr_language_issue_16(self, language): + """ + Reusable OCR test for a specific language + """ + + self.reload_ocr_runtime(language) + + # Clear the document's first page content + first_page = self.document.pages.first() + first_page.content = '' + first_page.save() + + # Make sure no documents are queued for OCR + self.failUnlessEqual(self.default_queue.queuedocument_set.count(), 0) + DocumentQueue.objects.queue_document(self.document) + # Make sure our document is queued for OCR self.failUnlessEqual(self.default_queue.queuedocument_set.count(), 1) - do_document_ocr(self.default_queue.queuedocument_set.all()[0]) + do_document_ocr(self.default_queue.queuedocument_set.first()) - self.assertTrue(u'Mayan EDMS' in self.document.pages.all()[0].content) + # Make sure content was extracted + self.assertTrue(u'Mayan EDMS' in self.document.pages.first().content) + + def test_ocr_language_german(self): + self._test_ocr_language_issue_16('deu') + + def test_ocr_language_english(self): + self._test_ocr_language_issue_16('eng') + + def test_ocr_language_russian(self): + self._test_ocr_language_issue_16('rus') + + def test_ocr_language_spanish(self): + self._test_ocr_language_issue_16('spa') def tearDown(self): self.document.delete()