from __future__ import absolute_import from django.core.files.base import File from django.test import TransactionTestCase from documents.models import Document, DocumentType from documents.tests import TEST_SMALL_DOCUMENT_PATH, TEST_DOCUMENT_TYPE from .models import DocumentQueue, QueueDocument class DocumentOCRTestCase(TransactionTestCase): def setUp(self): self.document_type = DocumentType.objects.create(name=TEST_DOCUMENT_TYPE, ocr=False) with open(TEST_SMALL_DOCUMENT_PATH) as file_object: self.document = Document.objects.new_document(file_object=File(file_object), document_type=self.document_type)[0].document DocumentQueue.objects.get_or_create(name='default') # Clear OCR queue QueueDocument.objects.all().delete() def _test_ocr_language_issue_16(self, language, result): """ Reusable OCR test for a specific language """ # Clear the document's first page content and switch language self.document.language = language self.document.save() first_page = self.document.pages.first() first_page.content = '' first_page.save() # Queue document for OCR self.document.submit_for_ocr() # Make sure content was extracted self.assertTrue(result in self.document.pages.first().content) def test_ocr_language_backends_deu(self): self._test_ocr_language_issue_16('deu', 'Mayan EDMS') def test_ocr_language_backends_end(self): self._test_ocr_language_issue_16('eng', 'Mayan EDMS') def test_ocr_language_backends_spa(self): self._test_ocr_language_issue_16('spa', 'Mayan EDMS') def test_ocr_language_backends_rus(self): self._test_ocr_language_issue_16('rus', '') def tearDown(self): self.document.delete() self.document_type.delete()