diff --git a/mayan/apps/ocr/tests.py b/mayan/apps/ocr/tests.py index 969a76b685..c61a588820 100644 --- a/mayan/apps/ocr/tests.py +++ b/mayan/apps/ocr/tests.py @@ -1,83 +1,49 @@ from __future__ import absolute_import -import os - from django.conf import settings from django.core.files.base import File from django.test import TestCase from documents.models import Document, DocumentType +from documents.tests import TEST_SMALL_DOCUMENT_PATH, TEST_DOCUMENT_TYPE from .api import do_document_ocr from .models import DocumentQueue, QueueDocument -TEST_DOCUMENT_PATH = os.path.join(settings.BASE_DIR, 'contrib', 'sample_documents', 'title_page.png') - class DocumentSearchTestCase(TestCase): def setUp(self): - # Start the OCR queue - self.default_queue = DocumentQueue.objects.get(name='default') - self.document_type = DocumentType.objects.create(name='test doc type') - self.document = Document( - document_type=self.document_type, - description='description', - ) - self.document.save() + self.document_type = DocumentType.objects.create(name=TEST_DOCUMENT_TYPE, ocr=False) - with open(TEST_DOCUMENT_PATH) as file_object: - self.document.new_version(file_object=File(file_object, name='title_page.png')) + with open(TEST_SMALL_DOCUMENT_PATH) as file_object: + self.document = Document.objects.new_document(file_object=File(file_object), document_type=self.document_type)[0].document # Clear OCR queue QueueDocument.objects.all().delete() - def reload_ocr_runtime(self, language): - """ - Forces the reloading of the language_backend for different languages - """ - - from ocr import runtime, settings - - setattr(settings, 'LANGUAGE', language) - - reload(runtime) - from .runtime import language_backend - self.assertEqual(unicode(language_backend.__class__), u"".format(language)) - - def _test_ocr_language_issue_16(self, language): + def _test_ocr_language_issue_16(self, language, result): """ Reusable OCR test for a specific language """ - self.reload_ocr_runtime(language) - - # Clear the document's first page content + # Clear the document's first page content and switch language + self.document.language = language + self.document.save() first_page = self.document.pages.first() first_page.content = '' first_page.save() - # Make sure no documents are queued for OCR - self.failUnlessEqual(self.default_queue.queuedocument_set.count(), 0) - DocumentQueue.objects.queue_document(self.document) - # Make sure our document is queued for OCR - self.failUnlessEqual(self.default_queue.queuedocument_set.count(), 1) - - do_document_ocr(self.default_queue.queuedocument_set.first()) + # Queue document for OCR + self.document.submit_for_ocr() # Make sure content was extracted - self.assertTrue(u'Mayan EDMS' in self.document.pages.first().content) + self.assertTrue(result in self.document.pages.first().content) - def test_ocr_language_german(self): - self._test_ocr_language_issue_16('deu') - - def test_ocr_language_english(self): - self._test_ocr_language_issue_16('eng') - - def test_ocr_language_russian(self): - self._test_ocr_language_issue_16('rus') - - def test_ocr_language_spanish(self): - self._test_ocr_language_issue_16('spa') + def test_ocr_language_backends(self): + self._test_ocr_language_issue_16('deu', 'Mayan EDMS') + self._test_ocr_language_issue_16('eng', 'Mayan EDMS') + self._test_ocr_language_issue_16('spa', 'Mayan EDMS') + self._test_ocr_language_issue_16('rus', '') def tearDown(self): self.document.delete()