Update and improve ocr app tests
This commit is contained in:
@@ -1,83 +1,49 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.files.base import File
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.models import Document, DocumentType
|
||||
from documents.tests import TEST_SMALL_DOCUMENT_PATH, TEST_DOCUMENT_TYPE
|
||||
|
||||
from .api import do_document_ocr
|
||||
from .models import DocumentQueue, QueueDocument
|
||||
|
||||
TEST_DOCUMENT_PATH = os.path.join(settings.BASE_DIR, 'contrib', 'sample_documents', 'title_page.png')
|
||||
|
||||
|
||||
class DocumentSearchTestCase(TestCase):
|
||||
def setUp(self):
|
||||
# Start the OCR queue
|
||||
self.default_queue = DocumentQueue.objects.get(name='default')
|
||||
self.document_type = DocumentType.objects.create(name='test doc type')
|
||||
self.document = Document(
|
||||
document_type=self.document_type,
|
||||
description='description',
|
||||
)
|
||||
self.document.save()
|
||||
self.document_type = DocumentType.objects.create(name=TEST_DOCUMENT_TYPE, ocr=False)
|
||||
|
||||
with open(TEST_DOCUMENT_PATH) as file_object:
|
||||
self.document.new_version(file_object=File(file_object, name='title_page.png'))
|
||||
with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
|
||||
self.document = Document.objects.new_document(file_object=File(file_object), document_type=self.document_type)[0].document
|
||||
|
||||
# Clear OCR queue
|
||||
QueueDocument.objects.all().delete()
|
||||
|
||||
def reload_ocr_runtime(self, language):
|
||||
"""
|
||||
Forces the reloading of the language_backend for different languages
|
||||
"""
|
||||
|
||||
from ocr import runtime, settings
|
||||
|
||||
setattr(settings, 'LANGUAGE', language)
|
||||
|
||||
reload(runtime)
|
||||
from .runtime import language_backend
|
||||
self.assertEqual(unicode(language_backend.__class__), u"<class 'ocr.lang.{0}.LanguageBackend'>".format(language))
|
||||
|
||||
def _test_ocr_language_issue_16(self, language):
|
||||
def _test_ocr_language_issue_16(self, language, result):
|
||||
"""
|
||||
Reusable OCR test for a specific language
|
||||
"""
|
||||
|
||||
self.reload_ocr_runtime(language)
|
||||
|
||||
# Clear the document's first page content
|
||||
# Clear the document's first page content and switch language
|
||||
self.document.language = language
|
||||
self.document.save()
|
||||
first_page = self.document.pages.first()
|
||||
first_page.content = ''
|
||||
first_page.save()
|
||||
|
||||
# Make sure no documents are queued for OCR
|
||||
self.failUnlessEqual(self.default_queue.queuedocument_set.count(), 0)
|
||||
DocumentQueue.objects.queue_document(self.document)
|
||||
# Make sure our document is queued for OCR
|
||||
self.failUnlessEqual(self.default_queue.queuedocument_set.count(), 1)
|
||||
|
||||
do_document_ocr(self.default_queue.queuedocument_set.first())
|
||||
# Queue document for OCR
|
||||
self.document.submit_for_ocr()
|
||||
|
||||
# Make sure content was extracted
|
||||
self.assertTrue(u'Mayan EDMS' in self.document.pages.first().content)
|
||||
self.assertTrue(result in self.document.pages.first().content)
|
||||
|
||||
def test_ocr_language_german(self):
|
||||
self._test_ocr_language_issue_16('deu')
|
||||
|
||||
def test_ocr_language_english(self):
|
||||
self._test_ocr_language_issue_16('eng')
|
||||
|
||||
def test_ocr_language_russian(self):
|
||||
self._test_ocr_language_issue_16('rus')
|
||||
|
||||
def test_ocr_language_spanish(self):
|
||||
self._test_ocr_language_issue_16('spa')
|
||||
def test_ocr_language_backends(self):
|
||||
self._test_ocr_language_issue_16('deu', 'Mayan EDMS')
|
||||
self._test_ocr_language_issue_16('eng', 'Mayan EDMS')
|
||||
self._test_ocr_language_issue_16('spa', 'Mayan EDMS')
|
||||
self._test_ocr_language_issue_16('rus', '')
|
||||
|
||||
def tearDown(self):
|
||||
self.document.delete()
|
||||
|
||||
Reference in New Issue
Block a user