From aa0f48b1a017827e01329d466e0b0dcac5ccb851 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 8 Jun 2016 19:29:20 -0400 Subject: [PATCH] Update OCR app to use organizations. --- mayan/apps/ocr/api_views.py | 6 +- mayan/apps/ocr/classes.py | 2 +- mayan/apps/ocr/forms.py | 2 +- mayan/apps/ocr/handlers.py | 2 +- mayan/apps/ocr/managers.py | 37 ++++++++ mayan/apps/ocr/models.py | 23 ++++- mayan/apps/ocr/parsers.py | 2 +- mayan/apps/ocr/tasks.py | 8 +- mayan/apps/ocr/tests/test_api.py | 24 ++--- mayan/apps/ocr/tests/test_models.py | 22 +++-- .../apps/ocr/tests/test_organization_views.py | 95 +++++++++++++++++++ mayan/apps/ocr/tests/test_parsers.py | 21 ++-- mayan/apps/ocr/views.py | 16 ++-- 13 files changed, 205 insertions(+), 55 deletions(-) create mode 100644 mayan/apps/ocr/managers.py create mode 100644 mayan/apps/ocr/tests/test_organization_views.py diff --git a/mayan/apps/ocr/api_views.py b/mayan/apps/ocr/api_views.py index ded56e8ed8..5dcff11aa6 100644 --- a/mayan/apps/ocr/api_views.py +++ b/mayan/apps/ocr/api_views.py @@ -16,7 +16,7 @@ class APIDocumentOCRView(generics.GenericAPIView): 'POST': (permission_ocr_document,) } permission_classes = (MayanPermission,) - queryset = Document.objects.all() + queryset = Document.on_organization.all() def get_serializer_class(self): return None @@ -44,7 +44,7 @@ class APIDocumentVersionOCRView(generics.GenericAPIView): 'POST': (permission_ocr_document,) } permission_classes = (MayanPermission,) - queryset = DocumentVersion.objects.all() + queryset = DocumentVersion.on_organization.all() def get_serializer_class(self): return None @@ -83,7 +83,7 @@ class APIDocumentPageContentView(generics.RetrieveAPIView): } permission_classes = (MayanPermission,) serializer_class = DocumentPageContentSerializer - queryset = DocumentPage.objects.all() + queryset = DocumentPage.on_organization.all() def retrieve(self, request, *args, **kwargs): instance = self.get_object() diff --git a/mayan/apps/ocr/classes.py b/mayan/apps/ocr/classes.py index 3693b263f8..c9d33831f3 100644 --- a/mayan/apps/ocr/classes.py +++ b/mayan/apps/ocr/classes.py @@ -60,7 +60,7 @@ class OCRBackendBase(object): image = document_page.get_image() try: - document_page_content, created = DocumentPageContent.objects.get_or_create( + document_page_content, created = DocumentPageContent.on_organization.get_or_create( document_page=document_page ) document_page_content.content = self.execute( diff --git a/mayan/apps/ocr/forms.py b/mayan/apps/ocr/forms.py index 939e19a741..5342ec484f 100644 --- a/mayan/apps/ocr/forms.py +++ b/mayan/apps/ocr/forms.py @@ -57,5 +57,5 @@ class DocumentContentForm(forms.Form): class DocumentTypeSelectForm(forms.Form): document_type = forms.ModelChoiceField( - queryset=DocumentType.objects.all(), label=('Document type') + queryset=DocumentType.on_organization.all(), label=('Document type') ) diff --git a/mayan/apps/ocr/handlers.py b/mayan/apps/ocr/handlers.py index 5e41ae6074..d012a9b9e5 100644 --- a/mayan/apps/ocr/handlers.py +++ b/mayan/apps/ocr/handlers.py @@ -20,6 +20,6 @@ def initialize_new_ocr_settings(sender, instance, **kwargs): DocumentTypeSettings = get_model('ocr', 'DocumentTypeSettings') if kwargs['created']: - DocumentTypeSettings.objects.create( + DocumentTypeSettings.on_organization.create( document_type=instance, auto_ocr=setting_auto_ocr.value ) diff --git a/mayan/apps/ocr/managers.py b/mayan/apps/ocr/managers.py new file mode 100644 index 0000000000..6902ff1178 --- /dev/null +++ b/mayan/apps/ocr/managers.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +from django.apps import apps +from django.db import models + + +class OrganizationDocumentTypeSettingsManager(models.Manager): + def get_queryset(self): + DocumentType = apps.get_model('documents', 'DocumentType') + + return super( + OrganizationDocumentTypeSettingsManager, self + ).get_queryset().filter( + document_type__in=DocumentType.on_organization.all(), + ) + + +class OrganizationDocumentVersionOCRErrorManager(models.Manager): + def get_queryset(self): + DocumentVersion = apps.get_model('documents', 'DocumentVersion') + + return super( + OrganizationDocumentVersionOCRErrorManager, self + ).get_queryset().filter( + document_version__in=DocumentVersion.on_organization.all(), + ) + + +class OrganizationDocumentPageContentManager(models.Manager): + def get_queryset(self): + DocumentPage = apps.get_model('documents', 'DocumentPage') + + return super( + OrganizationDocumentPageContentManager, self + ).get_queryset().filter( + document_page__in=DocumentPage.on_organization.all(), + ) diff --git a/mayan/apps/ocr/models.py b/mayan/apps/ocr/models.py index 8c44d317ca..3fda33db8e 100644 --- a/mayan/apps/ocr/models.py +++ b/mayan/apps/ocr/models.py @@ -6,6 +6,12 @@ from django.utils.translation import ugettext_lazy as _ from documents.models import DocumentPage, DocumentType, DocumentVersion +from .managers import ( + OrganizationDocumentTypeSettingsManager, + OrganizationDocumentVersionOCRErrorManager, + OrganizationDocumentPageContentManager +) + class DocumentTypeSettings(models.Model): """ @@ -20,6 +26,9 @@ class DocumentTypeSettings(models.Model): verbose_name=_('Automatically queue newly created documents for OCR.') ) + objects = models.Manager() + on_organization = OrganizationDocumentTypeSettingsManager() + class Meta: verbose_name = _('Document type settings') verbose_name_plural = _('Document types settings') @@ -35,14 +44,17 @@ class DocumentVersionOCRError(models.Model): ) result = models.TextField(blank=True, null=True, verbose_name=_('Result')) - def __str__(self): - return unicode(self.document_version) + objects = models.Manager() + on_organization = OrganizationDocumentVersionOCRErrorManager() class Meta: ordering = ('datetime_submitted',) verbose_name = _('Document Version OCR Error') verbose_name_plural = _('Document Version OCR Errors') + def __str__(self): + return unicode(self.document_version) + @python_2_unicode_compatible class DocumentPageContent(models.Model): @@ -55,9 +67,12 @@ class DocumentPageContent(models.Model): ) content = models.TextField(blank=True, verbose_name=_('Content')) - def __str__(self): - return unicode(self.document_page) + objects = models.Manager() + on_organization = OrganizationDocumentPageContentManager() class Meta: verbose_name = _('Document page content') verbose_name_plural = _('Document pages contents') + + def __str__(self): + return unicode(self.document_page) diff --git a/mayan/apps/ocr/parsers.py b/mayan/apps/ocr/parsers.py index bc21031037..276f72625d 100644 --- a/mayan/apps/ocr/parsers.py +++ b/mayan/apps/ocr/parsers.py @@ -92,7 +92,7 @@ class Parser(object): file_object = document_page.document_version.get_intermidiate_file() try: - document_page_content, created = DocumentPageContent.objects.get_or_create( + document_page_content, created = DocumentPageContent.on_organization.get_or_create( document_page=document_page ) document_page_content.content = self.execute( diff --git a/mayan/apps/ocr/tasks.py b/mayan/apps/ocr/tasks.py index 7406a1bed3..2694d39366 100644 --- a/mayan/apps/ocr/tasks.py +++ b/mayan/apps/ocr/tasks.py @@ -35,7 +35,9 @@ def task_do_ocr(self, document_version_pk): logger.debug('acquired lock: %s', lock_id) document_version = None try: - document_version = DocumentVersion.objects.get(pk=document_version_pk) + document_version = DocumentVersion.on_organization.get( + pk=document_version_pk + ) logger.info( 'Starting document OCR for document version: %s', document_version @@ -53,7 +55,7 @@ def task_do_ocr(self, document_version_pk): exception ) if document_version: - entry, created = DocumentVersionOCRError.objects.get_or_create( + entry, created = DocumentVersionOCRError.on_organization.get_or_create( document_version=document_version ) @@ -72,7 +74,7 @@ def task_do_ocr(self, document_version_pk): 'OCR complete for document version: %s', document_version ) try: - entry = DocumentVersionOCRError.objects.get( + entry = DocumentVersionOCRError.on_organization.get( document_version=document_version ) except DocumentVersionOCRError.DoesNotExist: diff --git a/mayan/apps/ocr/tests/test_api.py b/mayan/apps/ocr/tests/test_api.py index 1194a76748..af869c7a0d 100644 --- a/mayan/apps/ocr/tests/test_api.py +++ b/mayan/apps/ocr/tests/test_api.py @@ -1,36 +1,23 @@ from __future__ import unicode_literals -import json - -from django.contrib.auth import get_user_model from django.core.urlresolvers import reverse from rest_framework import status -from rest_framework.test import APITestCase from documents.models import DocumentType from documents.tests import TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH -from user_management.tests import ( - TEST_ADMIN_EMAIL, TEST_ADMIN_PASSWORD, TEST_ADMIN_USERNAME -) +from rest_api.tests import GenericAPITestCase -class OCRAPITestCase(APITestCase): +class OCRAPITestCase(GenericAPITestCase): """ Test the OCR app API endpoints """ def setUp(self): - self.admin_user = get_user_model().objects.create_superuser( - username=TEST_ADMIN_USERNAME, email=TEST_ADMIN_EMAIL, - password=TEST_ADMIN_PASSWORD - ) + super(OCRAPITestCase, self).setUp() - self.client.login( - username=TEST_ADMIN_USERNAME, password=TEST_ADMIN_PASSWORD - ) - - self.document_type = DocumentType.objects.create( + self.document_type = DocumentType.on_organization.create( label=TEST_DOCUMENT_TYPE ) @@ -41,6 +28,7 @@ class OCRAPITestCase(APITestCase): def tearDown(self): self.document_type.delete() + super(OCRAPITestCase, self).tearDown() def test_submit_document(self): response = self.client.post( @@ -81,5 +69,5 @@ class OCRAPITestCase(APITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertTrue( - 'Mayan EDMS Documentation' in json.loads(response.content)['content'] + 'Mayan EDMS Documentation' in response.data['content'] ) diff --git a/mayan/apps/ocr/tests/test_models.py b/mayan/apps/ocr/tests/test_models.py index 39daf68e79..e68eb5aabc 100644 --- a/mayan/apps/ocr/tests/test_models.py +++ b/mayan/apps/ocr/tests/test_models.py @@ -2,30 +2,31 @@ from __future__ import unicode_literals -from django.core.files.base import File -from django.test import TestCase - from documents.models import DocumentType from documents.settings import setting_language_choices from documents.tests import ( TEST_DEU_DOCUMENT_PATH, TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH ) +from organizations.tests import OrganizationTestCase -class DocumentOCRTestCase(TestCase): +class DocumentOCRTestCase(OrganizationTestCase): def setUp(self): - self.document_type = DocumentType.objects.create( + super(DocumentOCRTestCase, self).setUp() + + self.document_type = DocumentType.on_organization.create( label=TEST_DOCUMENT_TYPE ) with open(TEST_SMALL_DOCUMENT_PATH) as file_object: self.document = self.document_type.new_document( - file_object=File(file_object), + file_object=file_object, ) def tearDown(self): self.document.delete() self.document_type.delete() + super(DocumentOCRTestCase, self).tearDown() def test_ocr_language_backends_end(self): content = self.document.pages.first().ocr_content.content @@ -33,9 +34,11 @@ class DocumentOCRTestCase(TestCase): self.assertTrue('Mayan EDMS Documentation' in content) -class GermanOCRSupportTestCase(TestCase): +class GermanOCRSupportTestCase(OrganizationTestCase): def setUp(self): - self.document_type = DocumentType.objects.create( + super(GermanOCRSupportTestCase, self).setUp() + + self.document_type = DocumentType.on_organization.create( label=TEST_DOCUMENT_TYPE ) @@ -49,11 +52,12 @@ class GermanOCRSupportTestCase(TestCase): with open(TEST_DEU_DOCUMENT_PATH) as file_object: self.document = self.document_type.new_document( - file_object=File(file_object), language=language_code + file_object=file_object, language=language_code ) def tearDown(self): self.document_type.delete() + super(GermanOCRSupportTestCase, self).tearDown() def test_ocr_language_backends_end(self): content = self.document.pages.first().ocr_content.content diff --git a/mayan/apps/ocr/tests/test_organization_views.py b/mayan/apps/ocr/tests/test_organization_views.py new file mode 100644 index 0000000000..6d217b70d2 --- /dev/null +++ b/mayan/apps/ocr/tests/test_organization_views.py @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +from django.test import override_settings + +from documents.models import DocumentType +from documents.tests.literals import ( + TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH +) +from organizations.tests.test_organization_views import OrganizationViewTestCase + +from ..models import DocumentPageContent + + +@override_settings(OCR_AUTO_OCR=False) +class OrganizationOCRViewTestCase(OrganizationViewTestCase): + def create_document_type(self): + with self.settings(ORGANIZATION_ID=self.organization_a.pk): + self.document_type = DocumentType.on_organization.create( + label=TEST_DOCUMENT_TYPE + ) + + def create_document(self): + self.create_document_type() + with self.settings(ORGANIZATION_ID=self.organization_a.pk): + + with open(TEST_SMALL_DOCUMENT_PATH) as file_object: + self.document = self.document_type.new_document( + file_object=file_object + ) + + def test_document_content_view(self): + self.create_document() + self.document.submit_for_ocr() + + with self.settings(ORGANIZATION_ID=self.organization_a.pk): + response = self.get( + 'ocr:document_content', args=(self.document.pk,) + ) + self.assertContains(response, text='Mayan', status_code=200) + + with self.settings(ORGANIZATION_ID=self.organization_b.pk): + response = self.get( + 'ocr:document_content', args=(self.document.pk,) + ) + self.assertEqual(response.status_code, 404) + + def test_document_submit_view(self): + self.create_document() + + with self.settings(ORGANIZATION_ID=self.organization_a.pk): + response = self.post( + 'ocr:document_submit', args=(self.document.pk,), follow=True + ) + self.assertContains(response, text='uccess', status_code=200) + + with self.settings(ORGANIZATION_ID=self.organization_b.pk): + response = self.post( + 'ocr:document_submit', args=(self.document.pk,), follow=True + ) + self.assertEqual(response.status_code, 404) + + def test_document_submit_all_view(self): + self.create_document() + + with self.settings(ORGANIZATION_ID=self.organization_b.pk): + self.post('ocr:document_submit_all', follow=True) + + with self.assertRaises(DocumentPageContent.DoesNotExist): + # Use .objects manager to make sure we get all document pages + # and that it indeed doesn't exists = no OCR happened. + DocumentPageContent.objects.get( + document_page=self.document.pages.first() + ) + + with self.settings(ORGANIZATION_ID=self.organization_a.pk): + self.post('ocr:document_submit_all', follow=True) + + self.assertIn( + 'Mayan', self.document.pages.first().ocr_content.content + ) + + def test_document_type_ocr_settings_view(self): + self.create_document_type() + + with self.settings(ORGANIZATION_ID=self.organization_a.pk): + response = self.get( + 'ocr:document_type_ocr_settings', args=(self.document_type.pk,) + ) + self.assertEqual(response.status_code, 200) + + with self.settings(ORGANIZATION_ID=self.organization_b.pk): + response = self.get( + 'ocr:document_type_ocr_settings', args=(self.document_type.pk,) + ) + self.assertEqual(response.status_code, 404) diff --git a/mayan/apps/ocr/tests/test_parsers.py b/mayan/apps/ocr/tests/test_parsers.py index ff2ddb629f..e8eddad222 100644 --- a/mayan/apps/ocr/tests/test_parsers.py +++ b/mayan/apps/ocr/tests/test_parsers.py @@ -1,32 +1,34 @@ from __future__ import unicode_literals -from django.core.files.base import File -from django.test import TestCase, override_settings +from django.test import override_settings from documents.models import DocumentType from documents.tests import ( TEST_DOCUMENT_PATH, TEST_DOCUMENT_TYPE, TEST_HYBRID_DOCUMENT_PATH ) +from organizations.tests import OrganizationTestCase from ..classes import TextExtractor from ..parsers import PDFMinerParser, PopplerParser @override_settings(OCR_AUTO_OCR=False) -class ParserTestCase(TestCase): +class ParserTestCase(OrganizationTestCase): def setUp(self): + super(ParserTestCase, self).setUp() - self.document_type = DocumentType.objects.create( + self.document_type = DocumentType.on_organization.create( label=TEST_DOCUMENT_TYPE ) with open(TEST_DOCUMENT_PATH) as file_object: self.document = self.document_type.new_document( - file_object=File(file_object) + file_object=file_object ) def tearDown(self): self.document_type.delete() + super(ParserTestCase, self).tearDown() def test_pdfminer_parser(self): parser = PDFMinerParser() @@ -48,19 +50,22 @@ class ParserTestCase(TestCase): @override_settings(OCR_AUTO_OCR=False) -class TextExtractorTestCase(TestCase): +class TextExtractorTestCase(OrganizationTestCase): def setUp(self): - self.document_type = DocumentType.objects.create( + super(TextExtractorTestCase, self).setUp() + + self.document_type = DocumentType.on_organization.create( label=TEST_DOCUMENT_TYPE ) with open(TEST_HYBRID_DOCUMENT_PATH) as file_object: self.document = self.document_type.new_document( - file_object=File(file_object) + file_object=file_object ) def tearDown(self): self.document_type.delete() + super(TextExtractorTestCase, self).tearDown() def test_text_extractor(self): TextExtractor.process_document_version( diff --git a/mayan/apps/ocr/views.py b/mayan/apps/ocr/views.py index 2c4f237867..28e7abf340 100644 --- a/mayan/apps/ocr/views.py +++ b/mayan/apps/ocr/views.py @@ -32,7 +32,7 @@ class DocumentAllSubmitView(ConfirmView): def view_action(self): count = 0 - for document in Document.objects.all(): + for document in Document.on_organization.all(): document.submit_for_ocr() count += 1 @@ -49,7 +49,7 @@ class DocumentSubmitView(ConfirmView): } def get_object(self): - return Document.objects.get(pk=self.kwargs['pk']) + return get_object_or_404(Document.on_organization, pk=self.kwargs['pk']) def object_action(self, instance): try: @@ -77,7 +77,6 @@ class DocumentSubmitView(ConfirmView): class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView): - model = Document success_message = '%(count)d document submitted to the OCR queue.' success_message_plural = '%(count)d documents submitted to the OCR queue.' @@ -87,6 +86,9 @@ class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView): 'title': _('Submit the selected documents to the OCR queue?') } + def get_queryset(self): + return Document.on_organization.all() + class DocumentTypeSubmitView(FormView): form_class = DocumentTypeSelectForm @@ -122,7 +124,7 @@ class DocumentTypeSettingsEditView(SingleObjectEditView): def get_object(self, queryset=None): return get_object_or_404( - DocumentType, pk=self.kwargs['pk'] + DocumentType.on_organization, pk=self.kwargs['pk'] ).ocr_settings def get_extra_context(self): @@ -135,7 +137,6 @@ class DocumentTypeSettingsEditView(SingleObjectEditView): class DocumentOCRContent(SingleObjectDetailView): form_class = DocumentContentForm - model = Document object_permission = permission_ocr_content_view def dispatch(self, request, *args, **kwargs): @@ -153,6 +154,9 @@ class DocumentOCRContent(SingleObjectDetailView): 'title': _('OCR result for document: %s') % self.get_object(), } + def get_queryset(self): + return Document.on_organization.all() + class EntryListView(SingleObjectListView): extra_context = { @@ -162,4 +166,4 @@ class EntryListView(SingleObjectListView): view_permission = permission_ocr_document def get_queryset(self): - return DocumentVersionOCRError.objects.all() + return DocumentVersionOCRError.on_organization.all()