diff --git a/mayan/apps/ocr/api_views.py b/mayan/apps/ocr/api_views.py index 69b3c44507..80793ae694 100644 --- a/mayan/apps/ocr/api_views.py +++ b/mayan/apps/ocr/api_views.py @@ -2,95 +2,104 @@ from __future__ import absolute_import, unicode_literals from django.shortcuts import get_object_or_404 -from rest_framework import generics, status +from rest_framework import status +from rest_framework.decorators import action from rest_framework.response import Response from mayan.apps.documents.models import Document, DocumentVersion -from mayan.apps.rest_api.permissions import MayanPermission +from mayan.apps.rest_api.viewsets import MayanAPIViewSet -from .models import DocumentPageOCRContent from .permissions import permission_ocr_content_view, permission_ocr_document -from .serializers import DocumentPageOCRContentSerializer +from .serializers import ( + DocumentOCRSerializer, DocumentPageOCRContentSerializer, + DocumentVersionOCRSerializer +) -class APIDocumentOCRView(generics.GenericAPIView): - """ - post: Submit a document for OCR. - """ - mayan_object_permissions = { - 'POST': (permission_ocr_document,) +class DocumentOCRAPIViewSet(MayanAPIViewSet): + lookup_url_kwarg = 'document_id' + object_permission_map = { + 'ocr_content': permission_ocr_content_view, + 'ocr_submit': permission_ocr_document, } - permission_classes = (MayanPermission,) queryset = Document.objects.all() + serializer_class = DocumentOCRSerializer - def get_serializer(self, *args, **kwargs): - return None - - def get_serializer_class(self): - return None - - def post(self, request, *args, **kwargs): - self.get_object().submit_for_ocr() - return Response(status=status.HTTP_202_ACCEPTED) - - -class APIDocumentVersionOCRView(generics.GenericAPIView): - """ - post: Submit a document version for OCR. 
- """ - lookup_url_kwarg = 'document_version_pk' - mayan_object_permissions = { - 'POST': (permission_ocr_document,) - } - permission_classes = (MayanPermission,) - queryset = DocumentVersion.objects.all() - - def get_document(self): - return get_object_or_404(klass=Document, pk=self.kwargs['document_pk']) - - def get_queryset(self): - return self.get_document().versions.all() - - def get_serializer(self, *args, **kwargs): - return None - - def get_serializer_class(self): - return None - - def post(self, request, *args, **kwargs): - self.get_object().submit_for_ocr() - return Response(status=status.HTTP_202_ACCEPTED) - - -class APIDocumentPageOCRContentView(generics.RetrieveAPIView): - """ - get: Returns the OCR content of the selected document page. - """ - lookup_url_kwarg = 'document_page_pk' - mayan_object_permissions = { - 'GET': (permission_ocr_content_view,), - } - permission_classes = (MayanPermission,) - serializer_class = DocumentPageOCRContentSerializer - - def get_document(self): - return get_object_or_404(klass=Document, pk=self.kwargs['document_pk']) - - def get_document_version(self): - return get_object_or_404( - klass=self.get_document().versions.all(), pk=self.kwargs['document_version_pk'] + @action( + detail=True, url_name='ocr-content', url_path='ocr' + ) + def ocr_content(self, request, *args, **kwargs): + instance = self.get_object() + serializer = self.get_serializer(instance) + headers = self.get_success_headers(data=serializer.data) + return Response( + serializer.data, status=status.HTTP_200_OK, headers=headers ) - def get_queryset(self): - return self.get_document_version().pages.all() - - def retrieve(self, request, *args, **kwargs): + @action( + detail=True, methods=('post',), url_name='ocr-submit', + url_path='ocr/submit' + ) + def ocr_submit(self, request, *args, **kwargs): instance = self.get_object() + instance.submit_for_ocr(_user=request.user) + return Response( + data=None, status=status.HTTP_202_ACCEPTED + ) - try: - ocr_content 
= instance.ocr_content - except DocumentPageOCRContent.DoesNotExist: - ocr_content = DocumentPageOCRContent.objects.none() - serializer = self.get_serializer(ocr_content) - return Response(serializer.data) +class DocumentVersionOCRAPIViewSet(MayanAPIViewSet): + lookup_url_kwarg = 'document_version_id' + object_permission_map = { + 'ocr_content': permission_ocr_content_view, + 'ocr_submit': permission_ocr_document, + } + queryset = DocumentVersion.objects.all() + serializer_class = DocumentVersionOCRSerializer + + @action( + detail=True, url_name='ocr-content', url_path='ocr' + ) + def ocr_content(self, request, *args, **kwargs): + instance = self.get_object() + serializer = self.get_serializer(instance) + headers = self.get_success_headers(data=serializer.data) + return Response( + serializer.data, status=status.HTTP_200_OK, headers=headers + ) + + @action( + detail=True, methods=('post',), url_name='ocr-submit', + url_path='ocr/submit' + ) + def ocr_submit(self, request, *args, **kwargs): + instance = self.get_object() + instance.submit_for_ocr(_user=request.user) + return Response( + data=None, status=status.HTTP_202_ACCEPTED + ) + + +class DocumentPageOCRAPIViewSet(MayanAPIViewSet): + lookup_url_kwarg = 'document_page_id' + object_permission_map = { + 'ocr_content': permission_ocr_content_view, + } + serializer_class = DocumentPageOCRContentSerializer + + def get_queryset(self): + return get_object_or_404( + klass=DocumentVersion, document_id=self.kwargs['document_id'], + pk=self.kwargs['document_version_id'] + ).pages.all() + + @action( + detail=True, url_name='content', url_path='ocr' + ) + def ocr_content(self, request, *args, **kwargs): + instance = self.get_object() + serializer = self.get_serializer(instance) + headers = self.get_success_headers(data=serializer.data) + return Response( + serializer.data, status=status.HTTP_200_OK, headers=headers + ) diff --git a/mayan/apps/ocr/apps.py b/mayan/apps/ocr/apps.py index 11646c75d5..639c349d88 100644 --- 
a/mayan/apps/ocr/apps.py +++ b/mayan/apps/ocr/apps.py @@ -17,6 +17,8 @@ from mayan.apps.common.classes import ModelAttribute, ModelField from mayan.apps.documents.search import document_search, document_page_search from mayan.apps.documents.signals import post_version_upload from mayan.apps.navigation import SourceColumn +from mayan.apps.rest_api.fields import HyperlinkField +from mayan.apps.rest_api.serializers import LazyExtraFieldsSerializerMixin from mayan.celery import app from .handlers import ( @@ -31,8 +33,9 @@ from .links import ( link_entry_list ) from .methods import ( - method_document_ocr_submit, method_document_version_ocr_submit, - method_get_document_ocr_content, method_get_document_version_ocr_content + method_document_get_ocr_content, method_document_page_get_ocr_content, + method_document_ocr_submit, method_document_version_get_ocr_content, + method_document_version_ocr_submit ) from .permissions import ( permission_document_type_ocr_setup, permission_ocr_document, @@ -75,19 +78,98 @@ class OCRApp(MayanAppConfig): Document.add_to_class( name='get_ocr_content', - value=method_get_document_ocr_content + value=method_document_get_ocr_content ) Document.add_to_class( name='submit_for_ocr', value=method_document_ocr_submit ) + DocumentPage.add_to_class( + name='get_ocr_content', value=method_document_page_get_ocr_content + ) DocumentVersion.add_to_class( name='get_ocr_content', - value=method_get_document_version_ocr_content + value=method_document_version_get_ocr_content ) DocumentVersion.add_to_class( name='submit_for_ocr', value=method_document_version_ocr_submit ) + LazyExtraFieldsSerializerMixin.add_field( + dotted_path='mayan.apps.documents.serializers.DocumentPageSerializer', + field_name='ocr_content_url', + field=HyperlinkField( + view_kwargs=( + { + 'lookup_field': 'document_version__document_id', + 'lookup_url_kwarg': 'document_id', + }, + { + 'lookup_field': 'document_version_id', + 'lookup_url_kwarg': 'document_version_id', + }, + { + 
'lookup_field': 'pk', + 'lookup_url_kwarg': 'document_page_id', + } + ), + view_name='rest_api:document_page-ocr-content' + ) + ) + + LazyExtraFieldsSerializerMixin.add_field( + dotted_path='mayan.apps.documents.serializers.DocumentSerializer', + field_name='ocr_content_url', + field=HyperlinkField( + lookup_url_kwarg='document_id', + view_name='rest_api:document-ocr-content' + ) + ) + + LazyExtraFieldsSerializerMixin.add_field( + dotted_path='mayan.apps.documents.serializers.DocumentSerializer', + field_name='ocr_submit_url', + field=HyperlinkField( + lookup_url_kwarg='document_id', + view_name='rest_api:document-ocr-submit' + ) + ) + + LazyExtraFieldsSerializerMixin.add_field( + dotted_path='mayan.apps.documents.serializers.DocumentVersionSerializer', + field_name='ocr_submit_url', + field=HyperlinkField( + view_kwargs=( + { + 'lookup_field': 'document_id', + 'lookup_url_kwarg': 'document_id', + }, + { + 'lookup_field': 'pk', + 'lookup_url_kwarg': 'document_version_id', + } + ), + view_name='rest_api:document_version-ocr-submit' + ) + ) + + LazyExtraFieldsSerializerMixin.add_field( + dotted_path='mayan.apps.documents.serializers.DocumentVersionSerializer', + field_name='ocr_content_url', + field=HyperlinkField( + view_kwargs=( + { + 'lookup_field': 'document_id', + 'lookup_url_kwarg': 'document_id', + }, + { + 'lookup_field': 'pk', + 'lookup_url_kwarg': 'document_version_id', + } + ), + view_name='rest_api:document_version-ocr-content' + ) + ) + ModelAttribute(model=Document, name='get_ocr_content') ModelField( diff --git a/mayan/apps/ocr/methods.py b/mayan/apps/ocr/methods.py index 0ab9eafd89..5c286bf283 100644 --- a/mayan/apps/ocr/methods.py +++ b/mayan/apps/ocr/methods.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from datetime import timedelta +from django.apps import apps from django.utils.timezone import now from django.utils.translation import ugettext_lazy as _ @@ -9,43 +10,56 @@ from mayan.apps.common.settings import 
settings_db_sync_task_delay from .events import event_ocr_document_version_submit from .tasks import task_do_ocr -from .utils import get_document_version_content_iterator -def method_document_ocr_submit(self): - latest_version = self.latest_version - # Don't error out if document has no version - if latest_version: - latest_version.submit_for_ocr() - - -def method_document_version_ocr_submit(self): - event_ocr_document_version_submit.commit( - action_object=self.document, target=self - ) - - task_do_ocr.apply_async( - eta=now() + timedelta(seconds=settings_db_sync_task_delay.value), - kwargs={'document_version_pk': self.pk}, - ) - - -def method_get_document_ocr_content(self): +def method_document_get_ocr_content(self): latest_version = self.latest_version # Don't error out if document has no version if latest_version: return latest_version.get_ocr_content() -method_get_document_ocr_content.short_description = _( +method_document_get_ocr_content.short_description = _( 'get_ocr_content()' ) -method_get_document_ocr_content.help_text = _( +method_document_get_ocr_content.help_text = _( 'Return the OCR content of the document.' 
) -def method_get_document_version_ocr_content(self): - return ' '.join( - get_document_version_content_iterator(document_version=self) +def method_document_ocr_submit(self, _user=None): + latest_version = self.latest_version + # Don't error out if document has no version + if latest_version: + latest_version.submit_for_ocr(_user=_user) + + +def method_document_page_get_ocr_content(self): + DocumentPageOCRContent = apps.get_model( + app_label='ocr', model_name='DocumentPageOCRContent' + ) + + try: + page_content = self.ocr_content.content + except DocumentPageOCRContent.DoesNotExist: + return '' + return page_content + + +def method_document_version_get_ocr_content(self): + result = [] + for page in self.pages.all(): + result.append(page.get_ocr_content()) + + return ''.join(result) + + +def method_document_version_ocr_submit(self, _user=None): + event_ocr_document_version_submit.commit( + action_object=self.document, actor=_user, target=self + ) + + task_do_ocr.apply_async( + eta=now() + timedelta(seconds=settings_db_sync_task_delay.value), + kwargs={'document_version_pk': self.pk}, ) diff --git a/mayan/apps/ocr/serializers.py b/mayan/apps/ocr/serializers.py index 3d9c06c18d..3842f028ac 100644 --- a/mayan/apps/ocr/serializers.py +++ b/mayan/apps/ocr/serializers.py @@ -2,10 +2,20 @@ from __future__ import unicode_literals from rest_framework import serializers -from .models import DocumentPageOCRContent + +class DocumentOCRSerializer(serializers.Serializer): + text = serializers.CharField( + read_only=True, source='get_ocr_content' + ) -class DocumentPageOCRContentSerializer(serializers.ModelSerializer): - class Meta: - fields = ('content',) - model = DocumentPageOCRContent +class DocumentPageOCRContentSerializer(serializers.Serializer): + text = serializers.CharField( + read_only=True, source='get_ocr_content' + ) + + +class DocumentVersionOCRSerializer(serializers.Serializer): + text = serializers.CharField( + read_only=True, source='get_ocr_content' + ) diff 
--git a/mayan/apps/ocr/tests/test_api.py b/mayan/apps/ocr/tests/test_api.py index d50c6e1953..c6c1adb274 100644 --- a/mayan/apps/ocr/tests/test_api.py +++ b/mayan/apps/ocr/tests/test_api.py @@ -9,48 +9,45 @@ from ..permissions import ( permission_ocr_document, permission_ocr_content_view, ) -TEST_DOCUMENT_CONTENT = 'Mayan EDMS Documentation' +from .literals import TEST_DOCUMENT_CONTENT class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase): - """ - Test the OCR app API endpoints - """ - def setUp(self): - super(OCRAPITestCase, self).setUp() - self.login_user() - def _request_document_ocr_submit_view(self): return self.post( - viewname='rest_api:document-ocr-submit-view', + viewname='rest_api:document-ocr-submit', kwargs={'document_id': self.document.pk} ) - def test_submit_document_no_access(self): + def test_submit_document_no_permission(self): response = self._request_document_ocr_submit_view() - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertFalse(hasattr(self.document.pages.first(), 'ocr_content')) + #TODO: mock OCR here def test_submit_document_with_access(self): self.grant_access( permission=permission_ocr_document, obj=self.document ) response = self._request_document_ocr_submit_view() self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) + self.assertTrue(hasattr(self.document.pages.first(), 'ocr_content')) def _request_document_version_ocr_submit_view(self): return self.post( - viewname='rest_api:document-version-ocr-submit-view', + viewname='rest_api:document_version-ocr-submit', kwargs={ 'document_id': self.document.pk, 'document_version_id': self.document.latest_version.pk } ) - def test_submit_document_version_no_access(self): + def test_submit_document_version_no_permission(self): response = self._request_document_version_ocr_submit_view() - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + 
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertFalse(hasattr(self.document.pages.first(), 'ocr_content')) def test_submit_document_version_with_access(self): @@ -59,29 +56,84 @@ class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase): ) response = self._request_document_version_ocr_submit_view() self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) + self.assertTrue(hasattr(self.document.pages.first(), 'ocr_content')) - def _request_document_page_content_view(self): + def _request_document_content_view(self): return self.get( - viewname='rest_api:document-page-ocr-content-view', + viewname='rest_api:document-ocr-content', kwargs={ - 'document_id': self.document.pk, - 'document_version_id': self.document.latest_version.pk, - 'document_page_id': self.document.latest_version.pages.first().pk + 'document_id': self.test_document.pk, } ) - def test_get_document_version_page_content_no_access(self): + def test_get_document_content_no_permission(self): + response = self._request_document_content_view() + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + + def test_get_document_content_with_access(self): + self.document.submit_for_ocr() + self.grant_access( + permission=permission_ocr_content_view, obj=self.document + ) + + response = self._request_document_content_view() + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertTrue( + TEST_DOCUMENT_CONTENT in response.data['text'] + ) + + def _request_document_page_content_view(self): + latest_version = self.test_document.latest_version + + return self.get( + viewname='rest_api:document_page-ocr-content', + kwargs={ + 'document_id': self.test_document.pk, + 'document_version_id': latest_version.pk, + 'document_page_id': latest_version.pages.first().pk + } + ) + + def test_get_document_version_page_content_no_permission(self): response = self._request_document_page_content_view() - self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) 
+ self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) def test_get_document_version_page_content_with_access(self): self.document.submit_for_ocr() self.grant_access( permission=permission_ocr_content_view, obj=self.document ) + response = self._request_document_page_content_view() self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertTrue( - TEST_DOCUMENT_CONTENT in response.data['content'] + TEST_DOCUMENT_CONTENT in response.data['text'] + ) + + def _request_document_version_content_view(self): + latest_version = self.test_document.latest_version + + return self.get( + viewname='rest_api:document_version-ocr-content', + kwargs={ + 'document_id': self.test_document.pk, + 'document_version_id': latest_version.pk, + } + ) + + def test_get_document_version_version_content_no_permission(self): + response = self._request_document_version_content_view() + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + + def test_get_document_version_version_content_with_access(self): + self.document.submit_for_ocr() + self.grant_access( + permission=permission_ocr_content_view, obj=self.document + ) + + response = self._request_document_version_content_view() + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertTrue( + TEST_DOCUMENT_CONTENT in response.data['text'] ) diff --git a/mayan/apps/ocr/urls.py b/mayan/apps/ocr/urls.py index 86fe690972..cb4caf2b12 100644 --- a/mayan/apps/ocr/urls.py +++ b/mayan/apps/ocr/urls.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals from django.conf.urls import url from .api_views import ( - APIDocumentOCRView, APIDocumentPageOCRContentView, - APIDocumentVersionOCRView + DocumentPageOCRAPIViewSet, DocumentOCRAPIViewSet, + DocumentVersionOCRAPIViewSet ) from .views import ( DocumentOCRContentView, DocumentOCRDownloadView, @@ -55,20 +55,17 @@ urlpatterns = [ ) ] -api_urls = [ - url( - regex=r'^documents/(?P\d+)/ocr/submit/$', - name='document-ocr-submit-view', - 
view=APIDocumentOCRView.as_view() - ), - url( - regex=r'^documents/(?P\d+)/versions/(?P\d+)/ocr/$', - name='document-version-ocr-submit-view', - view=APIDocumentVersionOCRView.as_view() - ), - url( - regex=r'^documents/(?P\d+)/versions/(?P\d+)/pages/(?P\d+)/ocr/$', - name='document-page-ocr-content-view', - view=APIDocumentPageOCRContentView.as_view() - ) -] +api_router_entries = ( + { + 'prefix': r'documents', + 'viewset': DocumentOCRAPIViewSet, 'basename': 'document' + }, + { + 'prefix': r'documents/(?P<document_id>\d+)/document_versions', + 'viewset': DocumentVersionOCRAPIViewSet, 'basename': 'document_version' + }, + { + 'prefix': r'documents/(?P<document_id>\d+)/document_versions/(?P<document_version_id>\d+)/document_pages', + 'viewset': DocumentPageOCRAPIViewSet, 'basename': 'document_page-ocr' + } +) diff --git a/mayan/apps/ocr/views.py b/mayan/apps/ocr/views.py index 83ab4b6ca7..addaff1ca3 100644 --- a/mayan/apps/ocr/views.py +++ b/mayan/apps/ocr/views.py @@ -12,6 +12,7 @@ from mayan.apps.common.generics import ( SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView ) from mayan.apps.documents.forms import DocumentTypeFilteredSelectForm +from mayan.apps.documents.mixins import RecentDocumentMixin from mayan.apps.documents.models import Document, DocumentPage, DocumentType from .forms import DocumentPageOCRContentForm, DocumentOCRContentForm @@ -23,19 +24,12 @@ from .permissions import ( from .utils import get_document_content_iterator -class DocumentOCRContentView(SingleObjectDetailView): +class DocumentOCRContentView(RecentDocumentMixin, SingleObjectDetailView): form_class = DocumentOCRContentForm model = Document object_permission = permission_ocr_content_view pk_url_kwarg = 'document_id' - def dispatch(self, request, *args, **kwargs): - result = super(DocumentOCRContentView, self).dispatch( - request, *args, **kwargs - ) - self.get_object().add_as_recent_document_for_user(user=request.user) - return result - def get_extra_context(self): return { 'document': self.get_object(), @@ -45,17
+39,17 @@ class DocumentOCRContentView(SingleObjectDetailView): } -class DocumentOCRDownloadView(SingleObjectDownloadView): +class DocumentOCRDownloadView(RecentDocumentMixin, SingleObjectDownloadView): model = Document object_permission = permission_ocr_content_view pk_url_kwarg = 'document_id' def get_file(self): file_object = DocumentOCRDownloadView.TextIteratorIO( - iterator=get_document_content_iterator(document=self.get_object()) + iterator=get_document_content_iterator(document=self.object) ) return DocumentOCRDownloadView.VirtualFile( - file=file_object, name='{}-OCR'.format(self.get_object()) + file=file_object, name='{}-OCR'.format(self.object) ) @@ -78,28 +72,22 @@ class DocumentOCRErrorsListView(SingleObjectListView): return self.get_document().latest_version.ocr_errors.all() -class DocumentPageOCRContentView(SingleObjectDetailView): +class DocumentPageOCRContentView(RecentDocumentMixin, SingleObjectDetailView): form_class = DocumentPageOCRContentForm model = DocumentPage object_permission = permission_ocr_content_view pk_url_kwarg = 'document_page_id' - def dispatch(self, request, *args, **kwargs): - result = super(DocumentPageOCRContentView, self).dispatch( - request, *args, **kwargs - ) - self.get_object().document.add_as_recent_document_for_user( - user=request.user - ) - return result - def get_extra_context(self): return { 'hide_labels': True, - 'object': self.get_object(), - 'title': _('OCR result for document page: %s') % self.get_object(), + 'object': self.object, + 'title': _('OCR result for document page: %s') % self.object, } + def get_recent_document(self): + return self.object.document + class DocumentSubmitView(MultipleObjectConfirmActionView): model = Document