Refactor the OCR app API
This refactor adds two new endpoints to view the OCR content of versions and documents. Signed-off-by: Roberto Rosario <Roberto.Rosario.Gonzalez@mayan-edms.com>
This commit is contained in:
@@ -2,95 +2,104 @@ from __future__ import absolute_import, unicode_literals
|
|||||||
|
|
||||||
from django.shortcuts import get_object_or_404
|
from django.shortcuts import get_object_or_404
|
||||||
|
|
||||||
from rest_framework import generics, status
|
from rest_framework import status
|
||||||
|
from rest_framework.decorators import action
|
||||||
from rest_framework.response import Response
|
from rest_framework.response import Response
|
||||||
|
|
||||||
from mayan.apps.documents.models import Document, DocumentVersion
|
from mayan.apps.documents.models import Document, DocumentVersion
|
||||||
from mayan.apps.rest_api.permissions import MayanPermission
|
from mayan.apps.rest_api.viewsets import MayanAPIViewSet
|
||||||
|
|
||||||
from .models import DocumentPageOCRContent
|
|
||||||
from .permissions import permission_ocr_content_view, permission_ocr_document
|
from .permissions import permission_ocr_content_view, permission_ocr_document
|
||||||
from .serializers import DocumentPageOCRContentSerializer
|
from .serializers import (
|
||||||
|
DocumentOCRSerializer, DocumentPageOCRContentSerializer,
|
||||||
|
DocumentVersionOCRSerializer
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class APIDocumentOCRView(generics.GenericAPIView):
|
class DocumentOCRAPIViewSet(MayanAPIViewSet):
|
||||||
"""
|
lookup_url_kwarg = 'document_id'
|
||||||
post: Submit a document for OCR.
|
object_permission_map = {
|
||||||
"""
|
'ocr_content': permission_ocr_content_view,
|
||||||
mayan_object_permissions = {
|
'ocr_submit': permission_ocr_document,
|
||||||
'POST': (permission_ocr_document,)
|
|
||||||
}
|
}
|
||||||
permission_classes = (MayanPermission,)
|
|
||||||
queryset = Document.objects.all()
|
queryset = Document.objects.all()
|
||||||
|
serializer_class = DocumentOCRSerializer
|
||||||
|
|
||||||
def get_serializer(self, *args, **kwargs):
|
@action(
|
||||||
return None
|
detail=True, url_name='ocr-content', url_path='ocr'
|
||||||
|
)
|
||||||
def get_serializer_class(self):
|
def ocr_content(self, request, *args, **kwargs):
|
||||||
return None
|
instance = self.get_object()
|
||||||
|
serializer = self.get_serializer(instance)
|
||||||
def post(self, request, *args, **kwargs):
|
headers = self.get_success_headers(data=serializer.data)
|
||||||
self.get_object().submit_for_ocr()
|
return Response(
|
||||||
return Response(status=status.HTTP_202_ACCEPTED)
|
serializer.data, status=status.HTTP_200_OK, headers=headers
|
||||||
|
|
||||||
|
|
||||||
class APIDocumentVersionOCRView(generics.GenericAPIView):
|
|
||||||
"""
|
|
||||||
post: Submit a document version for OCR.
|
|
||||||
"""
|
|
||||||
lookup_url_kwarg = 'document_version_pk'
|
|
||||||
mayan_object_permissions = {
|
|
||||||
'POST': (permission_ocr_document,)
|
|
||||||
}
|
|
||||||
permission_classes = (MayanPermission,)
|
|
||||||
queryset = DocumentVersion.objects.all()
|
|
||||||
|
|
||||||
def get_document(self):
|
|
||||||
return get_object_or_404(klass=Document, pk=self.kwargs['document_pk'])
|
|
||||||
|
|
||||||
def get_queryset(self):
|
|
||||||
return self.get_document().versions.all()
|
|
||||||
|
|
||||||
def get_serializer(self, *args, **kwargs):
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_serializer_class(self):
|
|
||||||
return None
|
|
||||||
|
|
||||||
def post(self, request, *args, **kwargs):
|
|
||||||
self.get_object().submit_for_ocr()
|
|
||||||
return Response(status=status.HTTP_202_ACCEPTED)
|
|
||||||
|
|
||||||
|
|
||||||
class APIDocumentPageOCRContentView(generics.RetrieveAPIView):
|
|
||||||
"""
|
|
||||||
get: Returns the OCR content of the selected document page.
|
|
||||||
"""
|
|
||||||
lookup_url_kwarg = 'document_page_pk'
|
|
||||||
mayan_object_permissions = {
|
|
||||||
'GET': (permission_ocr_content_view,),
|
|
||||||
}
|
|
||||||
permission_classes = (MayanPermission,)
|
|
||||||
serializer_class = DocumentPageOCRContentSerializer
|
|
||||||
|
|
||||||
def get_document(self):
|
|
||||||
return get_object_or_404(klass=Document, pk=self.kwargs['document_pk'])
|
|
||||||
|
|
||||||
def get_document_version(self):
|
|
||||||
return get_object_or_404(
|
|
||||||
klass=self.get_document().versions.all(), pk=self.kwargs['document_version_pk']
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_queryset(self):
|
@action(
|
||||||
return self.get_document_version().pages.all()
|
detail=True, methods=('post',), url_name='ocr-submit',
|
||||||
|
url_path='ocr/submit'
|
||||||
def retrieve(self, request, *args, **kwargs):
|
)
|
||||||
|
def ocr_submit(self, request, *args, **kwargs):
|
||||||
instance = self.get_object()
|
instance = self.get_object()
|
||||||
|
instance.submit_for_ocr(_user=request.user)
|
||||||
|
return Response(
|
||||||
|
data=None, status=status.HTTP_202_ACCEPTED
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
ocr_content = instance.ocr_content
|
|
||||||
except DocumentPageOCRContent.DoesNotExist:
|
|
||||||
ocr_content = DocumentPageOCRContent.objects.none()
|
|
||||||
|
|
||||||
serializer = self.get_serializer(ocr_content)
|
class DocumentVersionOCRAPIViewSet(MayanAPIViewSet):
|
||||||
return Response(serializer.data)
|
lookup_url_kwarg = 'document_version_id'
|
||||||
|
object_permission_map = {
|
||||||
|
'ocr_content': permission_ocr_content_view,
|
||||||
|
'ocr_submit': permission_ocr_document,
|
||||||
|
}
|
||||||
|
queryset = DocumentVersion.objects.all()
|
||||||
|
serializer_class = DocumentVersionOCRSerializer
|
||||||
|
|
||||||
|
@action(
|
||||||
|
detail=True, url_name='ocr-content', url_path='ocr'
|
||||||
|
)
|
||||||
|
def ocr_content(self, request, *args, **kwargs):
|
||||||
|
instance = self.get_object()
|
||||||
|
serializer = self.get_serializer(instance)
|
||||||
|
headers = self.get_success_headers(data=serializer.data)
|
||||||
|
return Response(
|
||||||
|
serializer.data, status=status.HTTP_200_OK, headers=headers
|
||||||
|
)
|
||||||
|
|
||||||
|
@action(
|
||||||
|
detail=True, methods=('post',), url_name='ocr-submit',
|
||||||
|
url_path='ocr/submit'
|
||||||
|
)
|
||||||
|
def ocr_submit(self, request, *args, **kwargs):
|
||||||
|
instance = self.get_object()
|
||||||
|
instance.submit_for_ocr(_user=request.user)
|
||||||
|
return Response(
|
||||||
|
data=None, status=status.HTTP_202_ACCEPTED
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentPageOCRAPIViewSet(MayanAPIViewSet):
|
||||||
|
lookup_url_kwarg = 'document_page_id'
|
||||||
|
object_permission_map = {
|
||||||
|
'ocr_content': permission_ocr_content_view,
|
||||||
|
}
|
||||||
|
serializer_class = DocumentPageOCRContentSerializer
|
||||||
|
|
||||||
|
def get_queryset(self):
|
||||||
|
return get_object_or_404(
|
||||||
|
klass=DocumentVersion, document_id=self.kwargs['document_id'],
|
||||||
|
pk=self.kwargs['document_version_id']
|
||||||
|
).pages.all()
|
||||||
|
|
||||||
|
@action(
|
||||||
|
detail=True, url_name='content', url_path='ocr'
|
||||||
|
)
|
||||||
|
def ocr_content(self, request, *args, **kwargs):
|
||||||
|
instance = self.get_object()
|
||||||
|
serializer = self.get_serializer(instance)
|
||||||
|
headers = self.get_success_headers(data=serializer.data)
|
||||||
|
return Response(
|
||||||
|
serializer.data, status=status.HTTP_200_OK, headers=headers
|
||||||
|
)
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ from mayan.apps.common.classes import ModelAttribute, ModelField
|
|||||||
from mayan.apps.documents.search import document_search, document_page_search
|
from mayan.apps.documents.search import document_search, document_page_search
|
||||||
from mayan.apps.documents.signals import post_version_upload
|
from mayan.apps.documents.signals import post_version_upload
|
||||||
from mayan.apps.navigation import SourceColumn
|
from mayan.apps.navigation import SourceColumn
|
||||||
|
from mayan.apps.rest_api.fields import HyperlinkField
|
||||||
|
from mayan.apps.rest_api.serializers import LazyExtraFieldsSerializerMixin
|
||||||
from mayan.celery import app
|
from mayan.celery import app
|
||||||
|
|
||||||
from .handlers import (
|
from .handlers import (
|
||||||
@@ -31,8 +33,9 @@ from .links import (
|
|||||||
link_entry_list
|
link_entry_list
|
||||||
)
|
)
|
||||||
from .methods import (
|
from .methods import (
|
||||||
method_document_ocr_submit, method_document_version_ocr_submit,
|
method_document_get_ocr_content, method_document_page_get_ocr_content,
|
||||||
method_get_document_ocr_content, method_get_document_version_ocr_content
|
method_document_ocr_submit, method_document_version_get_ocr_content,
|
||||||
|
method_document_version_ocr_submit
|
||||||
)
|
)
|
||||||
from .permissions import (
|
from .permissions import (
|
||||||
permission_document_type_ocr_setup, permission_ocr_document,
|
permission_document_type_ocr_setup, permission_ocr_document,
|
||||||
@@ -75,19 +78,98 @@ class OCRApp(MayanAppConfig):
|
|||||||
|
|
||||||
Document.add_to_class(
|
Document.add_to_class(
|
||||||
name='get_ocr_content',
|
name='get_ocr_content',
|
||||||
value=method_get_document_ocr_content
|
value=method_document_get_ocr_content
|
||||||
)
|
)
|
||||||
Document.add_to_class(
|
Document.add_to_class(
|
||||||
name='submit_for_ocr', value=method_document_ocr_submit
|
name='submit_for_ocr', value=method_document_ocr_submit
|
||||||
)
|
)
|
||||||
|
DocumentPage.add_to_class(
|
||||||
|
name='get_ocr_content', value=method_document_page_get_ocr_content
|
||||||
|
)
|
||||||
DocumentVersion.add_to_class(
|
DocumentVersion.add_to_class(
|
||||||
name='get_ocr_content',
|
name='get_ocr_content',
|
||||||
value=method_get_document_version_ocr_content
|
value=method_document_version_get_ocr_content
|
||||||
)
|
)
|
||||||
DocumentVersion.add_to_class(
|
DocumentVersion.add_to_class(
|
||||||
name='submit_for_ocr', value=method_document_version_ocr_submit
|
name='submit_for_ocr', value=method_document_version_ocr_submit
|
||||||
)
|
)
|
||||||
|
|
||||||
|
LazyExtraFieldsSerializerMixin.add_field(
|
||||||
|
dotted_path='mayan.apps.documents.serializers.DocumentPageSerializer',
|
||||||
|
field_name='ocr_content_url',
|
||||||
|
field=HyperlinkField(
|
||||||
|
view_kwargs=(
|
||||||
|
{
|
||||||
|
'lookup_field': 'document_version__document_id',
|
||||||
|
'lookup_url_kwarg': 'document_id',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'lookup_field': 'document_version_id',
|
||||||
|
'lookup_url_kwarg': 'document_version_id',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'lookup_field': 'pk',
|
||||||
|
'lookup_url_kwarg': 'document_page_id',
|
||||||
|
}
|
||||||
|
),
|
||||||
|
view_name='rest_api:document_page-ocr-content'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
LazyExtraFieldsSerializerMixin.add_field(
|
||||||
|
dotted_path='mayan.apps.documents.serializers.DocumentSerializer',
|
||||||
|
field_name='ocr_content_url',
|
||||||
|
field=HyperlinkField(
|
||||||
|
lookup_url_kwarg='document_id',
|
||||||
|
view_name='rest_api:document-ocr-content'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
LazyExtraFieldsSerializerMixin.add_field(
|
||||||
|
dotted_path='mayan.apps.documents.serializers.DocumentSerializer',
|
||||||
|
field_name='ocr_submit_url',
|
||||||
|
field=HyperlinkField(
|
||||||
|
lookup_url_kwarg='document_id',
|
||||||
|
view_name='rest_api:document-ocr-submit'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
LazyExtraFieldsSerializerMixin.add_field(
|
||||||
|
dotted_path='mayan.apps.documents.serializers.DocumentVersionSerializer',
|
||||||
|
field_name='ocr_submit_url',
|
||||||
|
field=HyperlinkField(
|
||||||
|
view_kwargs=(
|
||||||
|
{
|
||||||
|
'lookup_field': 'document_id',
|
||||||
|
'lookup_url_kwarg': 'document_id',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'lookup_field': 'pk',
|
||||||
|
'lookup_url_kwarg': 'document_version_id',
|
||||||
|
}
|
||||||
|
),
|
||||||
|
view_name='rest_api:document_version-ocr-submit'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
LazyExtraFieldsSerializerMixin.add_field(
|
||||||
|
dotted_path='mayan.apps.documents.serializers.DocumentVersionSerializer',
|
||||||
|
field_name='ocr_content_url',
|
||||||
|
field=HyperlinkField(
|
||||||
|
view_kwargs=(
|
||||||
|
{
|
||||||
|
'lookup_field': 'document_id',
|
||||||
|
'lookup_url_kwarg': 'document_id',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'lookup_field': 'pk',
|
||||||
|
'lookup_url_kwarg': 'document_version_id',
|
||||||
|
}
|
||||||
|
),
|
||||||
|
view_name='rest_api:document_version-ocr-content'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
ModelAttribute(model=Document, name='get_ocr_content')
|
ModelAttribute(model=Document, name='get_ocr_content')
|
||||||
|
|
||||||
ModelField(
|
ModelField(
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
|
from django.apps import apps
|
||||||
from django.utils.timezone import now
|
from django.utils.timezone import now
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
@@ -9,43 +10,56 @@ from mayan.apps.common.settings import settings_db_sync_task_delay
|
|||||||
|
|
||||||
from .events import event_ocr_document_version_submit
|
from .events import event_ocr_document_version_submit
|
||||||
from .tasks import task_do_ocr
|
from .tasks import task_do_ocr
|
||||||
from .utils import get_document_version_content_iterator
|
|
||||||
|
|
||||||
|
|
||||||
def method_document_ocr_submit(self):
|
def method_document_get_ocr_content(self):
|
||||||
latest_version = self.latest_version
|
|
||||||
# Don't error out if document has no version
|
|
||||||
if latest_version:
|
|
||||||
latest_version.submit_for_ocr()
|
|
||||||
|
|
||||||
|
|
||||||
def method_document_version_ocr_submit(self):
|
|
||||||
event_ocr_document_version_submit.commit(
|
|
||||||
action_object=self.document, target=self
|
|
||||||
)
|
|
||||||
|
|
||||||
task_do_ocr.apply_async(
|
|
||||||
eta=now() + timedelta(seconds=settings_db_sync_task_delay.value),
|
|
||||||
kwargs={'document_version_pk': self.pk},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def method_get_document_ocr_content(self):
|
|
||||||
latest_version = self.latest_version
|
latest_version = self.latest_version
|
||||||
# Don't error out if document has no version
|
# Don't error out if document has no version
|
||||||
if latest_version:
|
if latest_version:
|
||||||
return latest_version.get_ocr_content()
|
return latest_version.get_ocr_content()
|
||||||
|
|
||||||
|
|
||||||
method_get_document_ocr_content.short_description = _(
|
method_document_get_ocr_content.short_description = _(
|
||||||
'get_ocr_content()'
|
'get_ocr_content()'
|
||||||
)
|
)
|
||||||
method_get_document_ocr_content.help_text = _(
|
method_document_get_ocr_content.help_text = _(
|
||||||
'Return the OCR content of the document.'
|
'Return the OCR content of the document.'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def method_get_document_version_ocr_content(self):
|
def method_document_ocr_submit(self, _user=None):
|
||||||
return ' '.join(
|
latest_version = self.latest_version
|
||||||
get_document_version_content_iterator(document_version=self)
|
# Don't error out if document has no version
|
||||||
|
if latest_version:
|
||||||
|
latest_version.submit_for_ocr(_user=_user)
|
||||||
|
|
||||||
|
|
||||||
|
def method_document_page_get_ocr_content(self):
|
||||||
|
DocumentPageOCRContent = apps.get_model(
|
||||||
|
app_label='ocr', model_name='DocumentPageOCRContent'
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
page_content = self.ocr_content.content
|
||||||
|
except DocumentPageOCRContent.DoesNotExist:
|
||||||
|
return ''
|
||||||
|
return page_content
|
||||||
|
|
||||||
|
|
||||||
|
def method_document_version_get_ocr_content(self):
|
||||||
|
result = []
|
||||||
|
for page in self.pages.all():
|
||||||
|
result.append(page.get_ocr_content())
|
||||||
|
|
||||||
|
return ''.join(result)
|
||||||
|
|
||||||
|
|
||||||
|
def method_document_version_ocr_submit(self, _user=None):
|
||||||
|
event_ocr_document_version_submit.commit(
|
||||||
|
action_object=self.document, actor=_user, target=self
|
||||||
|
)
|
||||||
|
|
||||||
|
task_do_ocr.apply_async(
|
||||||
|
eta=now() + timedelta(seconds=settings_db_sync_task_delay.value),
|
||||||
|
kwargs={'document_version_pk': self.pk},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -2,10 +2,20 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
|
||||||
from .models import DocumentPageOCRContent
|
|
||||||
|
class DocumentOCRSerializer(serializers.Serializer):
|
||||||
|
text = serializers.CharField(
|
||||||
|
read_only=True, source='get_ocr_content'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DocumentPageOCRContentSerializer(serializers.ModelSerializer):
|
class DocumentPageOCRContentSerializer(serializers.Serializer):
|
||||||
class Meta:
|
text = serializers.CharField(
|
||||||
fields = ('content',)
|
read_only=True, source='get_ocr_content'
|
||||||
model = DocumentPageOCRContent
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentVersionOCRSerializer(serializers.Serializer):
|
||||||
|
text = serializers.CharField(
|
||||||
|
read_only=True, source='get_ocr_content'
|
||||||
|
)
|
||||||
|
|||||||
@@ -9,48 +9,45 @@ from ..permissions import (
|
|||||||
permission_ocr_document, permission_ocr_content_view,
|
permission_ocr_document, permission_ocr_content_view,
|
||||||
)
|
)
|
||||||
|
|
||||||
TEST_DOCUMENT_CONTENT = 'Mayan EDMS Documentation'
|
from .literals import TEST_DOCUMENT_CONTENT
|
||||||
|
|
||||||
|
|
||||||
class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
|
class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
|
||||||
"""
|
|
||||||
Test the OCR app API endpoints
|
|
||||||
"""
|
|
||||||
def setUp(self):
|
|
||||||
super(OCRAPITestCase, self).setUp()
|
|
||||||
self.login_user()
|
|
||||||
|
|
||||||
def _request_document_ocr_submit_view(self):
|
def _request_document_ocr_submit_view(self):
|
||||||
return self.post(
|
return self.post(
|
||||||
viewname='rest_api:document-ocr-submit-view',
|
viewname='rest_api:document-ocr-submit',
|
||||||
kwargs={'document_id': self.document.pk}
|
kwargs={'document_id': self.document.pk}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_submit_document_no_access(self):
|
def test_submit_document_no_permission(self):
|
||||||
response = self._request_document_ocr_submit_view()
|
response = self._request_document_ocr_submit_view()
|
||||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
|
||||||
|
|
||||||
self.assertFalse(hasattr(self.document.pages.first(), 'ocr_content'))
|
self.assertFalse(hasattr(self.document.pages.first(), 'ocr_content'))
|
||||||
|
|
||||||
|
#TODO: mock OCR here
|
||||||
def test_submit_document_with_access(self):
|
def test_submit_document_with_access(self):
|
||||||
self.grant_access(
|
self.grant_access(
|
||||||
permission=permission_ocr_document, obj=self.document
|
permission=permission_ocr_document, obj=self.document
|
||||||
)
|
)
|
||||||
response = self._request_document_ocr_submit_view()
|
response = self._request_document_ocr_submit_view()
|
||||||
self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
|
self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
|
||||||
|
|
||||||
self.assertTrue(hasattr(self.document.pages.first(), 'ocr_content'))
|
self.assertTrue(hasattr(self.document.pages.first(), 'ocr_content'))
|
||||||
|
|
||||||
def _request_document_version_ocr_submit_view(self):
|
def _request_document_version_ocr_submit_view(self):
|
||||||
return self.post(
|
return self.post(
|
||||||
viewname='rest_api:document-version-ocr-submit-view',
|
viewname='rest_api:document_version-ocr-submit',
|
||||||
kwargs={
|
kwargs={
|
||||||
'document_id': self.document.pk,
|
'document_id': self.document.pk,
|
||||||
'document_version_id': self.document.latest_version.pk
|
'document_version_id': self.document.latest_version.pk
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_submit_document_version_no_access(self):
|
def test_submit_document_version_no_permission(self):
|
||||||
response = self._request_document_version_ocr_submit_view()
|
response = self._request_document_version_ocr_submit_view()
|
||||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
|
||||||
|
|
||||||
self.assertFalse(hasattr(self.document.pages.first(), 'ocr_content'))
|
self.assertFalse(hasattr(self.document.pages.first(), 'ocr_content'))
|
||||||
|
|
||||||
def test_submit_document_version_with_access(self):
|
def test_submit_document_version_with_access(self):
|
||||||
@@ -59,29 +56,84 @@ class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
|
|||||||
)
|
)
|
||||||
response = self._request_document_version_ocr_submit_view()
|
response = self._request_document_version_ocr_submit_view()
|
||||||
self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
|
self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
|
||||||
|
|
||||||
self.assertTrue(hasattr(self.document.pages.first(), 'ocr_content'))
|
self.assertTrue(hasattr(self.document.pages.first(), 'ocr_content'))
|
||||||
|
|
||||||
def _request_document_page_content_view(self):
|
def _request_document_content_view(self):
|
||||||
return self.get(
|
return self.get(
|
||||||
viewname='rest_api:document-page-ocr-content-view',
|
viewname='rest_api:document-ocr-content',
|
||||||
kwargs={
|
kwargs={
|
||||||
'document_id': self.document.pk,
|
'document_id': self.test_document.pk,
|
||||||
'document_version_id': self.document.latest_version.pk,
|
|
||||||
'document_page_id': self.document.latest_version.pages.first().pk
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_get_document_version_page_content_no_access(self):
|
def test_get_document_content_no_permission(self):
|
||||||
|
response = self._request_document_content_view()
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
|
||||||
|
|
||||||
|
def test_get_document_content_with_access(self):
|
||||||
|
self.document.submit_for_ocr()
|
||||||
|
self.grant_access(
|
||||||
|
permission=permission_ocr_content_view, obj=self.document
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self._request_document_content_view()
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
self.assertTrue(
|
||||||
|
TEST_DOCUMENT_CONTENT in response.data['text']
|
||||||
|
)
|
||||||
|
|
||||||
|
def _request_document_page_content_view(self):
|
||||||
|
latest_version = self.test_document.latest_version
|
||||||
|
|
||||||
|
return self.get(
|
||||||
|
viewname='rest_api:document_page-ocr-content',
|
||||||
|
kwargs={
|
||||||
|
'document_id': self.test_document.pk,
|
||||||
|
'document_version_id': latest_version.pk,
|
||||||
|
'document_page_id': latest_version.pages.first().pk
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_get_document_version_page_content_no_permission(self):
|
||||||
response = self._request_document_page_content_view()
|
response = self._request_document_page_content_view()
|
||||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
|
||||||
|
|
||||||
def test_get_document_version_page_content_with_access(self):
|
def test_get_document_version_page_content_with_access(self):
|
||||||
self.document.submit_for_ocr()
|
self.document.submit_for_ocr()
|
||||||
self.grant_access(
|
self.grant_access(
|
||||||
permission=permission_ocr_content_view, obj=self.document
|
permission=permission_ocr_content_view, obj=self.document
|
||||||
)
|
)
|
||||||
|
|
||||||
response = self._request_document_page_content_view()
|
response = self._request_document_page_content_view()
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
TEST_DOCUMENT_CONTENT in response.data['content']
|
TEST_DOCUMENT_CONTENT in response.data['text']
|
||||||
|
)
|
||||||
|
|
||||||
|
def _request_document_version_content_view(self):
|
||||||
|
latest_version = self.test_document.latest_version
|
||||||
|
|
||||||
|
return self.get(
|
||||||
|
viewname='rest_api:document_version-ocr-content',
|
||||||
|
kwargs={
|
||||||
|
'document_id': self.test_document.pk,
|
||||||
|
'document_version_id': latest_version.pk,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_get_document_version_version_content_no_permission(self):
|
||||||
|
response = self._request_document_version_content_view()
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
|
||||||
|
|
||||||
|
def test_get_document_version_version_content_with_access(self):
|
||||||
|
self.document.submit_for_ocr()
|
||||||
|
self.grant_access(
|
||||||
|
permission=permission_ocr_content_view, obj=self.document
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self._request_document_version_content_view()
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
self.assertTrue(
|
||||||
|
TEST_DOCUMENT_CONTENT in response.data['text']
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -3,8 +3,8 @@ from __future__ import unicode_literals
|
|||||||
from django.conf.urls import url
|
from django.conf.urls import url
|
||||||
|
|
||||||
from .api_views import (
|
from .api_views import (
|
||||||
APIDocumentOCRView, APIDocumentPageOCRContentView,
|
DocumentPageOCRAPIViewSet, DocumentOCRAPIViewSet,
|
||||||
APIDocumentVersionOCRView
|
DocumentVersionOCRAPIViewSet
|
||||||
)
|
)
|
||||||
from .views import (
|
from .views import (
|
||||||
DocumentOCRContentView, DocumentOCRDownloadView,
|
DocumentOCRContentView, DocumentOCRDownloadView,
|
||||||
@@ -55,20 +55,17 @@ urlpatterns = [
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
api_urls = [
|
api_router_entries = (
|
||||||
url(
|
{
|
||||||
regex=r'^documents/(?P<document_id>\d+)/ocr/submit/$',
|
'prefix': r'documents',
|
||||||
name='document-ocr-submit-view',
|
'viewset': DocumentOCRAPIViewSet, 'basename': 'document'
|
||||||
view=APIDocumentOCRView.as_view()
|
},
|
||||||
),
|
{
|
||||||
url(
|
'prefix': r'documents/(?P<document_id>\d+)/document_versions',
|
||||||
regex=r'^documents/(?P<document_id>\d+)/versions/(?P<document_version_id>\d+)/ocr/$',
|
'viewset': DocumentVersionOCRAPIViewSet, 'basename': 'document_version'
|
||||||
name='document-version-ocr-submit-view',
|
},
|
||||||
view=APIDocumentVersionOCRView.as_view()
|
{
|
||||||
),
|
'prefix': r'documents/(?P<document_id>\d+)/document_versions/(?P<document_version_id>\d+)/document_pages',
|
||||||
url(
|
'viewset': DocumentPageOCRAPIViewSet, 'basename': 'document_page-ocr'
|
||||||
regex=r'^documents/(?P<document_id>\d+)/versions/(?P<document_version_id>\d+)/pages/(?P<document_page_id>\d+)/ocr/$',
|
}
|
||||||
name='document-page-ocr-content-view',
|
)
|
||||||
view=APIDocumentPageOCRContentView.as_view()
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from mayan.apps.common.generics import (
|
|||||||
SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
|
SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
|
||||||
)
|
)
|
||||||
from mayan.apps.documents.forms import DocumentTypeFilteredSelectForm
|
from mayan.apps.documents.forms import DocumentTypeFilteredSelectForm
|
||||||
|
from mayan.apps.documents.mixins import RecentDocumentMixin
|
||||||
from mayan.apps.documents.models import Document, DocumentPage, DocumentType
|
from mayan.apps.documents.models import Document, DocumentPage, DocumentType
|
||||||
|
|
||||||
from .forms import DocumentPageOCRContentForm, DocumentOCRContentForm
|
from .forms import DocumentPageOCRContentForm, DocumentOCRContentForm
|
||||||
@@ -23,19 +24,12 @@ from .permissions import (
|
|||||||
from .utils import get_document_content_iterator
|
from .utils import get_document_content_iterator
|
||||||
|
|
||||||
|
|
||||||
class DocumentOCRContentView(SingleObjectDetailView):
|
class DocumentOCRContentView(RecentDocumentMixin, SingleObjectDetailView):
|
||||||
form_class = DocumentOCRContentForm
|
form_class = DocumentOCRContentForm
|
||||||
model = Document
|
model = Document
|
||||||
object_permission = permission_ocr_content_view
|
object_permission = permission_ocr_content_view
|
||||||
pk_url_kwarg = 'document_id'
|
pk_url_kwarg = 'document_id'
|
||||||
|
|
||||||
def dispatch(self, request, *args, **kwargs):
|
|
||||||
result = super(DocumentOCRContentView, self).dispatch(
|
|
||||||
request, *args, **kwargs
|
|
||||||
)
|
|
||||||
self.get_object().add_as_recent_document_for_user(user=request.user)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def get_extra_context(self):
|
def get_extra_context(self):
|
||||||
return {
|
return {
|
||||||
'document': self.get_object(),
|
'document': self.get_object(),
|
||||||
@@ -45,17 +39,17 @@ class DocumentOCRContentView(SingleObjectDetailView):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class DocumentOCRDownloadView(SingleObjectDownloadView):
|
class DocumentOCRDownloadView(RecentDocumentMixin, SingleObjectDownloadView):
|
||||||
model = Document
|
model = Document
|
||||||
object_permission = permission_ocr_content_view
|
object_permission = permission_ocr_content_view
|
||||||
pk_url_kwarg = 'document_id'
|
pk_url_kwarg = 'document_id'
|
||||||
|
|
||||||
def get_file(self):
|
def get_file(self):
|
||||||
file_object = DocumentOCRDownloadView.TextIteratorIO(
|
file_object = DocumentOCRDownloadView.TextIteratorIO(
|
||||||
iterator=get_document_content_iterator(document=self.get_object())
|
iterator=get_document_content_iterator(document=self.object)
|
||||||
)
|
)
|
||||||
return DocumentOCRDownloadView.VirtualFile(
|
return DocumentOCRDownloadView.VirtualFile(
|
||||||
file=file_object, name='{}-OCR'.format(self.get_object())
|
file=file_object, name='{}-OCR'.format(self.object)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -78,28 +72,22 @@ class DocumentOCRErrorsListView(SingleObjectListView):
|
|||||||
return self.get_document().latest_version.ocr_errors.all()
|
return self.get_document().latest_version.ocr_errors.all()
|
||||||
|
|
||||||
|
|
||||||
class DocumentPageOCRContentView(SingleObjectDetailView):
|
class DocumentPageOCRContentView(RecentDocumentMixin, SingleObjectDetailView):
|
||||||
form_class = DocumentPageOCRContentForm
|
form_class = DocumentPageOCRContentForm
|
||||||
model = DocumentPage
|
model = DocumentPage
|
||||||
object_permission = permission_ocr_content_view
|
object_permission = permission_ocr_content_view
|
||||||
pk_url_kwarg = 'document_page_id'
|
pk_url_kwarg = 'document_page_id'
|
||||||
|
|
||||||
def dispatch(self, request, *args, **kwargs):
|
|
||||||
result = super(DocumentPageOCRContentView, self).dispatch(
|
|
||||||
request, *args, **kwargs
|
|
||||||
)
|
|
||||||
self.get_object().document.add_as_recent_document_for_user(
|
|
||||||
user=request.user
|
|
||||||
)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def get_extra_context(self):
|
def get_extra_context(self):
|
||||||
return {
|
return {
|
||||||
'hide_labels': True,
|
'hide_labels': True,
|
||||||
'object': self.get_object(),
|
'object': self.object,
|
||||||
'title': _('OCR result for document page: %s') % self.get_object(),
|
'title': _('OCR result for document page: %s') % self.object,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def get_recent_document(self):
|
||||||
|
return self.object.document
|
||||||
|
|
||||||
|
|
||||||
class DocumentSubmitView(MultipleObjectConfirmActionView):
|
class DocumentSubmitView(MultipleObjectConfirmActionView):
|
||||||
model = Document
|
model = Document
|
||||||
|
|||||||
Reference in New Issue
Block a user