Finish the document parsing app.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
2017-08-24 03:54:07 -04:00
parent e9591c92f9
commit a7eaf6b368
25 changed files with 423 additions and 639 deletions
--- a/mayan/apps/document_parsing/views.py
+++ b/mayan/apps/document_parsing/views.py
@@ -4,137 +4,27 @@ from django.contrib import messages
 from django.http import HttpResponseRedirect
 from django.shortcuts import get_object_or_404
 from django.urls import reverse
-from django.utils.translation import ugettext_lazy as _
+from django.utils.translation import ugettext_lazy as _, ungettext

-from acls.models import AccessControlList
 from common.generics import (
-    ConfirmView, FormView, SingleObjectDetailView, SingleObjectDownloadView,
-    SingleObjectEditView, SingleObjectListView
+    FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
+    SingleObjectDownloadView, SingleObjectListView
 )
-from common.mixins import MultipleInstanceActionMixin
-from documents.models import Document, DocumentType
+from documents.models import Document

 from .forms import DocumentContentForm, DocumentTypeSelectForm
-from .models import DocumentVersionOCRError
-from .permissions import (
-    permission_ocr_content_view, permission_ocr_document,
-    permission_document_type_ocr_setup
-)
-from .utils import get_document_ocr_content
+from .models import DocumentVersionParseError
+from .permissions import permission_content_view, permission_parse_document
+from .utils import get_document_content


-class DocumentAllSubmitView(ConfirmView):
-    extra_context = {'title': _('Submit all documents for OCR?')}
-
-    def get_post_action_redirect(self):
-        return reverse('common:tools_list')
-
-    def view_action(self):
-        count = 0
-        for document in Document.objects.all():
-            document.submit_for_ocr()
-            count += 1
-
-        messages.success(
-            self.request, _('%d documents added to the OCR queue.') % count
-        )
-
-
-class DocumentSubmitView(ConfirmView):
-    def get_extra_context(self):
-        return {
-            'object': self.get_object(),
-            'title': _('Submit "%s" to the OCR queue?') % self.get_object()
-        }
-
-    def get_object(self):
-        return Document.objects.get(pk=self.kwargs['pk'])
-
-    def object_action(self, instance):
-        AccessControlList.objects.check_access(
-            permissions=permission_ocr_document, user=self.request.user,
-            obj=instance
-        )
-
-        instance.submit_for_ocr()
-
-    def view_action(self):
-        instance = self.get_object()
-
-        self.object_action(instance=instance)
-
-        messages.success(
-            self.request,
-            _('Document: %(document)s was added to the OCR queue.') % {
-                'document': instance
-            }
-        )
-
-
-class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView):
-    model = Document
-    success_message = '%(count)d document submitted to the OCR queue.'
-    success_message_plural = '%(count)d documents submitted to the OCR queue.'
-
-    def get_extra_context(self):
-        # Override the base class method
-        return {
-            'title': _('Submit the selected documents to the OCR queue?')
-        }
-
-
-class DocumentTypeSubmitView(FormView):
-    form_class = DocumentTypeSelectForm
-    extra_context = {
-        'title': _('Submit all documents of a type for OCR')
-    }
-
-    def get_post_action_redirect(self):
-        return reverse('common:tools_list')
-
-    def form_valid(self, form):
-        count = 0
-        for document in form.cleaned_data['document_type'].documents.all():
-            document.submit_for_ocr()
-            count += 1
-
-        messages.success(
-            self.request, _(
-                '%(count)d documents of type "%(document_type)s" added to the '
-                'OCR queue.'
-            ) % {
-                'count': count,
-                'document_type': form.cleaned_data['document_type']
-            }
-        )
-
-        return HttpResponseRedirect(self.get_success_url())
-
-
-class DocumentTypeSettingsEditView(SingleObjectEditView):
-    fields = ('auto_ocr',)
-    view_permission = permission_document_type_ocr_setup
-
-    def get_object(self, queryset=None):
-        return get_object_or_404(
-            DocumentType, pk=self.kwargs['pk']
-        ).ocr_settings
-
-    def get_extra_context(self):
-        return {
-            'title': _(
-                'Edit OCR settings for document type: %s'
-            ) % self.get_object().document_type
-        }
-
-
-class DocumentOCRContent(SingleObjectDetailView):
+class DocumentContentView(SingleObjectDetailView):
    form_class = DocumentContentForm
    model = Document
-    object_permission = permission_ocr_content_view
+    object_permission = permission_content_view

    def dispatch(self, request, *args, **kwargs):
-        result = super(DocumentOCRContent, self).dispatch(
+        result = super(DocumentContentView, self).dispatch(
            request, *args, **kwargs
        )
        self.get_object().add_as_recent_document_for_user(request.user)
@@ -145,23 +35,25 @@ class DocumentOCRContent(SingleObjectDetailView):
            'document': self.get_object(),
            'hide_labels': True,
            'object': self.get_object(),
-            'title': _('OCR result for document: %s') % self.get_object(),
+            'title': _('Content for document: %s') % self.get_object(),
        }


-class EntryListView(SingleObjectListView):
-    extra_context = {
-        'hide_object': True,
-        'title': _('OCR errors'),
-    }
-    view_permission = permission_ocr_document
+class DocumentContentDownloadView(SingleObjectDownloadView):
+    model = Document
+    object_permission = permission_content_view

-    def get_object_list(self):
-        return DocumentVersionOCRError.objects.all()
+    def get_file(self):
+        file_object = DocumentContentDownloadView.TextIteratorIO(
+            iterator=get_document_content(document=self.get_object())
+        )
+        return DocumentContentDownloadView.VirtualFile(
+            file=file_object, name='{}-content'.format(self.get_object())
+        )


-class DocumentOCRErrorsListView(SingleObjectListView):
-    view_permission = permission_ocr_document
+class DocumentParsingErrorsListView(SingleObjectListView):
+    view_permission = permission_content_view

    def get_document(self):
        return get_object_or_404(Document, pk=self.kwargs['pk'])
@@ -170,21 +62,93 @@ class DocumentOCRErrorsListView(SingleObjectListView):
        return {
            'hide_object': True,
            'object': self.get_document(),
-            'title': _('OCR errors for document: %s') % self.get_document(),
+            'title': _(
+                'Parsing errors for document: %s'
+            ) % self.get_document(),
        }

    def get_object_list(self):
-        return self.get_document().latest_version.ocr_errors.all()
+        return self.get_document().latest_version.parsing_errors.all()


-class DocumentOCRDownloadView(SingleObjectDownloadView):
+class DocumentSubmitView(MultipleObjectConfirmActionView):
    model = Document
-    object_permission = permission_ocr_content_view
+    object_permission = permission_parse_document
+    success_message = _(
+        '%(count)d document added to the parsing queue'
+    )
+    success_message_plural = _(
+        '%(count)d documents added to the parsing queue'
+    )

-    def get_file(self):
-        file_object = DocumentOCRDownloadView.TextIteratorIO(
-            iterator=get_document_ocr_content(document=self.get_object())
-        )
-        return DocumentOCRDownloadView.VirtualFile(
-            file=file_object, name='{}-OCR'.format(self.get_object())
+    def get_extra_context(self):
+        queryset = self.get_queryset()
+
+        result = {
+            'title': ungettext(
+                singular='Submit %(count)d document to the parsing queue?',
+                plural='Submit %(count)d documents to the parsing queue',
+                number=queryset.count()
+            ) % {
+                'count': queryset.count(),
+            }
+        }
+
+        if queryset.count() == 1:
+            result.update(
+                {
+                    'object': queryset.first(),
+                    'title': _(
+                        'Submit document "%s" to the parsing queue'
+                    ) % queryset.first()
+                }
+            )
+
+        return result
+
+    def object_action(self, instance, form=None):
+        instance.submit_for_parsing()
+
+
+class DocumentTypeSubmitView(FormView):
+    form_class = DocumentTypeSelectForm
+    extra_context = {
+        'title': _('Submit all documents of a type for parsing')
+    }
+
+    def get_form_extra_kwargs(self):
+        return {
+            'user': self.request.user
+        }
+
+    def get_post_action_redirect(self):
+        return reverse('common:tools_list')
+
+    def form_valid(self, form):
+        count = 0
+        for document in form.cleaned_data['document_type'].documents.all():
+            document.submit_for_parsing()
+            count += 1
+
+        messages.success(
+            self.request, _(
+                '%(count)d documents of type "%(document_type)s" added to the '
+                'parsing queue.'
+            ) % {
+                'count': count,
+                'document_type': form.cleaned_data['document_type']
+            }
        )
+
+        return HttpResponseRedirect(self.get_success_url())
+
+
+class ParseErrorListView(SingleObjectListView):
+    extra_context = {
+        'hide_object': True,
+        'title': _('Parsing errors'),
+    }
+    view_permission = permission_content_view
+
+    def get_object_list(self):
+        return DocumentVersionParseError.objects.all()