Files
mayan-edms/mayan/apps/ocr/views.py
Roberto Rosario 0699ad0556 Add support for new document page structure
Documents now have their own dedicated DocumentPage
submodel. The old DocumentPage is now called DocumentVersionPage.
This allows mappings between document pages and document version
pages, allowing renumbering, appending pages.
DocumentPages have a content_object to map them to any other
object. For now they only map to DocumentVersionPages.
New option added to the version upload form to append the
pages of the new version.
A new view was added to just append new pages, which wraps the
new document version upload form and hides the append pages
checkbox, which is set to True.
Add a new action, reset_pages to reset the pages of the
document to those of the latest version.

Missing: appending tests, checks for proper content_object in OCR and
document parsing.

Author: Roberto Rosario <roberto.rosario@mayan-edms.com>
Date:   Thu Oct 11 12:00:25 2019 -0400
2019-10-10 11:55:42 -04:00

250 lines
7.8 KiB
Python

from __future__ import absolute_import, unicode_literals
from django.contrib import messages
from django.http import HttpResponseRedirect
from django.shortcuts import get_object_or_404
from django.urls import reverse, reverse_lazy
from django.utils.translation import ugettext_lazy as _, ungettext
from mayan.apps.common.generics import (
FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
)
from mayan.apps.common.mixins import ExternalObjectMixin
from mayan.apps.documents.forms import DocumentTypeFilteredSelectForm
from mayan.apps.documents.models import (
Document, DocumentPage, DocumentType, DocumentVersionPage
)
from .forms import (
DocumentPageOCRContentForm, DocumentOCRContentForm,
DocumentVersionPageOCRContentForm
)
from .models import DocumentVersionPageOCRContent, DocumentVersionOCRError
from .permissions import (
permission_ocr_content_view, permission_ocr_document,
permission_document_type_ocr_setup
)
from .utils import get_document_ocr_content
class DocumentOCRContentDeleteView(MultipleObjectConfirmActionView):
    """
    Confirmation view that deletes the OCR content of the selected
    documents.
    """
    model = Document
    object_permission = permission_ocr_document
    success_message = 'Deleted OCR content of %(count)d document.'
    success_message_plural = 'Deleted OCR content of %(count)d documents.'

    def get_extra_context(self):
        """
        Build the confirmation page context.

        Returns a dictionary with a pluralized ``title`` and, when exactly
        one document is selected, that document under the ``object`` key.
        """
        queryset = self.object_list
        # Count only once; assuming object_list is a QuerySet, every
        # count() call on it may issue its own COUNT query.
        document_count = queryset.count()

        result = {
            'title': ungettext(
                singular='Delete the OCR content of the selected document?',
                plural='Delete the OCR content of the selected documents?',
                number=document_count
            )
        }

        if document_count == 1:
            result['object'] = queryset.first()

        return result

    def object_action(self, form, instance):
        """
        Delete the OCR content of ``instance`` on behalf of the requesting
        user. The ``form`` argument is not used.
        """
        DocumentVersionPageOCRContent.objects.delete_content_for(
            document=instance, user=self.request.user
        )
class DocumentOCRContentView(SingleObjectDetailView):
    """Detail view that displays the OCR content form of a document."""
    form_class = DocumentOCRContentForm
    model = Document
    object_permission = permission_ocr_content_view

    def dispatch(self, request, *args, **kwargs):
        """
        Dispatch the request through the parent view and afterwards
        register the document as recently accessed by the requesting user.
        """
        result = super(DocumentOCRContentView, self).dispatch(
            request, *args, **kwargs
        )
        self.get_object().add_as_recent_document_for_user(user=request.user)
        return result

    def get_extra_context(self):
        """
        Return the template context: the document (under both the
        ``document`` and ``object`` keys), the ``hide_labels`` flag and
        the page title.
        """
        # Resolve the document a single time instead of once per context
        # entry; each get_object() call may repeat the lookup.
        document = self.get_object()

        return {
            'document': document,
            'hide_labels': True,
            'object': document,
            'title': _('OCR result for document: %s') % document,
        }
class DocumentPageOCRContentView(SingleObjectDetailView):
    """Detail view that displays the OCR content form of a document page."""
    form_class = DocumentPageOCRContentForm
    model = DocumentPage
    object_permission = permission_ocr_content_view

    def dispatch(self, request, *args, **kwargs):
        """
        Dispatch the request through the parent view and afterwards
        register the page's document as recently accessed by the
        requesting user.
        """
        result = super(DocumentPageOCRContentView, self).dispatch(
            request, *args, **kwargs
        )
        self.get_object().document.add_as_recent_document_for_user(
            user=request.user
        )
        return result

    def get_extra_context(self):
        """
        Return the template context: the page under ``object``, the
        ``hide_labels`` flag and the page title.
        """
        # Resolve the page a single time; each get_object() call may
        # repeat the lookup.
        document_page = self.get_object()

        return {
            'hide_labels': True,
            'object': document_page,
            'title': _('OCR result for document page: %s') % document_page,
        }
class DocumentVersionPageOCRContentView(SingleObjectDetailView):
    """
    Detail view that displays the OCR content form of a document version
    page.
    """
    form_class = DocumentVersionPageOCRContentForm
    model = DocumentVersionPage
    object_permission = permission_ocr_content_view

    def dispatch(self, request, *args, **kwargs):
        """
        Dispatch the request through the parent view and afterwards
        register the page's document as recently accessed by the
        requesting user.
        """
        result = super(DocumentVersionPageOCRContentView, self).dispatch(
            request, *args, **kwargs
        )
        self.get_object().document.add_as_recent_document_for_user(
            user=request.user
        )
        return result

    def get_extra_context(self):
        """
        Return the template context: the version page under ``object``,
        the ``hide_labels`` flag and the page title.
        """
        # Resolve the version page a single time; each get_object() call
        # may repeat the lookup.
        document_version_page = self.get_object()

        return {
            'hide_labels': True,
            'object': document_version_page,
            'title': _(
                'OCR result for document version page: %s'
            ) % document_version_page,
        }
class DocumentSubmitView(MultipleObjectConfirmActionView):
    """Confirmation view that submits the selected documents for OCR."""
    model = Document
    object_permission = permission_ocr_document
    success_message = '%(count)d document submitted to the OCR queue.'
    success_message_plural = '%(count)d documents submitted to the OCR queue.'

    def get_extra_context(self):
        """
        Build the confirmation page context.

        Returns a dictionary with a pluralized ``title`` and, when exactly
        one document is selected, that document under the ``object`` key.
        """
        queryset = self.object_list
        # Count only once; assuming object_list is a QuerySet, every
        # count() call on it may issue its own COUNT query.
        document_count = queryset.count()

        result = {
            'title': ungettext(
                singular='Submit the selected document to the OCR queue?',
                plural='Submit the selected documents to the OCR queue?',
                number=document_count
            )
        }

        if document_count == 1:
            result['object'] = queryset.first()

        return result

    def object_action(self, form, instance):
        """
        Submit ``instance`` for OCR. The ``form`` argument is not used.
        """
        instance.submit_for_ocr()
class DocumentTypeSubmitView(FormView):
    """
    Form view that submits every document of the chosen document types
    for OCR.
    """
    extra_context = {
        'title': _('Submit all documents of a type for OCR')
    }
    form_class = DocumentTypeFilteredSelectForm
    post_action_redirect = reverse_lazy(viewname='common:tools_list')

    def form_valid(self, form):
        """
        Submit each document of each selected document type for OCR,
        report how many were submitted and redirect to the success URL.
        """
        submitted = 0
        selected_document_types = form.cleaned_data['document_type']

        for document_type in selected_document_types:
            documents = document_type.documents.all()
            for document in documents:
                document.submit_for_ocr()
                submitted += 1

        summary = _(
            '%(count)d documents added to the OCR queue.'
        ) % {
            'count': submitted,
        }
        messages.success(message=summary, request=self.request)

        success_url = self.get_success_url()
        return HttpResponseRedirect(redirect_to=success_url)

    def get_form_extra_kwargs(self):
        """
        Return the extra keyword arguments for the form: multiple
        selection enabled, the OCR permission and the requesting user.
        """
        form_kwargs = {}
        form_kwargs['allow_multiple'] = True
        form_kwargs['permission'] = permission_ocr_document
        form_kwargs['user'] = self.request.user
        return form_kwargs

    def get_post_action_redirect(self):
        """Return the URL of the ``common:tools_list`` view."""
        tools_url = reverse(viewname='common:tools_list')
        return tools_url
class DocumentTypeSettingsEditView(ExternalObjectMixin, SingleObjectEditView):
    """Edit view for the ``auto_ocr`` setting of a document type."""
    external_object_class = DocumentType
    external_object_permission = permission_document_type_ocr_setup
    external_object_pk_url_kwarg = 'pk'
    fields = ('auto_ocr',)
    post_action_redirect = reverse_lazy(
        viewname='documents:document_type_list'
    )

    def get_document_type(self):
        """Return the external object, i.e. the document type being edited."""
        return self.external_object

    def get_extra_context(self):
        """
        Return the template context: the document type under ``object``
        and the page title.
        """
        document_type = self.get_document_type()
        title = _(
            'Edit OCR settings for document type: %s.'
        ) % document_type

        return {'object': document_type, 'title': title}

    def get_object(self, queryset=None):
        """
        Return the ``ocr_settings`` of the document type. The ``queryset``
        argument is ignored.
        """
        document_type = self.get_document_type()
        return document_type.ocr_settings
class EntryListView(SingleObjectListView):
    """List view of all document version OCR errors."""
    view_permission = permission_document_type_ocr_setup
    extra_context = {
        'title': _('OCR errors'),
        'hide_object': True,
    }

    def get_source_queryset(self):
        """Return every ``DocumentVersionOCRError`` entry."""
        error_manager = DocumentVersionOCRError.objects
        return error_manager.all()
class DocumentOCRErrorsListView(SingleObjectListView):
    """
    List view of the OCR errors of the latest version of a single
    document.
    """
    object_permission = permission_ocr_document

    def get_document(self):
        """
        Return the document whose primary key is the ``pk`` URL keyword
        argument, or raise a 404 error when it does not exist.
        """
        return get_object_or_404(klass=Document, pk=self.kwargs['pk'])

    def get_extra_context(self):
        """
        Return the template context: the document under ``object``, the
        ``hide_object`` flag and the page title.
        """
        # Fetch the document a single time; every get_document() call
        # performs its own database lookup.
        document = self.get_document()

        return {
            'hide_object': True,
            'object': document,
            'title': _('OCR errors for document: %s') % document,
        }

    def get_source_queryset(self):
        """Return the OCR errors of the document's latest version."""
        return self.get_document().latest_version.ocr_errors.all()
class DocumentOCRDownloadView(SingleObjectDownloadView):
    """Download view that serves the OCR content of a document as a file."""
    model = Document
    object_permission = permission_ocr_content_view

    def get_file(self):
        """
        Return a virtual file named ``<document>-OCR`` whose content is
        produced from the iterator returned by
        ``get_document_ocr_content``.
        """
        # Resolve the document a single time; each get_object() call may
        # repeat the lookup.
        document = self.get_object()

        # TextIteratorIO and VirtualFile are reached through this class;
        # presumably they are provided by SingleObjectDownloadView.
        file_object = DocumentOCRDownloadView.TextIteratorIO(
            iterator=get_document_ocr_content(document=document)
        )
        return DocumentOCRDownloadView.VirtualFile(
            file=file_object, name='{}-OCR'.format(document)
        )