Update OCR app

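Use keyword arguments in function and URL pattern calls. Rename the URL
parameters from "pk" to the "_id" form (for example "document_id").
Update the view tests to use the new URL keyword arguments and to expect
a 404 response instead of 403 when access is denied.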

Signed-off-by: Roberto Rosario <Roberto.Rosario.Gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2019-01-21 19:24:00 -04:00
parent 50333d1326
commit 83a9b5a60a
15 changed files with 119 additions and 80 deletions

View File

@@ -38,7 +38,7 @@ class APIDocumentVersionOCRView(generics.GenericAPIView):
"""
post: Submit a document version for OCR.
"""
lookup_url_kwarg = 'version_pk'
lookup_url_kwarg = 'document_version_pk'
mayan_object_permissions = {
'POST': (permission_ocr_document,)
}
@@ -66,7 +66,7 @@ class APIDocumentPageOCRContentView(generics.RetrieveAPIView):
"""
get: Returns the OCR content of the selected document page.
"""
lookup_url_kwarg = 'page_pk'
lookup_url_kwarg = 'document_page_pk'
mayan_object_permissions = {
'GET': (permission_ocr_content_view,),
}
@@ -78,7 +78,7 @@ class APIDocumentPageOCRContentView(generics.RetrieveAPIView):
def get_document_version(self):
return get_object_or_404(
klass=self.get_document().versions.all(), pk=self.kwargs['version_pk']
klass=self.get_document().versions.all(), pk=self.kwargs['document_version_pk']
)
def get_queryset(self):

View File

@@ -4,7 +4,7 @@ from django.utils.translation import ugettext_lazy as _
from mayan.apps.events import EventTypeNamespace
namespace = EventTypeNamespace(name='ocr', label=_('OCR'))
namespace = EventTypeNamespace(label=_('OCR'), name='ocr')
event_ocr_document_version_submit = namespace.add_event_type(
label=_('Document version submitted for OCR'),

View File

@@ -3,6 +3,6 @@ from __future__ import unicode_literals
class OCRError(Exception):
"""
Raised by the OCR backend
Raised by the OCR backend for unexpected events that stop the
OCR processing.
"""
pass

View File

@@ -16,17 +16,20 @@ from .permissions import (
)
link_document_page_ocr_content = Link(
args='resolved_object.id', icon_class=icon_document_content,
icon_class=icon_document_content,
kwargs={'document_page_id': 'resolved_object.id'},
permissions=(permission_ocr_content_view,), text=_('OCR'),
view='ocr:document_page_content',
)
link_document_ocr_content = Link(
args='resolved_object.id', icon_class=icon_document_content,
icon_class=icon_document_content,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_ocr_content_view,), text=_('OCR'),
view='ocr:document_content',
)
link_document_submit = Link(
args='resolved_object.id', icon_class=icon_document_submit,
icon_class=icon_document_submit,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_ocr_document,), text=_('Submit for OCR'),
view='ocr:document_submit'
)
@@ -35,7 +38,8 @@ link_document_multiple_submit = Link(
view='ocr:document_multiple_submit'
)
link_document_type_ocr_settings = Link(
args='resolved_object.id', icon_class=icon_document_type_ocr_settings,
icon_class=icon_document_type_ocr_settings,
kwargs={'document_type_id': 'resolved_object.id'},
permissions=(permission_document_type_ocr_setup,), text=_('Setup OCR'),
view='ocr:document_type_settings',
)
@@ -49,12 +53,14 @@ link_entry_list = Link(
text=_('OCR errors'), view='ocr:entry_list'
)
link_document_ocr_errors_list = Link(
args='resolved_object.id', icon_class=icon_document_ocr_errors_list,
icon_class=icon_document_ocr_errors_list,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_ocr_content_view,), text=_('OCR errors'),
view='ocr:document_error_list'
)
link_document_ocr_download = Link(
args='resolved_object.id', icon_class=icon_document_ocr_download,
icon_class=icon_document_ocr_download,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_ocr_content_view,), text=_('Download OCR text'),
view='ocr:document_download'
)

View File

@@ -7,13 +7,13 @@ from mayan.apps.permissions import PermissionNamespace
namespace = PermissionNamespace(label=_('OCR'), name='ocr')
permission_ocr_document = namespace.add_permission(
name='ocr_document', label=_('Submit documents for OCR')
label=_('Submit documents for OCR'), name='ocr_document'
)
permission_ocr_content_view = namespace.add_permission(
name='ocr_content_view',
label=_('View the transcribed text from document')
label=_('View the transcribed text from document'),
name='ocr_content_view'
)
permission_document_type_ocr_setup = namespace.add_permission(
name='ocr_document_type_setup',
label=_('Change document type OCR settings')
label=_('Change document type OCR settings'),
name='ocr_document_type_setup'
)

View File

@@ -4,7 +4,8 @@ from django.utils.translation import ugettext_lazy as _
from mayan.apps.task_manager.classes import CeleryQueue
queue_ocr = CeleryQueue(name='ocr', label=_('OCR'))
queue_ocr = CeleryQueue(label=_('OCR'), name='ocr')
queue_ocr.add_task_type(
name='mayan.apps.ocr.tasks.task_do_ocr', label=_('Document version OCR')
label=_('Document version OCR'), name='mayan.apps.ocr.tasks.task_do_ocr'
)

View File

@@ -5,5 +5,5 @@ from django.utils.module_loading import import_string
from .settings import setting_ocr_backend, setting_ocr_backend_arguments
ocr_backend = import_string(
setting_ocr_backend.value
dotted_path=setting_ocr_backend.value
)(**setting_ocr_backend_arguments.value)

View File

@@ -4,7 +4,7 @@ from django.utils.translation import ugettext_lazy as _
from mayan.apps.smart_settings import Namespace
namespace = Namespace(name='ocr', label=_('OCR'))
namespace = Namespace(label=_('OCR'), name='ocr')
setting_ocr_backend = namespace.add_setting(
global_name='OCR_BACKEND', default='mayan.apps.ocr.backends.pyocr.PyOCR',

View File

@@ -28,7 +28,7 @@ def task_do_ocr(self, document_version_pk):
logger.debug('trying to acquire lock: %s', lock_id)
# Acquire lock to avoid doing OCR on the same document version more
# than once concurrently
lock = locking_backend.acquire_lock(lock_id, LOCK_EXPIRE)
lock = locking_backend.acquire_lock(name=lock_id, timeout=LOCK_EXPIRE)
logger.debug('acquired lock: %s', lock_id)
document_version = None
try:

View File

@@ -1,4 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
TEST_DOCUMENT_CONTENT = 'Mayan EDMS Documentation'
TEST_DOCUMENT_CONTENT_DEU_1 = 'Repository für elektronische Dokumente.'
TEST_DOCUMENT_CONTENT_DEU_2 = 'Es bietet einen'
TEST_OCR_INDEX_NODE_TEMPLATE = '{% if "mayan" in document.get_ocr_content().lower() %}mayan{% endif %}'
TEST_OCR_INDEX_NODE_TEMPLATE_LEVEL = 'mayan'

View File

@@ -23,7 +23,7 @@ class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
def _request_document_ocr_submit_view(self):
return self.post(
viewname='rest_api:document-ocr-submit-view',
args=(self.document.pk,)
kwargs={'document_id': self.document.pk}
)
def test_submit_document_no_access(self):
@@ -42,7 +42,10 @@ class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
def _request_document_version_ocr_submit_view(self):
return self.post(
viewname='rest_api:document-version-ocr-submit-view',
args=(self.document.pk, self.document.latest_version.pk,)
kwargs={
'document_id': self.document.pk,
'document_version_id': self.document.latest_version.pk
}
)
def test_submit_document_version_no_access(self):
@@ -61,10 +64,11 @@ class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
def _request_document_page_content_view(self):
return self.get(
viewname='rest_api:document-page-ocr-content-view',
args=(
self.document.pk, self.document.latest_version.pk,
self.document.latest_version.pages.first().pk,
)
kwargs={
'document_id': self.document.pk,
'document_version_id': self.document.latest_version.pk,
'document_page_id': self.document.latest_version.pages.first().pk
}
)
def test_get_document_version_page_content_no_access(self):

View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.test import override_settings
@@ -11,9 +9,10 @@ from mayan.apps.documents.tests import (
DocumentTestMixin, TEST_DEU_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL
)
TEST_DOCUMENT_CONTENT = 'Mayan EDMS Documentation'
TEST_DOCUMENT_CONTENT_DEU_1 = 'Repository für elektronische Dokumente.'
TEST_DOCUMENT_CONTENT_DEU_2 = 'Es bietet einen'
from .literals import (
TEST_DOCUMENT_CONTENT, TEST_DOCUMENT_CONTENT_DEU_1,
TEST_DOCUMENT_CONTENT_DEU_2
)
@override_settings(OCR_AUTO_OCR=True)

View File

@@ -7,7 +7,7 @@ from ..permissions import (
permission_ocr_document,
)
TEST_DOCUMENT_CONTENT = 'Mayan EDMS Documentation'
from .literals import TEST_DOCUMENT_CONTENT
class OCRViewsTestCase(GenericDocumentViewTestCase):
@@ -22,14 +22,14 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
def _request_document_content_view(self):
return self.get(
viewname='ocr:document_content',
kwargs={'pk': self.document.pk}
kwargs={'document_id': self.document.pk}
)
def test_document_content_view_no_permissions(self):
self.document.submit_for_ocr()
response = self._request_document_content_view()
self.assertEqual(response.status_code, 403)
self.assertEqual(response.status_code, 404)
def test_document_content_view_with_access(self):
self.document.submit_for_ocr()
@@ -46,14 +46,14 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
def _request_document_page_content_view(self):
return self.get(
viewname='ocr:document_page_content',
kwargs={'pk': self.document.pages.first().pk}
kwargs={'document_page_id': self.document.pages.first().pk}
)
def test_document_page_content_view_no_permissions(self):
self.document.submit_for_ocr()
response = self._request_document_page_content_view()
self.assertEqual(response.status_code, 403)
self.assertEqual(response.status_code, 404)
def test_document_page_content_view_with_access(self):
self.document.submit_for_ocr()
@@ -70,7 +70,7 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
def _request_document_submit_view(self):
return self.post(
viewname='ocr:document_submit',
kwargs={'pk': self.document.pk}
kwargs={'document_id': self.document.pk}
)
def test_document_submit_view_no_permission(self):
@@ -110,13 +110,13 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
def _request_document_ocr_download_view(self):
return self.get(
viewname='ocr:document_download',
kwargs={'pk': self.document.pk}
kwargs={'document_id': self.document.pk}
)
def test_document_ocr_download_view_no_permission(self):
self.document.submit_for_ocr()
response = self._request_document_ocr_download_view()
self.assertEqual(response.status_code, 403)
self.assertEqual(response.status_code, 404)
def test_document_ocr_download_view_with_permission(self):
self.document.submit_for_ocr()
@@ -144,12 +144,12 @@ class DocumentTypeViewsTestCase(GenericDocumentViewTestCase):
def _request_document_type_ocr_settings_view(self):
return self.get(
viewname='ocr:document_type_settings',
kwargs={'pk': self.document.document_type.pk}
kwargs={'document_type_id': self.document.document_type.pk}
)
def test_document_type_ocr_settings_view_no_permission(self):
response = self._request_document_type_ocr_settings_view()
self.assertEqual(response.status_code, 403)
self.assertEqual(response.status_code, 404)
def test_document_type_ocr_settings_view_with_access(self):
self.grant_access(

View File

@@ -14,54 +14,61 @@ from .views import (
urlpatterns = [
url(
r'^documents/pages/(?P<pk>\d+)/content/$',
DocumentPageOCRContentView.as_view(), name='document_page_content'
regex=r'^document_types/ocr/submit/$', name='document_type_submit',
view=DocumentTypeSubmitView.as_view()
),
url(
r'^documents/(?P<pk>\d+)/content/$', DocumentOCRContentView.as_view(),
name='document_content'
regex=r'^document_types/(?P<document_type_id>\d+)/ocr/settings/$',
name='document_type_settings',
view=DocumentTypeSettingsEditView.as_view()
),
url(
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
name='document_submit'
regex=r'^documents/(?P<document_id>\d+)/ocr/content/$',
name='document_content', view=DocumentOCRContentView.as_view()
),
url(
r'^documents/(?P<pk>\d+)/ocr/errors/$',
DocumentOCRErrorsListView.as_view(), name='document_error_list'
regex=r'^documents/(?P<document_id>\d+)/ocr/download/$',
name='document_download', view=DocumentOCRDownloadView.as_view()
),
url(
r'^documents/(?P<pk>\d+)/ocr/download/$',
DocumentOCRDownloadView.as_view(), name='document_download'
regex=r'^documents/(?P<document_id>\d+)/ocr/errors/$',
name='document_error_list',
view=DocumentOCRErrorsListView.as_view()
),
url(
r'^documents/multiple/submit/$', DocumentSubmitView.as_view(),
name='document_multiple_submit'
regex=r'^documents/(?P<document_id>\d+)/ocr/submit/$',
name='document_submit', view=DocumentSubmitView.as_view()
),
url(
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
name='document_type_submit'
regex=r'^documents/multiple/ocr/submit/$',
name='document_multiple_submit',
view=DocumentSubmitView.as_view()
),
url(
r'^document_types/(?P<pk>\d+)/ocr/settings/$',
DocumentTypeSettingsEditView.as_view(),
name='document_type_settings'
regex=r'^documents/pages/(?P<document_page_id>\d+)/ocr/content/$',
name='document_page_content',
view=DocumentPageOCRContentView.as_view()
),
url(r'^errors/$', EntryListView.as_view(), name='entry_list'),
url(
regex=r'^errors/$', name='entry_list',
view=EntryListView.as_view()
)
]
api_urls = [
url(
r'^documents/(?P<pk>\d+)/submit/$', APIDocumentOCRView.as_view(),
name='document-ocr-submit-view'
regex=r'^documents/(?P<document_id>\d+)/ocr/submit/$',
name='document-ocr-submit-view',
view=APIDocumentOCRView.as_view()
),
url(
r'^documents/(?P<document_pk>\d+)/versions/(?P<version_pk>\d+)/ocr/$',
APIDocumentVersionOCRView.as_view(),
name='document-version-ocr-submit-view'
regex=r'^documents/(?P<document_id>\d+)/versions/(?P<document_version_id>\d+)/ocr/$',
name='document-version-ocr-submit-view',
view=APIDocumentVersionOCRView.as_view()
),
url(
r'^documents/(?P<document_pk>\d+)/versions/(?P<version_pk>\d+)/pages/(?P<page_pk>\d+)/ocr/$',
APIDocumentPageOCRContentView.as_view(),
name='document-page-ocr-content-view'
),
regex=r'^documents/(?P<document_id>\d+)/versions/(?P<document_version_id>\d+)/pages/(?P<document_page_id>\d+)/ocr/$',
name='document-page-ocr-content-view',
view=APIDocumentPageOCRContentView.as_view()
)
]

View File

@@ -6,6 +6,7 @@ from django.shortcuts import get_object_or_404
from django.urls import reverse_lazy
from django.utils.translation import ugettext_lazy as _, ungettext
from mayan.apps.acls.models import AccessControlList
from mayan.apps.common.generics import (
FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
@@ -26,6 +27,7 @@ class DocumentOCRContentView(SingleObjectDetailView):
form_class = DocumentOCRContentForm
model = Document
object_permission = permission_ocr_content_view
pk_url_kwarg = 'document_id'
def dispatch(self, request, *args, **kwargs):
result = super(DocumentOCRContentView, self).dispatch(
@@ -46,6 +48,7 @@ class DocumentOCRContentView(SingleObjectDetailView):
class DocumentOCRDownloadView(SingleObjectDownloadView):
model = Document
object_permission = permission_ocr_content_view
pk_url_kwarg = 'document_id'
def get_file(self):
file_object = DocumentOCRDownloadView.TextIteratorIO(
@@ -60,7 +63,9 @@ class DocumentOCRErrorsListView(SingleObjectListView):
object_permission = permission_ocr_document
def get_document(self):
return get_object_or_404(klass=Document, pk=self.kwargs['pk'])
return get_object_or_404(
klass=Document, pk=self.kwargs['document_id']
)
def get_extra_context(self):
return {
@@ -77,6 +82,7 @@ class DocumentPageOCRContentView(SingleObjectDetailView):
form_class = DocumentPageOCRContentForm
model = DocumentPage
object_permission = permission_ocr_content_view
pk_url_kwarg = 'document_page_id'
def dispatch(self, request, *args, **kwargs):
result = super(DocumentPageOCRContentView, self).dispatch(
@@ -98,6 +104,7 @@ class DocumentPageOCRContentView(SingleObjectDetailView):
class DocumentSubmitView(MultipleObjectConfirmActionView):
model = Document
object_permission = permission_ocr_document
pk_url_kwarg = 'document_id'
success_message = '%(count)d document submitted to the OCR queue.'
success_message_plural = '%(count)d documents submitted to the OCR queue.'
@@ -106,9 +113,9 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
result = {
'title': ungettext(
'Submit the selected document to the OCR queue?',
'Submit the selected documents to the OCR queue?',
queryset.count()
singular='Submit the selected document to the OCR queue?',
plural='Submit the selected documents to the OCR queue?',
number=queryset.count()
)
}
@@ -121,10 +128,20 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
class DocumentTypeSettingsEditView(SingleObjectEditView):
fields = ('auto_ocr',)
object_permission = permission_document_type_ocr_setup
post_action_redirect = reverse_lazy('documents:document_type_list')
post_action_redirect = reverse_lazy(
viewname='documents:document_type_list'
)
def get_document_type(self):
return get_object_or_404(klass=DocumentType, pk=self.kwargs['pk'])
queryset = AccessControlList.objects.restrict_queryset(
permission=permission_document_type_ocr_setup,
queryset=DocumentType.objects.all(),
user=self.request.user
)
return get_object_or_404(
klass=queryset, pk=self.kwargs['document_type_id']
)
def get_extra_context(self):
return {
@@ -143,7 +160,7 @@ class DocumentTypeSubmitView(FormView):
'title': _('Submit all documents of a type for OCR')
}
form_class = DocumentTypeFilteredSelectForm
post_action_redirect = reverse_lazy('common:tools_list')
post_action_redirect = reverse_lazy(viewname='common:tools_list')
def get_form_extra_kwargs(self):
return {
@@ -160,14 +177,14 @@ class DocumentTypeSubmitView(FormView):
count += 1
messages.success(
self.request, _(
message=_(
'%(count)d documents added to the OCR queue.'
) % {
'count': count,
}
}, request=self.request
)
return HttpResponseRedirect(self.get_success_url())
return HttpResponseRedirect(redirect_to=self.get_success_url())
class EntryListView(SingleObjectListView):