Update document parsing app

Rename URL parameters to their "_id" forms (for example, "pk" becomes
"document_id"). Pass arguments as keyword arguments. Remove the is_path
argument from the DOCUMENT_PARSING_PDFTOTEXT_PATH setting.

Signed-off-by: Roberto Rosario <Roberto.Rosario@mayan-edms.com>
This commit is contained in:
Roberto Rosario
2019-01-21 00:29:28 -04:00
parent fc29309f68
commit c0b34067ef
11 changed files with 75 additions and 71 deletions

View File

@@ -17,7 +17,7 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
""" """
Returns the content of the selected document page. Returns the content of the selected document page.
""" """
lookup_url_kwarg = 'page_pk' lookup_url_kwarg = 'document_page_id'
mayan_object_permissions = { mayan_object_permissions = {
'GET': (permission_content_view,), 'GET': (permission_content_view,),
} }
@@ -25,11 +25,12 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
serializer_class = DocumentPageContentSerializer serializer_class = DocumentPageContentSerializer
def get_document(self): def get_document(self):
return get_object_or_404(klass=Document, pk=self.kwargs['document_pk']) return get_object_or_404(klass=Document, pk=self.kwargs['document_id'])
def get_document_version(self): def get_document_version(self):
return get_object_or_404( return get_object_or_404(
klass=self.get_document().versions.all(), pk=self.kwargs['version_pk'] klass=self.get_document().versions.all(),
pk=self.kwargs['document_version_id']
) )
def get_queryset(self): def get_queryset(self):

View File

@@ -5,7 +5,7 @@ from django.utils.translation import ugettext_lazy as _
from mayan.apps.events import EventTypeNamespace from mayan.apps.events import EventTypeNamespace
namespace = EventTypeNamespace( namespace = EventTypeNamespace(
name='document_parsing', label=_('Document parsing') label=_('Document parsing'), name='document_parsing'
) )
event_parsing_document_version_submit = namespace.add_event_type( event_parsing_document_version_submit = namespace.add_event_type(

View File

@@ -5,4 +5,3 @@ class ParserError(Exception):
""" """
Base exception for file parsers Base exception for file parsers
""" """
pass

View File

@@ -16,22 +16,26 @@ from .permissions import (
) )
link_document_content = Link( link_document_content = Link(
args='resolved_object.id', icon_class=icon_document_content, icon_class=icon_document_content,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_content_view,), text=_('Content'), permissions=(permission_content_view,), text=_('Content'),
view='document_parsing:document_content', view='document_parsing:document_content',
) )
link_document_page_content = Link( link_document_page_content = Link(
args='resolved_object.id', icon_class=icon_document_content, icon_class=icon_document_content,
kwargs={'document_page_id': 'resolved_object.id'},
permissions=(permission_content_view,), text=_('Content'), permissions=(permission_content_view,), text=_('Content'),
view='document_parsing:document_page_content', view='document_parsing:document_page_content',
) )
link_document_parsing_errors_list = Link( link_document_parsing_errors_list = Link(
args='resolved_object.id', icon_class=icon_document_parsing_errors_list, icon_class=icon_document_parsing_errors_list,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_content_view,), text=_('Parsing errors'), permissions=(permission_content_view,), text=_('Parsing errors'),
view='document_parsing:document_parsing_error_list' view='document_parsing:document_parsing_error_list'
) )
link_document_content_download = Link( link_document_content_download = Link(
args='resolved_object.id', icon_class=icon_document_content_download, icon_class=icon_document_content_download,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_content_view,), text=_('Download content'), permissions=(permission_content_view,), text=_('Download content'),
view='document_parsing:document_content_download' view='document_parsing:document_content_download'
) )
@@ -40,13 +44,14 @@ link_document_multiple_submit = Link(
view='document_parsing:document_multiple_submit' view='document_parsing:document_multiple_submit'
) )
link_document_submit = Link( link_document_submit = Link(
args='resolved_object.id', icon_class=icon_document_submit, icon_class=icon_document_submit,
kwargs={'document_id': 'resolved_object.id'},
permissions=(permission_parse_document,), permissions=(permission_parse_document,),
text=_('Submit for parsing'), view='document_parsing:document_submit' text=_('Submit for parsing'), view='document_parsing:document_submit'
) )
link_document_type_parsing_settings = Link( link_document_type_parsing_settings = Link(
args='resolved_object.id',
icon_class=icon_document_type_parsing_settings, icon_class=icon_document_type_parsing_settings,
kwargs={'document_type_id': 'resolved_object.id'},
permissions=(permission_document_type_parsing_setup,), permissions=(permission_document_type_parsing_setup,),
text=_('Setup parsing'), text=_('Setup parsing'),
view='document_parsing:document_type_parsing_settings', view='document_parsing:document_type_parsing_settings',

View File

@@ -7,12 +7,12 @@ from mayan.apps.permissions import PermissionNamespace
namespace = PermissionNamespace(label=_('Document parsing'), name='document_parsing') namespace = PermissionNamespace(label=_('Document parsing'), name='document_parsing')
permission_content_view = namespace.add_permission( permission_content_view = namespace.add_permission(
name='content_view', label=_('View the content of a document') label=_('View the content of a document'), name='content_view'
) )
permission_document_type_parsing_setup = namespace.add_permission( permission_document_type_parsing_setup = namespace.add_permission(
name='document_type_setup', label=_('Change document type parsing settings'),
label=_('Change document type parsing settings') name='document_type_setup'
) )
permission_parse_document = namespace.add_permission( permission_parse_document = namespace.add_permission(
name='parse_document', label=_('Parse the content of a document') label=_('Parse the content of a document'), name='parse_document'
) )

View File

@@ -4,8 +4,9 @@ from django.utils.translation import ugettext_lazy as _
from mayan.apps.task_manager.classes import CeleryQueue from mayan.apps.task_manager.classes import CeleryQueue
queue_ocr = CeleryQueue(name='parsing', label=_('Parsing')) queue_ocr = CeleryQueue(label=_('Parsing'), name='parsing')
queue_ocr.add_task_type( queue_ocr.add_task_type(
name='mayan.apps.document_parsing.tasks.task_parse_document_version', label=_('Document version parsing'),
label=_('Document version parsing') name='mayan.apps.document_parsing.tasks.task_parse_document_version'
) )

View File

@@ -6,7 +6,7 @@ from mayan.apps.smart_settings import Namespace
from .literals import DEFAULT_PDFTOTEXT_PATH from .literals import DEFAULT_PDFTOTEXT_PATH
namespace = Namespace(name='document_parsing', label=_('Document parsing')) namespace = Namespace(label=_('Document parsing'), name='document_parsing')
setting_auto_parsing = namespace.add_setting( setting_auto_parsing = namespace.add_setting(
global_name='DOCUMENT_PARSING_AUTO_PARSING', default=True, global_name='DOCUMENT_PARSING_AUTO_PARSING', default=True,
@@ -21,5 +21,4 @@ setting_pdftotext_path = namespace.add_setting(
'File path to poppler\'s pdftotext program used to extract text ' 'File path to poppler\'s pdftotext program used to extract text '
'from PDF files.' 'from PDF files.'
), ),
is_path=True
) )

View File

@@ -16,17 +16,14 @@ TEST_DOCUMENT_CONTENT = 'Sample text'
class DocumentParsingAPITestCase(DocumentTestMixin, BaseAPITestCase): class DocumentParsingAPITestCase(DocumentTestMixin, BaseAPITestCase):
test_document_filename = TEST_HYBRID_DOCUMENT test_document_filename = TEST_HYBRID_DOCUMENT
def setUp(self):
super(DocumentParsingAPITestCase, self).setUp()
self.login_user()
def _request_document_page_content_view(self): def _request_document_page_content_view(self):
return self.get( return self.get(
viewname='rest_api:document-page-content-view', viewname='rest_api:document-page-content-view',
args=( kargs={
self.document.pk, self.document.latest_version.pk, 'document_id': self.document.pk,
self.document.latest_version.pages.first().pk, 'version_id': self.document.latest_version.pk,
) 'page_id': self.document.latest_version.pages.first().pk
}
) )
def test_get_document_version_page_content_no_access(self): def test_get_document_version_page_content_no_access(self):

View File

@@ -22,19 +22,16 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
# Ensure we use a PDF file # Ensure we use a PDF file
test_document_filename = TEST_HYBRID_DOCUMENT test_document_filename = TEST_HYBRID_DOCUMENT
def setUp(self):
super(DocumentContentViewsTestCase, self).setUp()
self.login_user()
def _request_document_content_view(self): def _request_document_content_view(self):
return self.get( return self.get(
'document_parsing:document_content', args=(self.document.pk,) viewname='document_parsing:document_content',
kwargs={'document_id': self.document.pk}
) )
def test_document_content_view_no_permissions(self): def test_document_content_view_no_permissions(self):
response = self._request_document_content_view() response = self._request_document_content_view()
self.assertEqual(response.status_code, 403) self.assertEqual(response.status_code, 404)
def test_document_content_view_with_access(self): def test_document_content_view_with_access(self):
self.grant_access( self.grant_access(
@@ -48,15 +45,15 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
def _request_document_page_content_view(self): def _request_document_page_content_view(self):
return self.get( return self.get(
viewname='document_parsing:document_page_content', args=( viewname='document_parsing:document_page_content', kwargs={
self.document.pages.first().pk, 'document_page_id': self.document.pages.first().pk
) }
) )
def test_document_page_content_view_no_permissions(self): def test_document_page_content_view_no_permissions(self):
response = self._request_document_page_content_view() response = self._request_document_page_content_view()
self.assertEqual(response.status_code, 403) self.assertEqual(response.status_code, 404)
def test_document_page_content_view_with_access(self): def test_document_page_content_view_with_access(self):
self.grant_access( self.grant_access(
@@ -71,12 +68,12 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
def _request_document_content_download_view(self): def _request_document_content_download_view(self):
return self.get( return self.get(
viewname='document_parsing:document_content_download', viewname='document_parsing:document_content_download',
args=(self.document.pk,) kwargs={'document_id': self.document.pk}
) )
def test_document_parsing_download_view_no_permission(self): def test_document_parsing_download_view_no_permission(self):
response = self._request_document_content_download_view() response = self._request_document_content_download_view()
self.assertEqual(response.status_code, 403) self.assertEqual(response.status_code, 404)
def test_download_view_with_access(self): def test_download_view_with_access(self):
self.expected_content_type = 'application/octet-stream; charset=utf-8' self.expected_content_type = 'application/octet-stream; charset=utf-8'
@@ -98,14 +95,10 @@ class DocumentTypeViewsTestCase(GenericDocumentViewTestCase):
# Ensure we use a PDF file # Ensure we use a PDF file
test_document_filename = TEST_HYBRID_DOCUMENT test_document_filename = TEST_HYBRID_DOCUMENT
def setUp(self):
super(DocumentTypeViewsTestCase, self).setUp()
self.login_user()
def _request_document_type_parsing_settings_view(self): def _request_document_type_parsing_settings_view(self):
return self.get( return self.get(
viewname='document_parsing:document_type_parsing_settings', viewname='document_parsing:document_type_parsing_settings',
args=(self.document.document_type.pk,) kwargs={'document_type_id': self.document.document_type.pk}
) )
def test_document_type_parsing_settings_view_no_permission(self): def test_document_type_parsing_settings_view_no_permission(self):

View File

@@ -11,46 +11,50 @@ from .views import (
urlpatterns = [ urlpatterns = [
url( url(
r'^documents/(?P<pk>\d+)/content/$', DocumentContentView.as_view(), regex=r'^documents/(?P<document_id>\d+)/content/$',
name='document_content' name='document_content', view=DocumentContentView.as_view()
), ),
url( url(
r'^documents/pages/(?P<pk>\d+)/content/$', regex=r'^documents/pages/(?P<document_page_id>\d+)/content/$',
DocumentPageContentView.as_view(), name='document_page_content' name='document_page_content', view=DocumentPageContentView.as_view()
), ),
url( url(
r'^documents/(?P<pk>\d+)/content/download/$', regex=r'^documents/(?P<document_id>\d+)/content/download/$',
DocumentContentDownloadView.as_view(), name='document_content_download' name='document_content_download',
view=DocumentContentDownloadView.as_view()
), ),
url( url(
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(), regex=r'^documents/(?P<document_id>\d+)/submit/$',
name='document_submit' name='document_submit', view=DocumentSubmitView.as_view()
), ),
url( url(
r'^documents/multiple/submit/$', DocumentSubmitView.as_view(), regex=r'^documents/multiple/submit/$', name='document_multiple_submit',
name='document_multiple_submit' view=DocumentSubmitView.as_view()
), ),
url( url(
r'^documents/(?P<pk>\d+)/errors/$', regex=r'^documents/(?P<document_id>\d+)/errors/$',
DocumentParsingErrorsListView.as_view(), name='document_parsing_error_list',
name='document_parsing_error_list' view=DocumentParsingErrorsListView.as_view()
), ),
url( url(
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(), regex=r'^document_types/submit/$', name='document_type_submit',
name='document_type_submit' view=DocumentTypeSubmitView.as_view()
), ),
url( url(
r'^document_types/(?P<pk>\d+)/parsing/settings/$', regex=r'^document_types/(?P<document_type_id>\d+)/parsing/settings/$',
DocumentTypeSettingsEditView.as_view(), name='document_type_parsing_settings',
name='document_type_parsing_settings' view=DocumentTypeSettingsEditView.as_view()
), ),
url(r'^errors/all/$', ParseErrorListView.as_view(), name='error_list'), url(
regex=r'^errors/all/$', name='error_list',
view=ParseErrorListView.as_view()
)
] ]
api_urls = [ api_urls = [
url( url(
r'^documents/(?P<document_pk>\d+)/versions/(?P<version_pk>\d+)/pages/(?P<page_pk>\d+)/content/$', regex=r'^documents/(?P<document_id>\d+)/versions/(?P<document_version_id>\d+)/pages/(?P<document_page_id>\d+)/content/$',
APIDocumentPageContentView.as_view(), view=APIDocumentPageContentView.as_view(),
name='document-page-content-view' name='document-page-content-view'
), )
] ]

View File

@@ -28,6 +28,7 @@ class DocumentContentView(SingleObjectDetailView):
form_class = DocumentContentForm form_class = DocumentContentForm
model = Document model = Document
object_permission = permission_content_view object_permission = permission_content_view
pk_url_kwarg = 'document_id'
def dispatch(self, request, *args, **kwargs): def dispatch(self, request, *args, **kwargs):
result = super(DocumentContentView, self).dispatch( result = super(DocumentContentView, self).dispatch(
@@ -48,6 +49,7 @@ class DocumentContentView(SingleObjectDetailView):
class DocumentContentDownloadView(SingleObjectDownloadView): class DocumentContentDownloadView(SingleObjectDownloadView):
model = Document model = Document
object_permission = permission_content_view object_permission = permission_content_view
pk_url_kwarg = 'document_id'
def get_file(self): def get_file(self):
file_object = DocumentContentDownloadView.TextIteratorIO( file_object = DocumentContentDownloadView.TextIteratorIO(
@@ -62,6 +64,7 @@ class DocumentPageContentView(SingleObjectDetailView):
form_class = DocumentPageContentForm form_class = DocumentPageContentForm
model = DocumentPage model = DocumentPage
object_permission = permission_content_view object_permission = permission_content_view
pk_url_kwarg = 'document_page_id'
def dispatch(self, request, *args, **kwargs): def dispatch(self, request, *args, **kwargs):
result = super(DocumentPageContentView, self).dispatch( result = super(DocumentPageContentView, self).dispatch(
@@ -84,7 +87,7 @@ class DocumentParsingErrorsListView(SingleObjectListView):
view_permission = permission_content_view view_permission = permission_content_view
def get_document(self): def get_document(self):
return get_object_or_404(klass=Document, pk=self.kwargs['pk']) return get_object_or_404(klass=Document, pk=self.kwargs['document_id'])
def get_extra_context(self): def get_extra_context(self):
return { return {
@@ -141,10 +144,12 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
class DocumentTypeSettingsEditView(SingleObjectEditView): class DocumentTypeSettingsEditView(SingleObjectEditView):
fields = ('auto_parsing',) fields = ('auto_parsing',)
object_permission = permission_document_type_parsing_setup object_permission = permission_document_type_parsing_setup
post_action_redirect = reverse_lazy('documents:document_type_list') post_action_redirect = reverse_lazy(viewname='documents:document_type_list')
def get_document_type(self): def get_document_type(self):
return get_object_or_404(klass=DocumentType, pk=self.kwargs['pk']) return get_object_or_404(
klass=DocumentType, pk=self.kwargs['document_type_id']
)
def get_extra_context(self): def get_extra_context(self):
return { return {
@@ -163,7 +168,7 @@ class DocumentTypeSubmitView(FormView):
'title': _('Submit all documents of a type for parsing') 'title': _('Submit all documents of a type for parsing')
} }
form_class = DocumentTypeFilteredSelectForm form_class = DocumentTypeFilteredSelectForm
post_action_redirect = reverse_lazy('common:tools_list') post_action_redirect = reverse_lazy(viewname='common:tools_list')
def get_form_extra_kwargs(self): def get_form_extra_kwargs(self):
return { return {