Update document parsing app
Update URL parameters to the "_id" forms. Add keyword arguments. Remove use of is_path in the DOCUMENT_PARSING_PDFTOTEXT_PATH setting. Signed-off-by: Roberto Rosario <Roberto.Rosario@mayan-edms.com>
This commit is contained in:
@@ -17,7 +17,7 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
|
|||||||
"""
|
"""
|
||||||
Returns the content of the selected document page.
|
Returns the content of the selected document page.
|
||||||
"""
|
"""
|
||||||
lookup_url_kwarg = 'page_pk'
|
lookup_url_kwarg = 'document_page_id'
|
||||||
mayan_object_permissions = {
|
mayan_object_permissions = {
|
||||||
'GET': (permission_content_view,),
|
'GET': (permission_content_view,),
|
||||||
}
|
}
|
||||||
@@ -25,11 +25,12 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
|
|||||||
serializer_class = DocumentPageContentSerializer
|
serializer_class = DocumentPageContentSerializer
|
||||||
|
|
||||||
def get_document(self):
|
def get_document(self):
|
||||||
return get_object_or_404(klass=Document, pk=self.kwargs['document_pk'])
|
return get_object_or_404(klass=Document, pk=self.kwargs['document_id'])
|
||||||
|
|
||||||
def get_document_version(self):
|
def get_document_version(self):
|
||||||
return get_object_or_404(
|
return get_object_or_404(
|
||||||
klass=self.get_document().versions.all(), pk=self.kwargs['version_pk']
|
klass=self.get_document().versions.all(),
|
||||||
|
pk=self.kwargs['document_version_id']
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from django.utils.translation import ugettext_lazy as _
|
|||||||
from mayan.apps.events import EventTypeNamespace
|
from mayan.apps.events import EventTypeNamespace
|
||||||
|
|
||||||
namespace = EventTypeNamespace(
|
namespace = EventTypeNamespace(
|
||||||
name='document_parsing', label=_('Document parsing')
|
label=_('Document parsing'), name='document_parsing'
|
||||||
)
|
)
|
||||||
|
|
||||||
event_parsing_document_version_submit = namespace.add_event_type(
|
event_parsing_document_version_submit = namespace.add_event_type(
|
||||||
|
|||||||
@@ -5,4 +5,3 @@ class ParserError(Exception):
|
|||||||
"""
|
"""
|
||||||
Base exception for file parsers
|
Base exception for file parsers
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
|
|||||||
@@ -16,22 +16,26 @@ from .permissions import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
link_document_content = Link(
|
link_document_content = Link(
|
||||||
args='resolved_object.id', icon_class=icon_document_content,
|
icon_class=icon_document_content,
|
||||||
|
kwargs={'document_id': 'resolved_object.id'},
|
||||||
permissions=(permission_content_view,), text=_('Content'),
|
permissions=(permission_content_view,), text=_('Content'),
|
||||||
view='document_parsing:document_content',
|
view='document_parsing:document_content',
|
||||||
)
|
)
|
||||||
link_document_page_content = Link(
|
link_document_page_content = Link(
|
||||||
args='resolved_object.id', icon_class=icon_document_content,
|
icon_class=icon_document_content,
|
||||||
|
kwargs={'document_page_id': 'resolved_object.id'},
|
||||||
permissions=(permission_content_view,), text=_('Content'),
|
permissions=(permission_content_view,), text=_('Content'),
|
||||||
view='document_parsing:document_page_content',
|
view='document_parsing:document_page_content',
|
||||||
)
|
)
|
||||||
link_document_parsing_errors_list = Link(
|
link_document_parsing_errors_list = Link(
|
||||||
args='resolved_object.id', icon_class=icon_document_parsing_errors_list,
|
icon_class=icon_document_parsing_errors_list,
|
||||||
|
kwargs={'document_id': 'resolved_object.id'},
|
||||||
permissions=(permission_content_view,), text=_('Parsing errors'),
|
permissions=(permission_content_view,), text=_('Parsing errors'),
|
||||||
view='document_parsing:document_parsing_error_list'
|
view='document_parsing:document_parsing_error_list'
|
||||||
)
|
)
|
||||||
link_document_content_download = Link(
|
link_document_content_download = Link(
|
||||||
args='resolved_object.id', icon_class=icon_document_content_download,
|
icon_class=icon_document_content_download,
|
||||||
|
kwargs={'document_id': 'resolved_object.id'},
|
||||||
permissions=(permission_content_view,), text=_('Download content'),
|
permissions=(permission_content_view,), text=_('Download content'),
|
||||||
view='document_parsing:document_content_download'
|
view='document_parsing:document_content_download'
|
||||||
)
|
)
|
||||||
@@ -40,13 +44,14 @@ link_document_multiple_submit = Link(
|
|||||||
view='document_parsing:document_multiple_submit'
|
view='document_parsing:document_multiple_submit'
|
||||||
)
|
)
|
||||||
link_document_submit = Link(
|
link_document_submit = Link(
|
||||||
args='resolved_object.id', icon_class=icon_document_submit,
|
icon_class=icon_document_submit,
|
||||||
|
kwargs={'document_id': 'resolved_object.id'},
|
||||||
permissions=(permission_parse_document,),
|
permissions=(permission_parse_document,),
|
||||||
text=_('Submit for parsing'), view='document_parsing:document_submit'
|
text=_('Submit for parsing'), view='document_parsing:document_submit'
|
||||||
)
|
)
|
||||||
link_document_type_parsing_settings = Link(
|
link_document_type_parsing_settings = Link(
|
||||||
args='resolved_object.id',
|
|
||||||
icon_class=icon_document_type_parsing_settings,
|
icon_class=icon_document_type_parsing_settings,
|
||||||
|
kwargs={'document_type_id': 'resolved_object.id'},
|
||||||
permissions=(permission_document_type_parsing_setup,),
|
permissions=(permission_document_type_parsing_setup,),
|
||||||
text=_('Setup parsing'),
|
text=_('Setup parsing'),
|
||||||
view='document_parsing:document_type_parsing_settings',
|
view='document_parsing:document_type_parsing_settings',
|
||||||
|
|||||||
@@ -7,12 +7,12 @@ from mayan.apps.permissions import PermissionNamespace
|
|||||||
namespace = PermissionNamespace(label=_('Document parsing'), name='document_parsing')
|
namespace = PermissionNamespace(label=_('Document parsing'), name='document_parsing')
|
||||||
|
|
||||||
permission_content_view = namespace.add_permission(
|
permission_content_view = namespace.add_permission(
|
||||||
name='content_view', label=_('View the content of a document')
|
label=_('View the content of a document'), name='content_view'
|
||||||
)
|
)
|
||||||
permission_document_type_parsing_setup = namespace.add_permission(
|
permission_document_type_parsing_setup = namespace.add_permission(
|
||||||
name='document_type_setup',
|
label=_('Change document type parsing settings'),
|
||||||
label=_('Change document type parsing settings')
|
name='document_type_setup'
|
||||||
)
|
)
|
||||||
permission_parse_document = namespace.add_permission(
|
permission_parse_document = namespace.add_permission(
|
||||||
name='parse_document', label=_('Parse the content of a document')
|
label=_('Parse the content of a document'), name='parse_document'
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,8 +4,9 @@ from django.utils.translation import ugettext_lazy as _
|
|||||||
|
|
||||||
from mayan.apps.task_manager.classes import CeleryQueue
|
from mayan.apps.task_manager.classes import CeleryQueue
|
||||||
|
|
||||||
queue_ocr = CeleryQueue(name='parsing', label=_('Parsing'))
|
queue_ocr = CeleryQueue(label=_('Parsing'), name='parsing')
|
||||||
|
|
||||||
queue_ocr.add_task_type(
|
queue_ocr.add_task_type(
|
||||||
name='mayan.apps.document_parsing.tasks.task_parse_document_version',
|
label=_('Document version parsing'),
|
||||||
label=_('Document version parsing')
|
name='mayan.apps.document_parsing.tasks.task_parse_document_version'
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from mayan.apps.smart_settings import Namespace
|
|||||||
|
|
||||||
from .literals import DEFAULT_PDFTOTEXT_PATH
|
from .literals import DEFAULT_PDFTOTEXT_PATH
|
||||||
|
|
||||||
namespace = Namespace(name='document_parsing', label=_('Document parsing'))
|
namespace = Namespace(label=_('Document parsing'), name='document_parsing')
|
||||||
|
|
||||||
setting_auto_parsing = namespace.add_setting(
|
setting_auto_parsing = namespace.add_setting(
|
||||||
global_name='DOCUMENT_PARSING_AUTO_PARSING', default=True,
|
global_name='DOCUMENT_PARSING_AUTO_PARSING', default=True,
|
||||||
@@ -21,5 +21,4 @@ setting_pdftotext_path = namespace.add_setting(
|
|||||||
'File path to poppler\'s pdftotext program used to extract text '
|
'File path to poppler\'s pdftotext program used to extract text '
|
||||||
'from PDF files.'
|
'from PDF files.'
|
||||||
),
|
),
|
||||||
is_path=True
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -16,17 +16,14 @@ TEST_DOCUMENT_CONTENT = 'Sample text'
|
|||||||
class DocumentParsingAPITestCase(DocumentTestMixin, BaseAPITestCase):
|
class DocumentParsingAPITestCase(DocumentTestMixin, BaseAPITestCase):
|
||||||
test_document_filename = TEST_HYBRID_DOCUMENT
|
test_document_filename = TEST_HYBRID_DOCUMENT
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
super(DocumentParsingAPITestCase, self).setUp()
|
|
||||||
self.login_user()
|
|
||||||
|
|
||||||
def _request_document_page_content_view(self):
|
def _request_document_page_content_view(self):
|
||||||
return self.get(
|
return self.get(
|
||||||
viewname='rest_api:document-page-content-view',
|
viewname='rest_api:document-page-content-view',
|
||||||
args=(
|
kwargs={
|
||||||
self.document.pk, self.document.latest_version.pk,
|
'document_id': self.document.pk,
|
||||||
self.document.latest_version.pages.first().pk,
|
'document_version_id': self.document.latest_version.pk,
|
||||||
)
|
'document_page_id': self.document.latest_version.pages.first().pk
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_get_document_version_page_content_no_access(self):
|
def test_get_document_version_page_content_no_access(self):
|
||||||
|
|||||||
@@ -22,19 +22,16 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
|
|||||||
# Ensure we use a PDF file
|
# Ensure we use a PDF file
|
||||||
test_document_filename = TEST_HYBRID_DOCUMENT
|
test_document_filename = TEST_HYBRID_DOCUMENT
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
super(DocumentContentViewsTestCase, self).setUp()
|
|
||||||
self.login_user()
|
|
||||||
|
|
||||||
def _request_document_content_view(self):
|
def _request_document_content_view(self):
|
||||||
return self.get(
|
return self.get(
|
||||||
'document_parsing:document_content', args=(self.document.pk,)
|
viewname='document_parsing:document_content',
|
||||||
|
kwargs={'document_id': self.document.pk}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_document_content_view_no_permissions(self):
|
def test_document_content_view_no_permissions(self):
|
||||||
response = self._request_document_content_view()
|
response = self._request_document_content_view()
|
||||||
|
|
||||||
self.assertEqual(response.status_code, 403)
|
self.assertEqual(response.status_code, 404)
|
||||||
|
|
||||||
def test_document_content_view_with_access(self):
|
def test_document_content_view_with_access(self):
|
||||||
self.grant_access(
|
self.grant_access(
|
||||||
@@ -48,15 +45,15 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
|
|||||||
|
|
||||||
def _request_document_page_content_view(self):
|
def _request_document_page_content_view(self):
|
||||||
return self.get(
|
return self.get(
|
||||||
viewname='document_parsing:document_page_content', args=(
|
viewname='document_parsing:document_page_content', kwargs={
|
||||||
self.document.pages.first().pk,
|
'document_page_id': self.document.pages.first().pk
|
||||||
)
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_document_page_content_view_no_permissions(self):
|
def test_document_page_content_view_no_permissions(self):
|
||||||
response = self._request_document_page_content_view()
|
response = self._request_document_page_content_view()
|
||||||
|
|
||||||
self.assertEqual(response.status_code, 403)
|
self.assertEqual(response.status_code, 404)
|
||||||
|
|
||||||
def test_document_page_content_view_with_access(self):
|
def test_document_page_content_view_with_access(self):
|
||||||
self.grant_access(
|
self.grant_access(
|
||||||
@@ -71,12 +68,12 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
|
|||||||
def _request_document_content_download_view(self):
|
def _request_document_content_download_view(self):
|
||||||
return self.get(
|
return self.get(
|
||||||
viewname='document_parsing:document_content_download',
|
viewname='document_parsing:document_content_download',
|
||||||
args=(self.document.pk,)
|
kwargs={'document_id': self.document.pk}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_document_parsing_download_view_no_permission(self):
|
def test_document_parsing_download_view_no_permission(self):
|
||||||
response = self._request_document_content_download_view()
|
response = self._request_document_content_download_view()
|
||||||
self.assertEqual(response.status_code, 403)
|
self.assertEqual(response.status_code, 404)
|
||||||
|
|
||||||
def test_download_view_with_access(self):
|
def test_download_view_with_access(self):
|
||||||
self.expected_content_type = 'application/octet-stream; charset=utf-8'
|
self.expected_content_type = 'application/octet-stream; charset=utf-8'
|
||||||
@@ -98,14 +95,10 @@ class DocumentTypeViewsTestCase(GenericDocumentViewTestCase):
|
|||||||
# Ensure we use a PDF file
|
# Ensure we use a PDF file
|
||||||
test_document_filename = TEST_HYBRID_DOCUMENT
|
test_document_filename = TEST_HYBRID_DOCUMENT
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
super(DocumentTypeViewsTestCase, self).setUp()
|
|
||||||
self.login_user()
|
|
||||||
|
|
||||||
def _request_document_type_parsing_settings_view(self):
|
def _request_document_type_parsing_settings_view(self):
|
||||||
return self.get(
|
return self.get(
|
||||||
viewname='document_parsing:document_type_parsing_settings',
|
viewname='document_parsing:document_type_parsing_settings',
|
||||||
args=(self.document.document_type.pk,)
|
kwargs={'document_type_id': self.document.document_type.pk}
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_document_type_parsing_settings_view_no_permission(self):
|
def test_document_type_parsing_settings_view_no_permission(self):
|
||||||
|
|||||||
@@ -11,46 +11,50 @@ from .views import (
|
|||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
url(
|
url(
|
||||||
r'^documents/(?P<pk>\d+)/content/$', DocumentContentView.as_view(),
|
regex=r'^documents/(?P<document_id>\d+)/content/$',
|
||||||
name='document_content'
|
name='document_content', view=DocumentContentView.as_view()
|
||||||
),
|
),
|
||||||
url(
|
url(
|
||||||
r'^documents/pages/(?P<pk>\d+)/content/$',
|
regex=r'^documents/pages/(?P<document_page_id>\d+)/content/$',
|
||||||
DocumentPageContentView.as_view(), name='document_page_content'
|
name='document_page_content', view=DocumentPageContentView.as_view()
|
||||||
),
|
),
|
||||||
url(
|
url(
|
||||||
r'^documents/(?P<pk>\d+)/content/download/$',
|
regex=r'^documents/(?P<document_id>\d+)/content/download/$',
|
||||||
DocumentContentDownloadView.as_view(), name='document_content_download'
|
name='document_content_download',
|
||||||
|
view=DocumentContentDownloadView.as_view()
|
||||||
),
|
),
|
||||||
url(
|
url(
|
||||||
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
|
regex=r'^documents/(?P<document_id>\d+)/submit/$',
|
||||||
name='document_submit'
|
name='document_submit', view=DocumentSubmitView.as_view()
|
||||||
),
|
),
|
||||||
url(
|
url(
|
||||||
r'^documents/multiple/submit/$', DocumentSubmitView.as_view(),
|
regex=r'^documents/multiple/submit/$', name='document_multiple_submit',
|
||||||
name='document_multiple_submit'
|
view=DocumentSubmitView.as_view()
|
||||||
),
|
),
|
||||||
url(
|
url(
|
||||||
r'^documents/(?P<pk>\d+)/errors/$',
|
regex=r'^documents/(?P<document_id>\d+)/errors/$',
|
||||||
DocumentParsingErrorsListView.as_view(),
|
name='document_parsing_error_list',
|
||||||
name='document_parsing_error_list'
|
view=DocumentParsingErrorsListView.as_view()
|
||||||
),
|
),
|
||||||
url(
|
url(
|
||||||
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
|
regex=r'^document_types/submit/$', name='document_type_submit',
|
||||||
name='document_type_submit'
|
view=DocumentTypeSubmitView.as_view()
|
||||||
),
|
),
|
||||||
url(
|
url(
|
||||||
r'^document_types/(?P<pk>\d+)/parsing/settings/$',
|
regex=r'^document_types/(?P<document_type_id>\d+)/parsing/settings/$',
|
||||||
DocumentTypeSettingsEditView.as_view(),
|
name='document_type_parsing_settings',
|
||||||
name='document_type_parsing_settings'
|
view=DocumentTypeSettingsEditView.as_view()
|
||||||
),
|
),
|
||||||
url(r'^errors/all/$', ParseErrorListView.as_view(), name='error_list'),
|
url(
|
||||||
|
regex=r'^errors/all/$', name='error_list',
|
||||||
|
view=ParseErrorListView.as_view()
|
||||||
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
api_urls = [
|
api_urls = [
|
||||||
url(
|
url(
|
||||||
r'^documents/(?P<document_pk>\d+)/versions/(?P<version_pk>\d+)/pages/(?P<page_pk>\d+)/content/$',
|
regex=r'^documents/(?P<document_id>\d+)/versions/(?P<document_version_id>\d+)/pages/(?P<document_page_id>\d+)/content/$',
|
||||||
APIDocumentPageContentView.as_view(),
|
view=APIDocumentPageContentView.as_view(),
|
||||||
name='document-page-content-view'
|
name='document-page-content-view'
|
||||||
),
|
)
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ class DocumentContentView(SingleObjectDetailView):
|
|||||||
form_class = DocumentContentForm
|
form_class = DocumentContentForm
|
||||||
model = Document
|
model = Document
|
||||||
object_permission = permission_content_view
|
object_permission = permission_content_view
|
||||||
|
pk_url_kwarg = 'document_id'
|
||||||
|
|
||||||
def dispatch(self, request, *args, **kwargs):
|
def dispatch(self, request, *args, **kwargs):
|
||||||
result = super(DocumentContentView, self).dispatch(
|
result = super(DocumentContentView, self).dispatch(
|
||||||
@@ -48,6 +49,7 @@ class DocumentContentView(SingleObjectDetailView):
|
|||||||
class DocumentContentDownloadView(SingleObjectDownloadView):
|
class DocumentContentDownloadView(SingleObjectDownloadView):
|
||||||
model = Document
|
model = Document
|
||||||
object_permission = permission_content_view
|
object_permission = permission_content_view
|
||||||
|
pk_url_kwarg = 'document_id'
|
||||||
|
|
||||||
def get_file(self):
|
def get_file(self):
|
||||||
file_object = DocumentContentDownloadView.TextIteratorIO(
|
file_object = DocumentContentDownloadView.TextIteratorIO(
|
||||||
@@ -62,6 +64,7 @@ class DocumentPageContentView(SingleObjectDetailView):
|
|||||||
form_class = DocumentPageContentForm
|
form_class = DocumentPageContentForm
|
||||||
model = DocumentPage
|
model = DocumentPage
|
||||||
object_permission = permission_content_view
|
object_permission = permission_content_view
|
||||||
|
pk_url_kwarg = 'document_page_id'
|
||||||
|
|
||||||
def dispatch(self, request, *args, **kwargs):
|
def dispatch(self, request, *args, **kwargs):
|
||||||
result = super(DocumentPageContentView, self).dispatch(
|
result = super(DocumentPageContentView, self).dispatch(
|
||||||
@@ -84,7 +87,7 @@ class DocumentParsingErrorsListView(SingleObjectListView):
|
|||||||
view_permission = permission_content_view
|
view_permission = permission_content_view
|
||||||
|
|
||||||
def get_document(self):
|
def get_document(self):
|
||||||
return get_object_or_404(klass=Document, pk=self.kwargs['pk'])
|
return get_object_or_404(klass=Document, pk=self.kwargs['document_id'])
|
||||||
|
|
||||||
def get_extra_context(self):
|
def get_extra_context(self):
|
||||||
return {
|
return {
|
||||||
@@ -141,10 +144,12 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
|
|||||||
class DocumentTypeSettingsEditView(SingleObjectEditView):
|
class DocumentTypeSettingsEditView(SingleObjectEditView):
|
||||||
fields = ('auto_parsing',)
|
fields = ('auto_parsing',)
|
||||||
object_permission = permission_document_type_parsing_setup
|
object_permission = permission_document_type_parsing_setup
|
||||||
post_action_redirect = reverse_lazy('documents:document_type_list')
|
post_action_redirect = reverse_lazy(viewname='documents:document_type_list')
|
||||||
|
|
||||||
def get_document_type(self):
|
def get_document_type(self):
|
||||||
return get_object_or_404(klass=DocumentType, pk=self.kwargs['pk'])
|
return get_object_or_404(
|
||||||
|
klass=DocumentType, pk=self.kwargs['document_type_id']
|
||||||
|
)
|
||||||
|
|
||||||
def get_extra_context(self):
|
def get_extra_context(self):
|
||||||
return {
|
return {
|
||||||
@@ -163,7 +168,7 @@ class DocumentTypeSubmitView(FormView):
|
|||||||
'title': _('Submit all documents of a type for parsing')
|
'title': _('Submit all documents of a type for parsing')
|
||||||
}
|
}
|
||||||
form_class = DocumentTypeFilteredSelectForm
|
form_class = DocumentTypeFilteredSelectForm
|
||||||
post_action_redirect = reverse_lazy('common:tools_list')
|
post_action_redirect = reverse_lazy(viewname='common:tools_list')
|
||||||
|
|
||||||
def get_form_extra_kwargs(self):
|
def get_form_extra_kwargs(self):
|
||||||
return {
|
return {
|
||||||
|
|||||||
Reference in New Issue
Block a user