Support deleting the parsed content of a document

Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
This commit is contained in:
Roberto Rosario
2019-09-24 15:36:32 -04:00
parent fb53726888
commit d7818b64b0
11 changed files with 204 additions and 44 deletions

View File

@@ -11,6 +11,9 @@
* Add missing recursive option to Docker entrypoint * Add missing recursive option to Docker entrypoint
chown. GitLab issue #668. Thanks to John Wice (@brilthor) chown. GitLab issue #668. Thanks to John Wice (@brilthor)
for the report. for the report.
* Add support for deleting the parsed content of a document
or selection of documents.
* Add parsed content deleted event.
3.2.7 (2019-08-28) 3.2.7 (2019-08-28)
================== ==================

View File

@@ -27,6 +27,10 @@ Other changes
- Add missing recursive option to Docker entrypoint - Add missing recursive option to Docker entrypoint
chown. GitLab issue #668. Thanks to John Wice (@brilthor) chown. GitLab issue #668. Thanks to John Wice (@brilthor)
for the report. for the report.
- Add support for deleting the parsed content of a document
or selection of documents.
- Add parsed content deleted event.
Removals Removals
-------- --------

View File

@@ -14,15 +14,22 @@ from mayan.apps.common.menus import (
) )
from mayan.apps.documents.search import document_search, document_page_search from mayan.apps.documents.search import document_search, document_page_search
from mayan.apps.documents.signals import post_version_upload from mayan.apps.documents.signals import post_version_upload
from mayan.apps.events.classes import ModelEventType
from mayan.apps.navigation.classes import SourceColumn from mayan.apps.navigation.classes import SourceColumn
from .dependencies import * # NOQA from .dependencies import * # NOQA
from .events import (
event_parsing_document_content_deleted,
event_parsing_document_version_submit,
event_parsing_document_version_finish
)
from .handlers import ( from .handlers import (
handler_index_document, handler_initialize_new_parsing_settings, handler_index_document, handler_initialize_new_parsing_settings,
handler_parse_document_version handler_parse_document_version
) )
from .links import ( from .links import (
link_document_content, link_document_page_content, link_document_content, link_document_content_delete,
link_document_content_delete_multiple, link_document_page_content,
link_document_content_download, link_document_parsing_errors_list, link_document_content_download, link_document_parsing_errors_list,
link_document_submit_multiple, link_document_submit, link_document_submit_multiple, link_document_submit,
link_document_type_parsing_settings, link_document_type_submit, link_document_type_parsing_settings, link_document_type_submit,
@@ -85,6 +92,14 @@ class DocumentParsingApp(MayanAppConfig):
value=method_document_version_parsing_submit value=method_document_version_parsing_submit
) )
ModelEventType.register(
model=Document, event_types=(
event_parsing_document_content_deleted,
event_parsing_document_version_submit,
event_parsing_document_version_finish
)
)
ModelField( ModelField(
model=Document, name='versions__pages__content__content' model=Document, name='versions__pages__content__content'
) )
@@ -136,16 +151,21 @@ class DocumentParsingApp(MayanAppConfig):
sources=(DocumentType,) sources=(DocumentType,)
) )
menu_multi_item.bind_links( menu_multi_item.bind_links(
links=(link_document_submit_multiple,), sources=(Document,) links=(
link_document_content_delete_multiple,
link_document_submit_multiple,
), sources=(Document,)
) )
menu_secondary.bind_links( menu_secondary.bind_links(
links=( links=(
link_document_content_delete,
link_document_content_download, link_document_content_download,
link_document_parsing_errors_list, link_document_parsing_errors_list,
link_document_submit link_document_submit
), ),
sources=( sources=(
'document_parsing:document_content', 'document_parsing:document_content',
'document_parsing:document_content_delete',
'document_parsing:document_content_download', 'document_parsing:document_content_download',
'document_parsing:document_parsing_error_list', 'document_parsing:document_parsing_error_list',
'document_parsing:document_submit', 'document_parsing:document_submit',

View File

@@ -8,6 +8,10 @@ namespace = EventTypeNamespace(
label=_('Document parsing'), name='document_parsing' label=_('Document parsing'), name='document_parsing'
) )
# Event type for the deletion of a document's parsed content.
event_parsing_document_content_deleted = namespace.add_event_type(
    name='document_content_deleted',
    label=_('Document parsed content deleted')
)
event_parsing_document_version_submit = namespace.add_event_type( event_parsing_document_version_submit = namespace.add_event_type(
label=_('Document version submitted for parsing'), name='version_submit' label=_('Document version submitted for parsing'), name='version_submit'
) )

View File

@@ -3,6 +3,11 @@ from __future__ import absolute_import, unicode_literals
from mayan.apps.appearance.classes import Icon from mayan.apps.appearance.classes import Icon
icon_document_content = Icon(driver_name='fontawesome', symbol='font') icon_document_content = Icon(driver_name='fontawesome', symbol='font')
# Dual-symbol icon: the 'font' glyph as primary symbol combined with a
# 'minus' glyph as secondary symbol, for the parsed content delete action.
icon_document_content_delete = Icon(
    driver_name='fontawesome-dual', primary_symbol='font',
    secondary_symbol='minus'
)
icon_document_parsing_errors_list = Icon( icon_document_parsing_errors_list = Icon(
driver_name='fontawesome', symbol='font' driver_name='fontawesome', symbol='font'
) )

View File

@@ -16,6 +16,17 @@ link_document_content = Link(
permissions=(permission_content_view,), text=_('Content'), permissions=(permission_content_view,), text=_('Content'),
view='document_parsing:document_content' view='document_parsing:document_content'
) )
# Link to the view that deletes the parsed content of a single document.
# The id of the resolved object is passed as the view argument.
link_document_content_delete = Link(
    view='document_parsing:document_content_delete',
    args='resolved_object.id',
    text=_('Delete parsed content'),
    permissions=(permission_parse_document,),
    icon_class_path='mayan.apps.document_parsing.icons.icon_document_content_delete'
)
# Link to the view that deletes the parsed content of a selection of
# documents. It takes no view argument and declares no permissions.
link_document_content_delete_multiple = Link(
    view='document_parsing:document_content_delete_multiple',
    text=_('Delete parsed content'),
    icon_class_path='mayan.apps.document_parsing.icons.icon_document_content_delete'
)
link_document_page_content = Link( link_document_page_content = Link(
args='resolved_object.id', args='resolved_object.id',
icon_class_path='mayan.apps.document_parsing.icons.icon_document_content', icon_class_path='mayan.apps.document_parsing.icons.icon_document_content',

View File

@@ -6,9 +6,12 @@ import traceback
from django.apps import apps from django.apps import apps
from django.conf import settings from django.conf import settings
from django.db import models from django.db import models, transaction
from .events import event_parsing_document_version_finish from .events import (
event_parsing_document_content_deleted,
event_parsing_document_version_finish
)
from .parsers import Parser from .parsers import Parser
from .signals import post_document_version_parsing from .signals import post_document_version_parsing
@@ -16,6 +19,15 @@ logger = logging.getLogger(__name__)
class DocumentPageContentManager(models.Manager): class DocumentPageContentManager(models.Manager):
def delete_content_for(self, document, user=None):
    """
    Delete the parsed content of all the pages of a document and commit
    the ``event_parsing_document_content_deleted`` event.

    document: the document whose pages (``document.pages``) will have
    their parsed content removed. It is also the target of the event.
    user: optional actor recorded in the event. Defaults to None.

    The deletion and the event commit are executed inside the same
    ``transaction.atomic()`` block.
    """
    with transaction.atomic():
        # Remove the content of every page with a single filtered delete
        # instead of issuing one delete query per page.
        self.filter(document_page__in=document.pages.all()).delete()

        event_parsing_document_content_deleted.commit(
            actor=user, target=document
        )
def process_document_version(self, document_version): def process_document_version(self, document_version):
logger.info( logger.info(
'Starting parsing for document version: %s', document_version 'Starting parsing for document version: %s', document_version

View File

@@ -6,15 +6,33 @@ from mayan.apps.documents.tests.literals import TEST_PDF_DOCUMENT_FILENAME
from mayan.apps.documents.tests.test_models import GenericDocumentTestCase from mayan.apps.documents.tests.test_models import GenericDocumentTestCase
from ..events import ( from ..events import (
event_parsing_document_content_deleted,
event_parsing_document_version_submit, event_parsing_document_version_submit,
event_parsing_document_version_finish event_parsing_document_version_finish
) )
from ..models import DocumentPageContent
class DocumentParsingEventsTestCase(GenericDocumentTestCase): class DocumentParsingEventsTestCase(GenericDocumentTestCase):
# Ensure we use a PDF file # Ensure we use a PDF file
test_document_filename = TEST_PDF_DOCUMENT_FILENAME test_document_filename = TEST_PDF_DOCUMENT_FILENAME
def test_document_content_deleted_event(self):
    """
    Deleting the parsed content of the test document must record an
    action whose target is the document and whose verb is the id of the
    content deleted event.
    """
    # Start from an empty action log
    Action.objects.all().delete()

    DocumentPageContent.objects.delete_content_for(
        document=self.test_document
    )

    # Descending timestamp order, so .last() returns the oldest action
    oldest_action = Action.objects.order_by('-timestamp').last()

    self.assertEqual(oldest_action.target, self.test_document)
    self.assertEqual(
        oldest_action.verb, event_parsing_document_content_deleted.id
    )
def test_document_version_submit_event(self): def test_document_version_submit_event(self):
Action.objects.all().delete() Action.objects.all().delete()
self.test_document.submit_for_parsing() self.test_document.submit_for_parsing()

View File

@@ -6,6 +6,7 @@ from mayan.apps.documents.tests import (
GenericDocumentViewTestCase, TEST_HYBRID_DOCUMENT GenericDocumentViewTestCase, TEST_HYBRID_DOCUMENT
) )
from ..models import DocumentPageContent
from ..permissions import ( from ..permissions import (
permission_content_view, permission_document_type_parsing_setup, permission_content_view, permission_document_type_parsing_setup,
permission_parse_document permission_parse_document
@@ -15,22 +16,46 @@ from ..utils import get_document_content
from .literals import TEST_DOCUMENT_CONTENT from .literals import TEST_DOCUMENT_CONTENT
@override_settings(DOCUMENT_PARSING_AUTO_PARSING=True) class DocumentContentViewTestMixin(object):
class DocumentContentViewsTestCase(GenericDocumentViewTestCase): def _request_test_document_content_delete_view(self):
_skip_file_descriptor_test = True return self.post(
viewname='document_parsing:document_content_delete', kwargs={
'pk': self.test_document.pk
}
)
# Ensure we use a PDF file def _request_test_document_content_download_view(self):
test_document_filename = TEST_HYBRID_DOCUMENT return self.get(
viewname='document_parsing:document_content_download',
kwargs={'pk': self.test_document.pk}
)
def _request_document_content_view(self): def _request_test_document_content_view(self):
return self.get( return self.get(
'document_parsing:document_content', kwargs={ 'document_parsing:document_content', kwargs={
'pk': self.test_document.pk 'pk': self.test_document.pk
} }
) )
def _request_test_document_page_content_view(self):
return self.get(
viewname='document_parsing:document_page_content', kwargs={
'pk': self.test_document.pages.first().pk,
}
)
@override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
class DocumentContentViewsTestCase(
DocumentContentViewTestMixin, GenericDocumentViewTestCase
):
_skip_file_descriptor_test = True
# Ensure we use a PDF file
test_document_filename = TEST_HYBRID_DOCUMENT
def test_document_content_view_no_permissions(self): def test_document_content_view_no_permissions(self):
response = self._request_document_content_view() response = self._request_test_document_content_view()
self.assertEqual(response.status_code, 404) self.assertEqual(response.status_code, 404)
def test_document_content_view_with_access(self): def test_document_content_view_with_access(self):
@@ -38,20 +63,37 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
obj=self.test_document, permission=permission_content_view obj=self.test_document, permission=permission_content_view
) )
response = self._request_document_content_view() response = self._request_test_document_content_view()
self.assertContains( self.assertContains(
response=response, text=TEST_DOCUMENT_CONTENT, status_code=200 response=response, text=TEST_DOCUMENT_CONTENT, status_code=200
) )
def _request_document_page_content_view(self): def test_document_content_delete_view_no_permissions(self):
return self.get( response = self._request_test_document_content_delete_view()
viewname='document_parsing:document_page_content', kwargs={ self.assertEqual(response.status_code, 404)
'pk': self.test_document.pages.first().pk,
} self.assertTrue(
DocumentPageContent.objects.filter(
document_page=self.test_document.pages.first()
).exists()
)
def test_document_content_delete_view_with_access(self):
self.grant_access(
obj=self.test_document, permission=permission_parse_document
)
response = self._request_test_document_content_delete_view()
self.assertEqual(response.status_code, 302)
self.assertFalse(
DocumentPageContent.objects.filter(
document_page=self.test_document.pages.first()
).exists()
) )
def test_document_page_content_view_no_permissions(self): def test_document_page_content_view_no_permissions(self):
response = self._request_document_page_content_view() response = self._request_test_document_page_content_view()
self.assertEqual(response.status_code, 404) self.assertEqual(response.status_code, 404)
def test_document_page_content_view_with_access(self): def test_document_page_content_view_with_access(self):
@@ -59,19 +101,13 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
permission=permission_content_view, obj=self.test_document permission=permission_content_view, obj=self.test_document
) )
response = self._request_document_page_content_view() response = self._request_test_document_page_content_view()
self.assertContains( self.assertContains(
response=response, text=TEST_DOCUMENT_CONTENT, status_code=200 response=response, text=TEST_DOCUMENT_CONTENT, status_code=200
) )
def _request_document_content_download_view(self):
return self.get(
viewname='document_parsing:document_content_download',
kwargs={'pk': self.test_document.pk}
)
def test_document_parsing_download_view_no_permission(self): def test_document_parsing_download_view_no_permission(self):
response = self._request_document_content_download_view() response = self._request_test_document_content_download_view()
self.assertEqual(response.status_code, 403) self.assertEqual(response.status_code, 403)
def test_download_view_with_access(self): def test_download_view_with_access(self):
@@ -80,7 +116,7 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
permission=permission_content_view, obj=self.test_document permission=permission_content_view, obj=self.test_document
) )
response = self._request_document_content_download_view() response = self._request_test_document_content_download_view()
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assert_download_response( self.assert_download_response(
@@ -89,12 +125,20 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
), ),
) )
class DocumentTypeContentViewsTestMixin(object):
def _request_test_document_type_parsing_settings(self): def _request_test_document_type_parsing_settings(self):
return self.get( return self.get(
viewname='document_parsing:document_type_parsing_settings', viewname='document_parsing:document_type_parsing_settings',
kwargs={'pk': self.test_document.document_type.pk} kwargs={'pk': self.test_document_type.pk}
) )
class DocumentTypeContentViewsTestCase(
DocumentTypeContentViewsTestMixin, GenericDocumentViewTestCase
):
auto_upload_document = False
def test_document_type_parsing_settings_view_no_permission(self): def test_document_type_parsing_settings_view_no_permission(self):
response = self._request_test_document_type_parsing_settings() response = self._request_test_document_type_parsing_settings()
self.assertEqual(response.status_code, 404) self.assertEqual(response.status_code, 404)
@@ -102,7 +146,7 @@ class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
def test_document_type_parsing_settings_view_with_access(self): def test_document_type_parsing_settings_view_with_access(self):
self.grant_access( self.grant_access(
permission=permission_document_type_parsing_setup, permission=permission_document_type_parsing_setup,
obj=self.test_document.document_type obj=self.test_document_type
) )
response = self._request_test_document_type_parsing_settings() response = self._request_test_document_type_parsing_settings()

View File

@@ -4,10 +4,10 @@ from django.conf.urls import url
from .api_views import APIDocumentPageContentView from .api_views import APIDocumentPageContentView
from .views import ( from .views import (
DocumentContentView, DocumentContentDownloadView, DocumentContentView, DocumentContentDeleteView,
DocumentPageContentView, DocumentParsingErrorsListView, DocumentContentDownloadView, DocumentPageContentView,
DocumentSubmitView, DocumentTypeSettingsEditView, DocumentTypeSubmitView, DocumentParsingErrorsListView, DocumentSubmitView,
ParseErrorListView DocumentTypeSettingsEditView, DocumentTypeSubmitView, ParseErrorListView
) )
urlpatterns = [ urlpatterns = [
@@ -16,21 +16,23 @@ urlpatterns = [
view=DocumentContentView.as_view(), name='document_content' view=DocumentContentView.as_view(), name='document_content'
), ),
url( url(
regex=r'^documents/pages/(?P<pk>\d+)/content/$', regex=r'^documents/(?P<pk>\d+)/content/delete/$',
view=DocumentPageContentView.as_view(), name='document_page_content' view=DocumentContentDeleteView.as_view(),
name='document_content_delete'
),
url(
regex=r'^documents/multiple/content/delete/$',
view=DocumentContentDeleteView.as_view(),
name='document_content_delete_multiple'
), ),
url( url(
regex=r'^documents/(?P<pk>\d+)/content/download/$', regex=r'^documents/(?P<pk>\d+)/content/download/$',
view=DocumentContentDownloadView.as_view(), name='document_content_download' view=DocumentContentDownloadView.as_view(),
name='document_content_download'
), ),
url( url(
regex=r'^document_types/submit/$', regex=r'^documents/pages/(?P<pk>\d+)/content/$',
view=DocumentTypeSubmitView.as_view(), name='document_type_submit' view=DocumentPageContentView.as_view(), name='document_page_content'
),
url(
regex=r'^document_types/(?P<pk>\d+)/parsing/settings/$',
view=DocumentTypeSettingsEditView.as_view(),
name='document_type_parsing_settings'
), ),
url( url(
regex=r'^documents/(?P<pk>\d+)/submit/$', regex=r'^documents/(?P<pk>\d+)/submit/$',
@@ -45,6 +47,15 @@ urlpatterns = [
view=DocumentParsingErrorsListView.as_view(), view=DocumentParsingErrorsListView.as_view(),
name='document_parsing_error_list' name='document_parsing_error_list'
), ),
url(
regex=r'^document_types/submit/$',
view=DocumentTypeSubmitView.as_view(), name='document_type_submit'
),
url(
regex=r'^document_types/(?P<pk>\d+)/parsing/settings/$',
view=DocumentTypeSettingsEditView.as_view(),
name='document_type_parsing_settings'
),
url( url(
regex=r'^errors/all/$', view=ParseErrorListView.as_view(), regex=r'^errors/all/$', view=ParseErrorListView.as_view(),
name='error_list' name='error_list'

View File

@@ -15,7 +15,7 @@ from mayan.apps.documents.forms import DocumentTypeFilteredSelectForm
from mayan.apps.documents.models import Document, DocumentPage, DocumentType from mayan.apps.documents.models import Document, DocumentPage, DocumentType
from .forms import DocumentContentForm, DocumentPageContentForm from .forms import DocumentContentForm, DocumentPageContentForm
from .models import DocumentVersionParseError from .models import DocumentPageContent, DocumentVersionParseError
from .permissions import ( from .permissions import (
permission_content_view, permission_document_type_parsing_setup, permission_content_view, permission_document_type_parsing_setup,
permission_parse_document permission_parse_document
@@ -23,6 +23,34 @@ from .permissions import (
from .utils import get_document_content from .utils import get_document_content
class DocumentContentDeleteView(MultipleObjectConfirmActionView):
    """
    Confirmation view to delete the parsed content of one or more
    documents. Operates on the Document model using
    ``permission_parse_document`` as the object permission.
    """
    model = Document
    object_permission = permission_parse_document
    # NOTE(review): these messages are plain strings and are not passed
    # through a translation function; confirm whether that is intended.
    success_message = 'Deleted parsed content of %(count)d document.'
    success_message_plural = 'Deleted parsed content of %(count)d documents.'

    def get_extra_context(self):
        """
        Return the extra template context: a title pluralized according
        to the number of selected documents and, when exactly one
        document is selected, that document under the ``object`` key.
        """
        queryset = self.object_list
        # Count once. The value is needed for the plural form of the
        # title and for the single object check; calling .count() twice
        # would execute the COUNT query twice.
        document_count = queryset.count()

        result = {
            'title': ungettext(
                singular='Delete the parsed content of the selected document?',
                plural='Delete the parsed content of the selected documents?',
                number=document_count
            )
        }

        if document_count == 1:
            result['object'] = queryset.first()

        return result

    def object_action(self, form, instance):
        """
        Delete the parsed content of ``instance``, passing the user of
        the current request along to the manager method.
        """
        DocumentPageContent.objects.delete_content_for(
            document=instance, user=self.request.user
        )
class DocumentContentView(SingleObjectDetailView): class DocumentContentView(SingleObjectDetailView):
form_class = DocumentContentForm form_class = DocumentContentForm
model = Document model = Document