diff --git a/HISTORY.rst b/HISTORY.rst index ace468110c..eaa3e1838f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -148,6 +148,8 @@ - Add warning when using SQLite as the database backend. - Use Mailgun's flanker library to process the email sources. - Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads. +- Add the option to enable or disable parsing when uploading a document for each document type. +- Add a new setting option to enable automatic parsing for each new document type created. 2.7.3 (2017-09-11) ================== diff --git a/docs/releases/3.0.rst b/docs/releases/3.0.rst index 15b6dd1bb2..e34934fbae 100644 --- a/docs/releases/3.0.rst +++ b/docs/releases/3.0.rst @@ -467,6 +467,8 @@ Other changes worth mentioning - Improve rendering of the details form. - Update rendering of the readonly multiselect widget to conform to Django's updated field class interface. - Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads. +- Add the option to enable or disable parsing when uploading a document for each document type. +- Add a new setting option to enable automatic parsing for each new document type created. 
Removals diff --git a/mayan/apps/document_parsing/apps.py b/mayan/apps/document_parsing/apps.py index 53db283436..0c1f151a98 100644 --- a/mayan/apps/document_parsing/apps.py +++ b/mayan/apps/document_parsing/apps.py @@ -6,6 +6,7 @@ import logging from kombu import Exchange, Queue from django.apps import apps +from django.db.models.signals import post_save from django.utils.timezone import now from django.utils.translation import ugettext_lazy as _ @@ -22,13 +23,19 @@ from mayan.celery import app from navigation import SourceColumn from .events import event_parsing_document_version_submit -from .handlers import handler_parse_document_version +from .handlers import ( + handler_initialize_new_parsing_settings, handler_parse_document_version +) from .links import ( link_document_content, link_document_content_download, link_document_parsing_errors_list, link_document_submit_multiple, - link_document_submit, link_document_type_submit, link_error_list + link_document_submit, link_document_type_parsing_settings, + link_document_type_submit, link_error_list +) +from .permissions import ( + permission_content_view, permission_document_type_parsing_setup, + permission_parse_document ) -from .permissions import permission_content_view from .utils import get_document_content logger = logging.getLogger(__name__) @@ -66,7 +73,9 @@ class DocumentParsingApp(MayanAppConfig): Document = apps.get_model( app_label='documents', model_name='Document' ) - + DocumentType = apps.get_model( + app_label='documents', model_name='DocumentType' + ) DocumentVersion = apps.get_model( app_label='documents', model_name='DocumentVersion' ) @@ -74,6 +83,9 @@ class DocumentParsingApp(MayanAppConfig): DocumentVersionParseError = self.get_model('DocumentVersionParseError') Document.add_to_class('submit_for_parsing', document_parsing_submit) + Document.add_to_class( + 'content', get_document_content + ) DocumentVersion.add_to_class( 'content', get_document_content ) @@ -82,7 +94,14 @@ class 
DocumentParsingApp(MayanAppConfig): ) ModelPermission.register( - model=Document, permissions=(permission_content_view,) + model=Document, permissions=( + permission_content_view, permission_parse_document + ) + ) + ModelPermission.register( + model=DocumentType, permissions=( + permission_document_type_parsing_setup, + ) ) SourceColumn( @@ -127,6 +146,10 @@ class DocumentParsingApp(MayanAppConfig): menu_object.bind_links( links=(link_document_submit,), sources=(Document,) ) + menu_object.bind_links( + links=(link_document_type_parsing_settings,), sources=(DocumentType,), + position=99 + ) menu_secondary.bind_links( links=( link_document_content, link_document_parsing_errors_list, @@ -143,7 +166,11 @@ class DocumentParsingApp(MayanAppConfig): link_document_type_submit, link_error_list, ) ) - + post_save.connect( + dispatch_uid='handler_initialize_new_parsing_settings', + receiver=handler_initialize_new_parsing_settings, + sender=DocumentType + ) post_version_upload.connect( dispatch_uid='document_parsing_handler_parse_document_version', receiver=handler_parse_document_version, diff --git a/mayan/apps/document_parsing/handlers.py b/mayan/apps/document_parsing/handlers.py index 6e3338f484..f4e506afac 100644 --- a/mayan/apps/document_parsing/handlers.py +++ b/mayan/apps/document_parsing/handlers.py @@ -2,8 +2,24 @@ from __future__ import unicode_literals import logging +from django.apps import apps + +from .settings import setting_auto_parsing + logger = logging.getLogger(__name__) +def handler_initialize_new_parsing_settings(sender, instance, **kwargs): + DocumentTypeSettings = apps.get_model( + app_label='document_parsing', model_name='DocumentTypeSettings' + ) + + if kwargs['created']: + DocumentTypeSettings.objects.create( + document_type=instance, auto_parsing=setting_auto_parsing.value + ) + + def handler_parse_document_version(sender, instance, **kwargs): - instance.submit_for_parsing() + if instance.document.document_type.parsing_settings.auto_parsing: + 
instance.submit_for_parsing() diff --git a/mayan/apps/document_parsing/links.py b/mayan/apps/document_parsing/links.py index f9ca43fc92..a306fa6ef5 100644 --- a/mayan/apps/document_parsing/links.py +++ b/mayan/apps/document_parsing/links.py @@ -4,7 +4,10 @@ from django.utils.translation import ugettext_lazy as _ from navigation import Link -from .permissions import permission_content_view, permission_parse_document +from .permissions import ( + permission_content_view, permission_document_type_parsing_setup, + permission_parse_document +) link_document_content = Link( args='resolved_object.id', icon='fa fa-font', @@ -29,6 +32,10 @@ link_document_submit = Link( args='resolved_object.id', permissions=(permission_parse_document,), text=_('Submit for parsing'), view='document_parsing:document_submit' ) +link_document_type_parsing_settings = Link( + args='resolved_object.id', permissions=(permission_document_type_parsing_setup,), + text=_('Setup parsing'), view='document_parsing:document_type_parsing_settings', +) link_document_type_submit = Link( icon='fa fa-crosshairs', text=_('Parse documents per type'), view='document_parsing:document_type_submit' diff --git a/mayan/apps/document_parsing/migrations/0003_documenttypesettings.py b/mayan/apps/document_parsing/migrations/0003_documenttypesettings.py new file mode 100644 index 0000000000..de6feb496b --- /dev/null +++ b/mayan/apps/document_parsing/migrations/0003_documenttypesettings.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.11 on 2018-04-10 06:39 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +def create_parsing_setting_for_existing_document_types(apps, schema_editor): + DocumentType = apps.get_model('documents', 'DocumentType') + DocumentTypeSettings = apps.get_model('document_parsing', 'DocumentTypeSettings') + + for document_type in DocumentType.objects.all(): + try: + 
DocumentTypeSettings.objects.create(document_type=document_type) + except DocumentTypeSettings.DoesNotExist: + pass + + +def delete_parsing_setting_for_existing_document_types(apps, schema_editor): + DocumentType = apps.get_model('documents', 'DocumentType') + DocumentTypeSettings = apps.get_model('document_parsing', 'DocumentTypeSettings') + + for document_type in DocumentType.objects.all(): + try: + DocumentTypeSettings.objects.get(document_type=document_type).delete() + except DocumentTypeSettings.DoesNotExist: + pass + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0042_auto_20180403_0702'), + ('document_parsing', '0002_auto_20170827_1617'), + ] + + operations = [ + migrations.CreateModel( + name='DocumentTypeSettings', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('auto_parsing', models.BooleanField(default=True, verbose_name='Automatically queue newly created documents for parsing.')), + ('document_type', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='parsing_settings', to='documents.DocumentType', verbose_name='Document type')), + ], + options={ + 'verbose_name': 'Document type settings', + 'verbose_name_plural': 'Document types settings', + }, + ), + migrations.RunPython( + code=create_parsing_setting_for_existing_document_types, + reverse_code=delete_parsing_setting_for_existing_document_types, + ) + ] diff --git a/mayan/apps/document_parsing/models.py b/mayan/apps/document_parsing/models.py index 23fae82a96..30ea3ee81f 100644 --- a/mayan/apps/document_parsing/models.py +++ b/mayan/apps/document_parsing/models.py @@ -4,7 +4,7 @@ from django.db import models from django.utils.encoding import force_text, python_2_unicode_compatible from django.utils.translation import ugettext_lazy as _ -from documents.models import DocumentPage, DocumentVersion +from documents.models import DocumentPage, DocumentType, DocumentVersion from 
.managers import DocumentPageContentManager @@ -27,6 +27,21 @@ class DocumentPageContent(models.Model): return force_text(self.document_page) +class DocumentTypeSettings(models.Model): + document_type = models.OneToOneField( + on_delete=models.CASCADE, related_name='parsing_settings', + to=DocumentType, unique=True, verbose_name=_('Document type') + ) + auto_parsing = models.BooleanField( + default=True, + verbose_name=_('Automatically queue newly created documents for parsing.') + ) + + class Meta: + verbose_name = _('Document type settings') + verbose_name_plural = _('Document types settings') + + @python_2_unicode_compatible class DocumentVersionParseError(models.Model): document_version = models.ForeignKey( diff --git a/mayan/apps/document_parsing/permissions.py b/mayan/apps/document_parsing/permissions.py index f58deedfcb..0ba6382b88 100644 --- a/mayan/apps/document_parsing/permissions.py +++ b/mayan/apps/document_parsing/permissions.py @@ -9,7 +9,10 @@ namespace = PermissionNamespace('document_parsing', _('Document parsing')) permission_content_view = namespace.add_permission( name='content_view', label=_('View the content of a document') ) - +permission_document_type_parsing_setup = namespace.add_permission( + name='document_type_setup', + label=_('Change document type parsing settings') +) permission_parse_document = namespace.add_permission( name='parse_document', label=_('Parse the content of a document') ) diff --git a/mayan/apps/document_parsing/settings.py b/mayan/apps/document_parsing/settings.py index 47caeb44a8..5f140ae800 100644 --- a/mayan/apps/document_parsing/settings.py +++ b/mayan/apps/document_parsing/settings.py @@ -6,6 +6,12 @@ from smart_settings import Namespace namespace = Namespace(name='document_parsing', label=_('Document parsing')) +setting_auto_parsing = namespace.add_setting( + global_name='DOCUMENT_PARSING_AUTO_PARSING', default=True, + help_text=_( + 'Set new document types to perform parsing automatically by default.' 
+ ) +) setting_pdftotext_path = namespace.add_setting( global_name='DOCUMENT_PARSING_PDFTOTEXT_PATH', default='/usr/bin/pdftotext', diff --git a/mayan/apps/document_parsing/tests/test_models.py b/mayan/apps/document_parsing/tests/test_models.py new file mode 100644 index 0000000000..4856676b6d --- /dev/null +++ b/mayan/apps/document_parsing/tests/test_models.py @@ -0,0 +1,23 @@ +from __future__ import unicode_literals + +from django.test import override_settings + +from documents.tests import GenericDocumentTestCase, TEST_DOCUMENT_PATH + + +class DocumentAutoParsingTestCase(GenericDocumentTestCase): + test_document_filename = TEST_DOCUMENT_PATH + auto_create_document_type = False + + @override_settings(DOCUMENT_PARSING_AUTO_PARSING=False) + def test_disable_auto_parsing(self): + self.create_document_type() + self.document = self.upload_document() + with self.assertRaises(StopIteration): + self.document.latest_version.content().next() + + @override_settings(DOCUMENT_PARSING_AUTO_PARSING=True) + def test_enabled_auto_parsing(self): + self.create_document_type() + self.document = self.upload_document() + self.assertTrue('Mayan' in self.document.content().next()) diff --git a/mayan/apps/document_parsing/tests/test_views.py b/mayan/apps/document_parsing/tests/test_views.py index cb081e93ba..aeb55599e1 100644 --- a/mayan/apps/document_parsing/tests/test_views.py +++ b/mayan/apps/document_parsing/tests/test_views.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +from django.test import override_settings + from documents.tests import ( GenericDocumentViewTestCase, TEST_DOCUMENT_FILENAME, TEST_DOCUMENT_PATH @@ -9,6 +11,7 @@ from ..permissions import permission_content_view from ..utils import get_document_content +@override_settings(DOCUMENT_PARSING_AUTO_PARSING=True) class DocumentContentViewsTestCase(GenericDocumentViewTestCase): _skip_file_descriptor_test = True diff --git a/mayan/apps/document_parsing/urls.py b/mayan/apps/document_parsing/urls.py index 
a0916c41dd..3b91cbe7f5 100644 --- a/mayan/apps/document_parsing/urls.py +++ b/mayan/apps/document_parsing/urls.py @@ -5,8 +5,8 @@ from django.conf.urls import url from .api_views import APIDocumentPageContentView from .views import ( DocumentContentView, DocumentContentDownloadView, - DocumentParsingErrorsListView, DocumentSubmitView, DocumentTypeSubmitView, - ParseErrorListView + DocumentParsingErrorsListView, DocumentSubmitView, + DocumentTypeSettingsEditView, DocumentTypeSubmitView, ParseErrorListView ) urlpatterns = [ @@ -22,6 +22,11 @@ urlpatterns = [ r'^document_types/submit/$', DocumentTypeSubmitView.as_view(), name='document_type_submit' ), + url( + r'^document_types/(?P<pk>\d+)/parsing/settings/$', + DocumentTypeSettingsEditView.as_view(), + name='document_type_parsing_settings' + ), url( r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(), name='document_submit' diff --git a/mayan/apps/document_parsing/views.py b/mayan/apps/document_parsing/views.py index 032371d853..901b354dfd 100644 --- a/mayan/apps/document_parsing/views.py +++ b/mayan/apps/document_parsing/views.py @@ -8,13 +8,16 @@ from django.utils.translation import ugettext_lazy as _, ungettext from common.generics import ( FormView, MultipleObjectConfirmActionView, SingleObjectDetailView, - SingleObjectDownloadView, SingleObjectListView + SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView ) -from documents.models import Document +from documents.models import Document, DocumentType from .forms import DocumentContentForm, DocumentTypeSelectForm from .models import DocumentVersionParseError -from .permissions import permission_content_view, permission_parse_document +from .permissions import ( + permission_content_view, permission_document_type_parsing_setup, + permission_parse_document +) from .utils import get_document_content @@ -110,6 +113,23 @@ class DocumentSubmitView(MultipleObjectConfirmActionView): instance.submit_for_parsing() +class
DocumentTypeSettingsEditView(SingleObjectEditView): + fields = ('auto_parsing',) + view_permission = permission_document_type_parsing_setup + + def get_object(self, queryset=None): + return get_object_or_404( + DocumentType, pk=self.kwargs['pk'] + ).parsing_settings + + def get_extra_context(self): + return { + 'title': _( + 'Edit parsing settings for document type: %s' + ) % self.get_object().document_type + } + + class DocumentTypeSubmitView(FormView): form_class = DocumentTypeSelectForm extra_context = {