Add the option to enable or disable parsing when uploading a document for each document type. Add a new setting option to enable automatic parsing for each new document type created.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-04-10 04:02:41 -04:00
parent 74628ab04b
commit b5d79f42a9
13 changed files with 199 additions and 15 deletions

View File

@@ -148,6 +148,8 @@
- Add warning when using SQLite as the database backend. - Add warning when using SQLite as the database backend.
- Use Mailgun's flanker library to process the email sources. - Use Mailgun's flanker library to process the email sources.
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads. - Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
- Add the option to enable or disable parsing when uploading a document for each document type.
- Add a new setting option to enable automatic parsing for each new document type created.
2.7.3 (2017-09-11) 2.7.3 (2017-09-11)
================== ==================

View File

@@ -467,6 +467,8 @@ Other changes worth mentioning
- Improve rendering of the details form. - Improve rendering of the details form.
- Update rendering of the readonly multiselect widget to conform to Django's updated field class interface. - Update rendering of the readonly multiselect widget to conform to Django's updated field class interface.
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads. - Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
- Add the option to enable or disable parsing when uploading a document for each document type.
- Add a new setting option to enable automatic parsing for each new document type created.
Removals Removals

View File

@@ -6,6 +6,7 @@ import logging
from kombu import Exchange, Queue from kombu import Exchange, Queue
from django.apps import apps from django.apps import apps
from django.db.models.signals import post_save
from django.utils.timezone import now from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
@@ -22,13 +23,19 @@ from mayan.celery import app
from navigation import SourceColumn from navigation import SourceColumn
from .events import event_parsing_document_version_submit from .events import event_parsing_document_version_submit
from .handlers import handler_parse_document_version from .handlers import (
handler_initialize_new_parsing_settings, handler_parse_document_version
)
from .links import ( from .links import (
link_document_content, link_document_content_download, link_document_content, link_document_content_download,
link_document_parsing_errors_list, link_document_submit_multiple, link_document_parsing_errors_list, link_document_submit_multiple,
link_document_submit, link_document_type_submit, link_error_list link_document_submit, link_document_type_parsing_settings,
link_document_type_submit, link_error_list
)
from .permissions import (
permission_content_view, permission_document_type_parsing_setup,
permission_parse_document
) )
from .permissions import permission_content_view
from .utils import get_document_content from .utils import get_document_content
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -66,7 +73,9 @@ class DocumentParsingApp(MayanAppConfig):
Document = apps.get_model( Document = apps.get_model(
app_label='documents', model_name='Document' app_label='documents', model_name='Document'
) )
DocumentType = apps.get_model(
app_label='documents', model_name='DocumentType'
)
DocumentVersion = apps.get_model( DocumentVersion = apps.get_model(
app_label='documents', model_name='DocumentVersion' app_label='documents', model_name='DocumentVersion'
) )
@@ -74,6 +83,9 @@ class DocumentParsingApp(MayanAppConfig):
DocumentVersionParseError = self.get_model('DocumentVersionParseError') DocumentVersionParseError = self.get_model('DocumentVersionParseError')
Document.add_to_class('submit_for_parsing', document_parsing_submit) Document.add_to_class('submit_for_parsing', document_parsing_submit)
Document.add_to_class(
'content', get_document_content
)
DocumentVersion.add_to_class( DocumentVersion.add_to_class(
'content', get_document_content 'content', get_document_content
) )
@@ -82,7 +94,14 @@ class DocumentParsingApp(MayanAppConfig):
) )
ModelPermission.register( ModelPermission.register(
model=Document, permissions=(permission_content_view,) model=Document, permissions=(
permission_content_view, permission_parse_document
)
)
ModelPermission.register(
model=DocumentType, permissions=(
permission_document_type_parsing_setup,
)
) )
SourceColumn( SourceColumn(
@@ -127,6 +146,10 @@ class DocumentParsingApp(MayanAppConfig):
menu_object.bind_links( menu_object.bind_links(
links=(link_document_submit,), sources=(Document,) links=(link_document_submit,), sources=(Document,)
) )
menu_object.bind_links(
links=(link_document_type_parsing_settings,), sources=(DocumentType,),
position=99
)
menu_secondary.bind_links( menu_secondary.bind_links(
links=( links=(
link_document_content, link_document_parsing_errors_list, link_document_content, link_document_parsing_errors_list,
@@ -143,7 +166,11 @@ class DocumentParsingApp(MayanAppConfig):
link_document_type_submit, link_error_list, link_document_type_submit, link_error_list,
) )
) )
post_save.connect(
dispatch_uid='handler_initialize_new_parsing_settings',
receiver=handler_initialize_new_parsing_settings,
sender=DocumentType
)
post_version_upload.connect( post_version_upload.connect(
dispatch_uid='document_parsing_handler_parse_document_version', dispatch_uid='document_parsing_handler_parse_document_version',
receiver=handler_parse_document_version, receiver=handler_parse_document_version,

View File

@@ -2,8 +2,24 @@ from __future__ import unicode_literals
import logging import logging
from django.apps import apps
from .settings import setting_auto_parsing
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def handler_initialize_new_parsing_settings(sender, instance, **kwargs):
    """
    Create the parsing settings entry for a document type that was just
    created.

    The initial ``auto_parsing`` value is copied from
    ``setting_auto_parsing``. Saves of instances that already existed are
    ignored.
    """
    # Resolved through the app registry at call time instead of importing
    # the model class at module level.
    settings_model = apps.get_model(
        app_label='document_parsing', model_name='DocumentTypeSettings'
    )

    if not kwargs['created']:
        return

    settings_model.objects.create(
        document_type=instance, auto_parsing=setting_auto_parsing.value
    )
def handler_parse_document_version(sender, instance, **kwargs): def handler_parse_document_version(sender, instance, **kwargs):
instance.submit_for_parsing() if instance.document.document_type.parsing_settings.auto_parsing:
instance.submit_for_parsing()

View File

@@ -4,7 +4,10 @@ from django.utils.translation import ugettext_lazy as _
from navigation import Link from navigation import Link
from .permissions import permission_content_view, permission_parse_document from .permissions import (
permission_content_view, permission_document_type_parsing_setup,
permission_parse_document
)
link_document_content = Link( link_document_content = Link(
args='resolved_object.id', icon='fa fa-font', args='resolved_object.id', icon='fa fa-font',
@@ -29,6 +32,10 @@ link_document_submit = Link(
args='resolved_object.id', permissions=(permission_parse_document,), args='resolved_object.id', permissions=(permission_parse_document,),
text=_('Submit for parsing'), view='document_parsing:document_submit' text=_('Submit for parsing'), view='document_parsing:document_submit'
) )
# Link to the parsing settings edit view of a document type. The link is
# declared with the document type parsing setup permission and resolves the
# view argument from the id of the object being displayed.
link_document_type_parsing_settings = Link(
    args='resolved_object.id', permissions=(permission_document_type_parsing_setup,),
    text=_('Setup parsing'), view='document_parsing:document_type_parsing_settings',
)
link_document_type_submit = Link( link_document_type_submit = Link(
icon='fa fa-crosshairs', text=_('Parse documents per type'), icon='fa fa-crosshairs', text=_('Parse documents per type'),
view='document_parsing:document_type_submit' view='document_parsing:document_type_submit'

View File

@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.11 on 2018-04-10 06:39
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
def create_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Forward data migration: give every existing document type a parsing
    settings row.

    ``get_or_create`` is used instead of ``create`` so the function is
    idempotent: a document type that already has a settings row is left
    untouched instead of violating the one-to-one constraint. The previous
    ``except DoesNotExist`` guard could never trigger because ``create()``
    does not raise that exception.

    :param apps: historical app registry supplied by ``RunPython``.
    :param schema_editor: unused, required by the ``RunPython`` signature.
    """
    DocumentType = apps.get_model('documents', 'DocumentType')
    DocumentTypeSettings = apps.get_model(
        'document_parsing', 'DocumentTypeSettings'
    )

    for document_type in DocumentType.objects.all():
        # Field defaults (auto_parsing=True) apply to newly created rows.
        DocumentTypeSettings.objects.get_or_create(document_type=document_type)
def delete_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Reverse data migration: remove the parsing settings row of every
    existing document type.

    Document types that have no settings row are skipped silently.

    :param apps: historical app registry supplied by ``RunPython``.
    :param schema_editor: unused, required by the ``RunPython`` signature.
    """
    document_type_model = apps.get_model('documents', 'DocumentType')
    settings_model = apps.get_model('document_parsing', 'DocumentTypeSettings')

    for document_type in document_type_model.objects.all():
        try:
            settings_model.objects.get(document_type=document_type).delete()
        except settings_model.DoesNotExist:
            # Nothing to remove for this document type.
            continue
class Migration(migrations.Migration):
    """
    Add the per document type parsing settings model and populate it for
    the document types that already exist.
    """

    dependencies = [
        ('documents', '0042_auto_20180403_0702'),
        ('document_parsing', '0002_auto_20170827_1617'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentTypeSettings',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True, primary_key=True,
                        serialize=False, verbose_name='ID'
                    )
                ),
                (
                    'auto_parsing',
                    models.BooleanField(
                        default=True,
                        verbose_name='Automatically queue newly created documents for parsing.'
                    )
                ),
                (
                    'document_type',
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='parsing_settings',
                        to='documents.DocumentType',
                        verbose_name='Document type'
                    )
                ),
            ],
            options={
                'verbose_name': 'Document type settings',
                'verbose_name_plural': 'Document types settings',
            },
        ),
        # Runs after CreateModel because both data functions query the
        # DocumentTypeSettings model created above.
        migrations.RunPython(
            code=create_parsing_setting_for_existing_document_types,
            reverse_code=delete_parsing_setting_for_existing_document_types,
        )
    ]

View File

@@ -4,7 +4,7 @@ from django.db import models
from django.utils.encoding import force_text, python_2_unicode_compatible from django.utils.encoding import force_text, python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from documents.models import DocumentPage, DocumentVersion from documents.models import DocumentPage, DocumentType, DocumentVersion
from .managers import DocumentPageContentManager from .managers import DocumentPageContentManager
@@ -27,6 +27,21 @@ class DocumentPageContent(models.Model):
return force_text(self.document_page) return force_text(self.document_page)
@python_2_unicode_compatible
class DocumentTypeSettings(models.Model):
    """
    Parsing configuration attached one-to-one to a document type.

    Reachable from a document type instance through the
    ``parsing_settings`` related name.
    """
    # A OneToOneField is always unique, so no explicit ``unique=True`` is
    # passed.
    document_type = models.OneToOneField(
        on_delete=models.CASCADE, related_name='parsing_settings',
        to=DocumentType, verbose_name=_('Document type')
    )
    # When True, new document versions of this type are submitted for
    # parsing on upload.
    auto_parsing = models.BooleanField(
        default=True,
        verbose_name=_('Automatically queue newly created documents for parsing.')
    )

    class Meta:
        verbose_name = _('Document type settings')
        verbose_name_plural = _('Document types settings')

    def __str__(self):
        # Identify the settings row by its document type.
        return force_text(self.document_type)
@python_2_unicode_compatible @python_2_unicode_compatible
class DocumentVersionParseError(models.Model): class DocumentVersionParseError(models.Model):
document_version = models.ForeignKey( document_version = models.ForeignKey(

View File

@@ -9,7 +9,10 @@ namespace = PermissionNamespace('document_parsing', _('Document parsing'))
permission_content_view = namespace.add_permission( permission_content_view = namespace.add_permission(
name='content_view', label=_('View the content of a document') name='content_view', label=_('View the content of a document')
) )
# Permission for changing the parsing settings of a document type.
# NOTE(review): the registered name is 'document_type_setup', without the
# "parsing" part used in the Python identifier -- presumably acceptable
# because it is added to the 'document_parsing' namespace; confirm before
# renaming.
permission_document_type_parsing_setup = namespace.add_permission(
    name='document_type_setup',
    label=_('Change document type parsing settings')
)
permission_parse_document = namespace.add_permission( permission_parse_document = namespace.add_permission(
name='parse_document', label=_('Parse the content of a document') name='parse_document', label=_('Parse the content of a document')
) )

View File

@@ -6,6 +6,12 @@ from smart_settings import Namespace
namespace = Namespace(name='document_parsing', label=_('Document parsing')) namespace = Namespace(name='document_parsing', label=_('Document parsing'))
# Initial ``auto_parsing`` value given to document types created from now
# on. The value is copied into the per document type settings when the
# document type is created, so each document type keeps its own copy
# afterwards.
setting_auto_parsing = namespace.add_setting(
    global_name='DOCUMENT_PARSING_AUTO_PARSING', default=True,
    help_text=_(
        'Set new document types to perform parsing automatically by default.'
    )
)
setting_pdftotext_path = namespace.add_setting( setting_pdftotext_path = namespace.add_setting(
global_name='DOCUMENT_PARSING_PDFTOTEXT_PATH', global_name='DOCUMENT_PARSING_PDFTOTEXT_PATH',
default='/usr/bin/pdftotext', default='/usr/bin/pdftotext',

View File

@@ -0,0 +1,23 @@
from __future__ import unicode_literals
from django.test import override_settings
from documents.tests import GenericDocumentTestCase, TEST_DOCUMENT_PATH
class DocumentAutoParsingTestCase(GenericDocumentTestCase):
    """
    Check that the DOCUMENT_PARSING_AUTO_PARSING setting controls whether
    an uploaded document is parsed automatically.
    """
    test_document_filename = TEST_DOCUMENT_PATH
    # Each test creates the document type itself, inside the
    # ``override_settings`` scope of the test method.
    auto_create_document_type = False

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=False)
    def test_disable_auto_parsing(self):
        self.create_document_type()
        self.document = self.upload_document()
        # No content is expected, so the content iterator is exhausted
        # immediately. The builtin ``next()`` works on Python 2 and 3; the
        # ``.next()`` method exists only on Python 2.
        with self.assertRaises(StopIteration):
            next(self.document.latest_version.content())

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
    def test_enabled_auto_parsing(self):
        self.create_document_type()
        self.document = self.upload_document()
        # assertIn reports both operands on failure, unlike assertTrue.
        self.assertIn('Mayan', next(self.document.content()))

View File

@@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from django.test import override_settings
from documents.tests import ( from documents.tests import (
GenericDocumentViewTestCase, TEST_DOCUMENT_FILENAME, GenericDocumentViewTestCase, TEST_DOCUMENT_FILENAME,
TEST_DOCUMENT_PATH TEST_DOCUMENT_PATH
@@ -9,6 +11,7 @@ from ..permissions import permission_content_view
from ..utils import get_document_content from ..utils import get_document_content
@override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
class DocumentContentViewsTestCase(GenericDocumentViewTestCase): class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
_skip_file_descriptor_test = True _skip_file_descriptor_test = True

View File

@@ -5,8 +5,8 @@ from django.conf.urls import url
from .api_views import APIDocumentPageContentView from .api_views import APIDocumentPageContentView
from .views import ( from .views import (
DocumentContentView, DocumentContentDownloadView, DocumentContentView, DocumentContentDownloadView,
DocumentParsingErrorsListView, DocumentSubmitView, DocumentTypeSubmitView, DocumentParsingErrorsListView, DocumentSubmitView,
ParseErrorListView DocumentTypeSettingsEditView, DocumentTypeSubmitView, ParseErrorListView
) )
urlpatterns = [ urlpatterns = [
@@ -22,6 +22,11 @@ urlpatterns = [
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(), r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
name='document_type_submit' name='document_type_submit'
), ),
url(
r'^document_types/(?P<pk>\d+)/parsing/settings/$',
DocumentTypeSettingsEditView.as_view(),
name='document_type_parsing_settings'
),
url( url(
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(), r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
name='document_submit' name='document_submit'

View File

@@ -8,13 +8,16 @@ from django.utils.translation import ugettext_lazy as _, ungettext
from common.generics import ( from common.generics import (
FormView, MultipleObjectConfirmActionView, SingleObjectDetailView, FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
SingleObjectDownloadView, SingleObjectListView SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
) )
from documents.models import Document from documents.models import Document, DocumentType
from .forms import DocumentContentForm, DocumentTypeSelectForm from .forms import DocumentContentForm, DocumentTypeSelectForm
from .models import DocumentVersionParseError from .models import DocumentVersionParseError
from .permissions import permission_content_view, permission_parse_document from .permissions import (
permission_content_view, permission_document_type_parsing_setup,
permission_parse_document
)
from .utils import get_document_content from .utils import get_document_content
@@ -110,6 +113,23 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
instance.submit_for_parsing() instance.submit_for_parsing()
class DocumentTypeSettingsEditView(SingleObjectEditView):
    """
    Edit view for the parsing settings of a single document type.

    The URL carries the primary key of the document type; the object being
    edited is that document type's ``parsing_settings`` instance.
    """
    fields = ('auto_parsing',)
    view_permission = permission_document_type_parsing_setup

    def get_extra_context(self):
        """Return the page title, naming the document type being edited."""
        message = _('Edit parsing settings for document type: %s')
        return {'title': message % self.get_object().document_type}

    def get_object(self, queryset=None):
        """
        Return the parsing settings of the document type named in the URL,
        answering with a 404 when that document type does not exist.
        """
        document_type = get_object_or_404(DocumentType, pk=self.kwargs['pk'])
        return document_type.parsing_settings
class DocumentTypeSubmitView(FormView): class DocumentTypeSubmitView(FormView):
form_class = DocumentTypeSelectForm form_class = DocumentTypeSelectForm
extra_context = { extra_context = {