Add the option to enable or disable parsing when uploading a document for each document type. Add a new setting option to enable automatic parsing for each new document type created.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-04-10 04:02:41 -04:00
parent 74628ab04b
commit b5d79f42a9
13 changed files with 199 additions and 15 deletions

View File

@@ -148,6 +148,8 @@
- Add warning when using SQLite as the database backend.
- Use Mailgun's flanker library to process the email sources.
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
- Add the option to enable or disable parsing when uploading a document for each document type.
- Add a new setting option to enable automatic parsing for each new document type created.
2.7.3 (2017-09-11)
==================

View File

@@ -467,6 +467,8 @@ Other changes worth mentioning
- Improve rendering of the details form.
- Update rendering of the readonly multiselect widget to conform to Django's updated field class interface.
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
- Add the option to enable or disable parsing when uploading a document for each document type.
- Add a new setting option to enable automatic parsing for each new document type created.
Removals

View File

@@ -6,6 +6,7 @@ import logging
from kombu import Exchange, Queue
from django.apps import apps
from django.db.models.signals import post_save
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
@@ -22,13 +23,19 @@ from mayan.celery import app
from navigation import SourceColumn
from .events import event_parsing_document_version_submit
from .handlers import handler_parse_document_version
from .handlers import (
handler_initialize_new_parsing_settings, handler_parse_document_version
)
from .links import (
link_document_content, link_document_content_download,
link_document_parsing_errors_list, link_document_submit_multiple,
link_document_submit, link_document_type_submit, link_error_list
link_document_submit, link_document_type_parsing_settings,
link_document_type_submit, link_error_list
)
from .permissions import (
permission_content_view, permission_document_type_parsing_setup,
permission_parse_document
)
from .permissions import permission_content_view
from .utils import get_document_content
logger = logging.getLogger(__name__)
@@ -66,7 +73,9 @@ class DocumentParsingApp(MayanAppConfig):
Document = apps.get_model(
app_label='documents', model_name='Document'
)
DocumentType = apps.get_model(
app_label='documents', model_name='DocumentType'
)
DocumentVersion = apps.get_model(
app_label='documents', model_name='DocumentVersion'
)
@@ -74,6 +83,9 @@ class DocumentParsingApp(MayanAppConfig):
DocumentVersionParseError = self.get_model('DocumentVersionParseError')
Document.add_to_class('submit_for_parsing', document_parsing_submit)
Document.add_to_class(
'content', get_document_content
)
DocumentVersion.add_to_class(
'content', get_document_content
)
@@ -82,7 +94,14 @@ class DocumentParsingApp(MayanAppConfig):
)
ModelPermission.register(
model=Document, permissions=(permission_content_view,)
model=Document, permissions=(
permission_content_view, permission_parse_document
)
)
ModelPermission.register(
model=DocumentType, permissions=(
permission_document_type_parsing_setup,
)
)
SourceColumn(
@@ -127,6 +146,10 @@ class DocumentParsingApp(MayanAppConfig):
menu_object.bind_links(
links=(link_document_submit,), sources=(Document,)
)
menu_object.bind_links(
links=(link_document_type_parsing_settings,), sources=(DocumentType,),
position=99
)
menu_secondary.bind_links(
links=(
link_document_content, link_document_parsing_errors_list,
@@ -143,7 +166,11 @@ class DocumentParsingApp(MayanAppConfig):
link_document_type_submit, link_error_list,
)
)
post_save.connect(
dispatch_uid='handler_initialize_new_parsing_settings',
receiver=handler_initialize_new_parsing_settings,
sender=DocumentType
)
post_version_upload.connect(
dispatch_uid='document_parsing_handler_parse_document_version',
receiver=handler_parse_document_version,

View File

@@ -2,8 +2,24 @@ from __future__ import unicode_literals
import logging
from django.apps import apps
from .settings import setting_auto_parsing
logger = logging.getLogger(__name__)
def handler_initialize_new_parsing_settings(sender, instance, **kwargs):
    """
    Signal receiver that gives a newly created document type its parsing
    settings row.

    The initial `auto_parsing` flag is read from `setting_auto_parsing`.
    Saves of instances that already existed are ignored.
    """
    # Looked up through the app registry at call time instead of being
    # imported at module level.
    settings_model = apps.get_model(
        app_label='document_parsing', model_name='DocumentTypeSettings'
    )

    if not kwargs['created']:
        return

    settings_model.objects.create(
        auto_parsing=setting_auto_parsing.value, document_type=instance
    )
def handler_parse_document_version(sender, instance, **kwargs):
    """
    Signal receiver that queues an uploaded document version for parsing.

    The version is submitted only when the parsing settings of its
    document type have `auto_parsing` enabled. There must be no
    unconditional submit before the check: it would bypass the per
    document type switch and queue enabled types twice.
    """
    if instance.document.document_type.parsing_settings.auto_parsing:
        instance.submit_for_parsing()

View File

@@ -4,7 +4,10 @@ from django.utils.translation import ugettext_lazy as _
from navigation import Link
from .permissions import permission_content_view, permission_parse_document
from .permissions import (
permission_content_view, permission_document_type_parsing_setup,
permission_parse_document
)
link_document_content = Link(
args='resolved_object.id', icon='fa fa-font',
@@ -29,6 +32,10 @@ link_document_submit = Link(
args='resolved_object.id', permissions=(permission_parse_document,),
text=_('Submit for parsing'), view='document_parsing:document_submit'
)
# Link to the parsing settings view of a document type; only offered to
# users holding the document type parsing setup permission.
link_document_type_parsing_settings = Link(
    args='resolved_object.id',
    permissions=(permission_document_type_parsing_setup,),
    text=_('Setup parsing'),
    view='document_parsing:document_type_parsing_settings'
)
link_document_type_submit = Link(
icon='fa fa-crosshairs', text=_('Parse documents per type'),
view='document_parsing:document_type_submit'

View File

@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.11 on 2018-04-10 06:39
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
def create_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Forward data migration: make sure every existing document type has a
    parsing settings row.

    `apps` is the historical app registry passed in by `RunPython`;
    `schema_editor` is unused. Rows are created with the model field
    defaults.
    """
    DocumentType = apps.get_model('documents', 'DocumentType')
    DocumentTypeSettings = apps.get_model(
        'document_parsing', 'DocumentTypeSettings'
    )

    for document_type in DocumentType.objects.all():
        # `create()` never raises DoesNotExist, so the previous except
        # clause could not skip anything. `get_or_create()` leaves an
        # already present row untouched instead of failing on the
        # one-to-one constraint.
        DocumentTypeSettings.objects.get_or_create(
            document_type=document_type
        )
def delete_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Reverse data migration: remove the parsing settings row of every
    existing document type.

    Document types that have no settings row are skipped. `schema_editor`
    is unused.
    """
    document_type_model = apps.get_model('documents', 'DocumentType')
    settings_model = apps.get_model(
        'document_parsing', 'DocumentTypeSettings'
    )

    for document_type in document_type_model.objects.all():
        try:
            settings_row = settings_model.objects.get(
                document_type=document_type
            )
        except settings_model.DoesNotExist:
            # Nothing stored for this document type.
            pass
        else:
            settings_row.delete()
class Migration(migrations.Migration):
    """
    Create the per document type parsing settings model, then add a
    settings row for each document type that already exists.
    """

    dependencies = [
        ('documents', '0042_auto_20180403_0702'),
        ('document_parsing', '0002_auto_20170827_1617'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentTypeSettings',
            fields=[
                (
                    'id', models.AutoField(
                        auto_created=True, primary_key=True,
                        serialize=False, verbose_name='ID'
                    )
                ),
                (
                    'auto_parsing', models.BooleanField(
                        default=True,
                        verbose_name='Automatically queue newly created documents for parsing.'
                    )
                ),
                (
                    'document_type', models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='parsing_settings',
                        to='documents.DocumentType',
                        verbose_name='Document type'
                    )
                ),
            ],
            options={
                'verbose_name': 'Document type settings',
                'verbose_name_plural': 'Document types settings',
            },
        ),
        # Data step: runs after the table exists on the way forward and
        # before it is dropped on the way back.
        migrations.RunPython(
            code=create_parsing_setting_for_existing_document_types,
            reverse_code=delete_parsing_setting_for_existing_document_types,
        )
    ]

View File

@@ -4,7 +4,7 @@ from django.db import models
from django.utils.encoding import force_text, python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
from documents.models import DocumentPage, DocumentVersion
from documents.models import DocumentPage, DocumentType, DocumentVersion
from .managers import DocumentPageContentManager
@@ -27,6 +27,21 @@ class DocumentPageContent(models.Model):
return force_text(self.document_page)
class DocumentTypeSettings(models.Model):
    """
    Parsing configuration attached one-to-one to a document type.
    """
    # Reachable from a document type as `parsing_settings`. A
    # OneToOneField is always unique, so `unique=True` is not passed.
    document_type = models.OneToOneField(
        on_delete=models.CASCADE, related_name='parsing_settings',
        to=DocumentType, verbose_name=_('Document type')
    )
    # Checked by the upload handler before a new document version is
    # submitted for parsing.
    auto_parsing = models.BooleanField(
        default=True,
        verbose_name=_('Automatically queue newly created documents for parsing.')
    )

    class Meta:
        verbose_name = _('Document type settings')
        verbose_name_plural = _('Document types settings')
@python_2_unicode_compatible
class DocumentVersionParseError(models.Model):
document_version = models.ForeignKey(

View File

@@ -9,7 +9,10 @@ namespace = PermissionNamespace('document_parsing', _('Document parsing'))
permission_content_view = namespace.add_permission(
name='content_view', label=_('View the content of a document')
)
# Permission required to edit the parsing settings of a document type.
permission_document_type_parsing_setup = namespace.add_permission(
    label=_('Change document type parsing settings'),
    name='document_type_setup'
)
permission_parse_document = namespace.add_permission(
name='parse_document', label=_('Parse the content of a document')
)

View File

@@ -6,6 +6,12 @@ from smart_settings import Namespace
namespace = Namespace(name='document_parsing', label=_('Document parsing'))
# Initial `auto_parsing` value given to newly created document types.
setting_auto_parsing = namespace.add_setting(
    default=True,
    global_name='DOCUMENT_PARSING_AUTO_PARSING',
    help_text=_(
        'Set new document types to perform parsing automatically by default.'
    )
)
setting_pdftotext_path = namespace.add_setting(
global_name='DOCUMENT_PARSING_PDFTOTEXT_PATH',
default='/usr/bin/pdftotext',

View File

@@ -0,0 +1,23 @@
from __future__ import unicode_literals
from django.test import override_settings
from documents.tests import GenericDocumentTestCase, TEST_DOCUMENT_PATH
class DocumentAutoParsingTestCase(GenericDocumentTestCase):
    """
    Check that the DOCUMENT_PARSING_AUTO_PARSING setting decides whether
    documents of a newly created document type are parsed on upload.
    """
    test_document_filename = TEST_DOCUMENT_PATH
    # The document type is created inside each test so that it is created
    # while the overridden setting is active.
    auto_create_document_type = False

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=False)
    def test_disable_auto_parsing(self):
        self.create_document_type()
        self.document = self.upload_document()
        # No content is expected, so the content iterator is exhausted
        # immediately. The `next()` builtin works on Python 2 and 3;
        # the `.next()` method only exists on Python 2.
        with self.assertRaises(StopIteration):
            next(self.document.latest_version.content())

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
    def test_enabled_auto_parsing(self):
        self.create_document_type()
        self.document = self.upload_document()
        self.assertIn('Mayan', next(self.document.content()))

View File

@@ -1,5 +1,7 @@
from __future__ import unicode_literals
from django.test import override_settings
from documents.tests import (
GenericDocumentViewTestCase, TEST_DOCUMENT_FILENAME,
TEST_DOCUMENT_PATH
@@ -9,6 +11,7 @@ from ..permissions import permission_content_view
from ..utils import get_document_content
@override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
_skip_file_descriptor_test = True

View File

@@ -5,8 +5,8 @@ from django.conf.urls import url
from .api_views import APIDocumentPageContentView
from .views import (
DocumentContentView, DocumentContentDownloadView,
DocumentParsingErrorsListView, DocumentSubmitView, DocumentTypeSubmitView,
ParseErrorListView
DocumentParsingErrorsListView, DocumentSubmitView,
DocumentTypeSettingsEditView, DocumentTypeSubmitView, ParseErrorListView
)
urlpatterns = [
@@ -22,6 +22,11 @@ urlpatterns = [
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
name='document_type_submit'
),
url(
r'^document_types/(?P<pk>\d+)/parsing/settings/$',
DocumentTypeSettingsEditView.as_view(),
name='document_type_parsing_settings'
),
url(
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
name='document_submit'

View File

@@ -8,13 +8,16 @@ from django.utils.translation import ugettext_lazy as _, ungettext
from common.generics import (
FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
SingleObjectDownloadView, SingleObjectListView
SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
)
from documents.models import Document
from documents.models import Document, DocumentType
from .forms import DocumentContentForm, DocumentTypeSelectForm
from .models import DocumentVersionParseError
from .permissions import permission_content_view, permission_parse_document
from .permissions import (
permission_content_view, permission_document_type_parsing_setup,
permission_parse_document
)
from .utils import get_document_content
@@ -110,6 +113,23 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
instance.submit_for_parsing()
class DocumentTypeSettingsEditView(SingleObjectEditView):
    """
    Edit view for the parsing settings of a single document type.

    The URL `pk` identifies the document type; the object being edited is
    the related `parsing_settings` instance.
    """
    fields = ('auto_parsing',)
    view_permission = permission_document_type_parsing_setup

    def get_extra_context(self):
        document_type = self.get_object().document_type
        title = _(
            'Edit parsing settings for document type: %s'
        ) % document_type
        return {'title': title}

    def get_object(self, queryset=None):
        # Responds with a 404 when the document type does not exist.
        document_type = get_object_or_404(
            DocumentType, pk=self.kwargs['pk']
        )
        return document_type.parsing_settings
class DocumentTypeSubmitView(FormView):
form_class = DocumentTypeSelectForm
extra_context = {