Add the option to enable or disable parsing when uploading a document for each document type. Add a new setting option to enable automatic parsing for each new document type created.
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -148,6 +148,8 @@
|
||||
- Add warning when using SQLite as the database backend.
|
||||
- Use Mailgun's flanker library to process the email sources.
|
||||
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
|
||||
- Add the option to enable or disable parsing when uploading a document for each document type.
|
||||
- Add a new setting option to enable automatic parsing for each new document type created.
|
||||
|
||||
2.7.3 (2017-09-11)
|
||||
==================
|
||||
|
||||
@@ -467,6 +467,8 @@ Other changes worth mentioning
|
||||
- Improve rendering of the details form.
|
||||
- Update rendering of the readonly multiselect widget to conform to Django's updated field class interface.
|
||||
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
|
||||
- Add the option to enable or disable parsing when uploading a document for each document type.
|
||||
- Add a new setting option to enable automatic parsing for each new document type created.
|
||||
|
||||
|
||||
Removals
|
||||
|
||||
@@ -6,6 +6,7 @@ import logging
|
||||
from kombu import Exchange, Queue
|
||||
|
||||
from django.apps import apps
|
||||
from django.db.models.signals import post_save
|
||||
from django.utils.timezone import now
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
@@ -22,13 +23,19 @@ from mayan.celery import app
|
||||
from navigation import SourceColumn
|
||||
|
||||
from .events import event_parsing_document_version_submit
|
||||
from .handlers import handler_parse_document_version
|
||||
from .handlers import (
|
||||
handler_initialize_new_parsing_settings, handler_parse_document_version
|
||||
)
|
||||
from .links import (
|
||||
link_document_content, link_document_content_download,
|
||||
link_document_parsing_errors_list, link_document_submit_multiple,
|
||||
link_document_submit, link_document_type_submit, link_error_list
|
||||
link_document_submit, link_document_type_parsing_settings,
|
||||
link_document_type_submit, link_error_list
|
||||
)
|
||||
from .permissions import (
|
||||
permission_content_view, permission_document_type_parsing_setup,
|
||||
permission_parse_document
|
||||
)
|
||||
from .permissions import permission_content_view
|
||||
from .utils import get_document_content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -66,7 +73,9 @@ class DocumentParsingApp(MayanAppConfig):
|
||||
Document = apps.get_model(
|
||||
app_label='documents', model_name='Document'
|
||||
)
|
||||
|
||||
DocumentType = apps.get_model(
|
||||
app_label='documents', model_name='DocumentType'
|
||||
)
|
||||
DocumentVersion = apps.get_model(
|
||||
app_label='documents', model_name='DocumentVersion'
|
||||
)
|
||||
@@ -74,6 +83,9 @@ class DocumentParsingApp(MayanAppConfig):
|
||||
DocumentVersionParseError = self.get_model('DocumentVersionParseError')
|
||||
|
||||
Document.add_to_class('submit_for_parsing', document_parsing_submit)
|
||||
Document.add_to_class(
|
||||
'content', get_document_content
|
||||
)
|
||||
DocumentVersion.add_to_class(
|
||||
'content', get_document_content
|
||||
)
|
||||
@@ -82,7 +94,14 @@ class DocumentParsingApp(MayanAppConfig):
|
||||
)
|
||||
|
||||
ModelPermission.register(
|
||||
model=Document, permissions=(permission_content_view,)
|
||||
model=Document, permissions=(
|
||||
permission_content_view, permission_parse_document
|
||||
)
|
||||
)
|
||||
ModelPermission.register(
|
||||
model=DocumentType, permissions=(
|
||||
permission_document_type_parsing_setup,
|
||||
)
|
||||
)
|
||||
|
||||
SourceColumn(
|
||||
@@ -127,6 +146,10 @@ class DocumentParsingApp(MayanAppConfig):
|
||||
menu_object.bind_links(
|
||||
links=(link_document_submit,), sources=(Document,)
|
||||
)
|
||||
menu_object.bind_links(
|
||||
links=(link_document_type_parsing_settings,), sources=(DocumentType,),
|
||||
position=99
|
||||
)
|
||||
menu_secondary.bind_links(
|
||||
links=(
|
||||
link_document_content, link_document_parsing_errors_list,
|
||||
@@ -143,7 +166,11 @@ class DocumentParsingApp(MayanAppConfig):
|
||||
link_document_type_submit, link_error_list,
|
||||
)
|
||||
)
|
||||
|
||||
post_save.connect(
|
||||
dispatch_uid='handler_initialize_new_parsing_settings',
|
||||
receiver=handler_initialize_new_parsing_settings,
|
||||
sender=DocumentType
|
||||
)
|
||||
post_version_upload.connect(
|
||||
dispatch_uid='document_parsing_handler_parse_document_version',
|
||||
receiver=handler_parse_document_version,
|
||||
|
||||
@@ -2,8 +2,24 @@ from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from django.apps import apps
|
||||
|
||||
from .settings import setting_auto_parsing
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def handler_initialize_new_parsing_settings(sender, instance, **kwargs):
    """
    Signal receiver that creates the parsing settings row for a newly
    created instance of the sender model.

    The ``auto_parsing`` flag of the new row is taken from the current
    value of ``setting_auto_parsing``. Nothing is created when the signal
    reports that the instance was not newly created.
    """
    # Resolve the model lazily through the app registry instead of
    # importing it at module level.
    settings_model = apps.get_model(
        model_name='DocumentTypeSettings', app_label='document_parsing'
    )

    if not kwargs['created']:
        return

    settings_model.objects.create(
        auto_parsing=setting_auto_parsing.value, document_type=instance
    )
|
||||
|
||||
|
||||
def handler_parse_document_version(sender, instance, **kwargs):
    """
    Signal receiver that submits ``instance`` for parsing, but only when
    the parsing settings of its document type have ``auto_parsing``
    enabled.

    ``instance`` must expose ``document.document_type.parsing_settings``
    and a ``submit_for_parsing()`` method.
    """
    # Submit exactly once and only when enabled. An unconditional submit
    # ahead of this check would make the per document type switch
    # ineffective and queue enabled types twice.
    if instance.document.document_type.parsing_settings.auto_parsing:
        instance.submit_for_parsing()
|
||||
|
||||
@@ -4,7 +4,10 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from navigation import Link
|
||||
|
||||
from .permissions import permission_content_view, permission_parse_document
|
||||
from .permissions import (
|
||||
permission_content_view, permission_document_type_parsing_setup,
|
||||
permission_parse_document
|
||||
)
|
||||
|
||||
link_document_content = Link(
|
||||
args='resolved_object.id', icon='fa fa-font',
|
||||
@@ -29,6 +32,10 @@ link_document_submit = Link(
|
||||
args='resolved_object.id', permissions=(permission_parse_document,),
|
||||
text=_('Submit for parsing'), view='document_parsing:document_submit'
|
||||
)
|
||||
# Link to the parsing settings view of a document type; only offered to
# users holding the document type parsing setup permission.
link_document_type_parsing_settings = Link(
    args='resolved_object.id',
    permissions=(permission_document_type_parsing_setup,),
    text=_('Setup parsing'),
    view='document_parsing:document_type_parsing_settings',
)
|
||||
link_document_type_submit = Link(
|
||||
icon='fa fa-crosshairs', text=_('Parse documents per type'),
|
||||
view='document_parsing:document_type_submit'
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.11.11 on 2018-04-10 06:39
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
def create_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Forward data migration: make sure every existing document type has a
    parsing settings row.

    ``apps`` is the historical app registry supplied by the migration
    framework; ``schema_editor`` is unused.
    """
    DocumentType = apps.get_model('documents', 'DocumentType')
    DocumentTypeSettings = apps.get_model('document_parsing', 'DocumentTypeSettings')

    for document_type in DocumentType.objects.all():
        # get_or_create() keeps the step idempotent. A plain create()
        # never raises DoesNotExist, so guarding it with that exception
        # protected against nothing and a row that already existed would
        # have violated the one-to-one constraint.
        DocumentTypeSettings.objects.get_or_create(document_type=document_type)
|
||||
|
||||
|
||||
def delete_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Reverse data migration: delete the parsing settings row of every
    existing document type, skipping document types that have none.

    ``apps`` is the historical app registry supplied by the migration
    framework; ``schema_editor`` is unused.
    """
    document_type_model = apps.get_model('documents', 'DocumentType')
    settings_model = apps.get_model('document_parsing', 'DocumentTypeSettings')

    for entry in document_type_model.objects.all():
        try:
            settings_model.objects.get(document_type=entry).delete()
        except settings_model.DoesNotExist:
            # No settings row for this document type; nothing to remove.
            continue
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Create the DocumentTypeSettings model, then populate it with one row
    per existing document type (removed again when reversed).
    """

    dependencies = [
        ('documents', '0042_auto_20180403_0702'),
        ('document_parsing', '0002_auto_20170827_1617'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentTypeSettings',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True, primary_key=True,
                        serialize=False, verbose_name='ID'
                    )
                ),
                (
                    'auto_parsing',
                    models.BooleanField(
                        default=True,
                        verbose_name='Automatically queue newly created documents for parsing.'
                    )
                ),
                (
                    'document_type',
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='parsing_settings',
                        to='documents.DocumentType',
                        verbose_name='Document type'
                    )
                ),
            ],
            options={
                'verbose_name': 'Document type settings',
                'verbose_name_plural': 'Document types settings',
            },
        ),
        # The data step runs after the table exists.
        migrations.RunPython(
            code=create_parsing_setting_for_existing_document_types,
            reverse_code=delete_parsing_setting_for_existing_document_types,
        )
    ]
|
||||
@@ -4,7 +4,7 @@ from django.db import models
|
||||
from django.utils.encoding import force_text, python_2_unicode_compatible
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from documents.models import DocumentPage, DocumentVersion
|
||||
from documents.models import DocumentPage, DocumentType, DocumentVersion
|
||||
|
||||
from .managers import DocumentPageContentManager
|
||||
|
||||
@@ -27,6 +27,21 @@ class DocumentPageContent(models.Model):
|
||||
return force_text(self.document_page)
|
||||
|
||||
|
||||
class DocumentTypeSettings(models.Model):
    """
    Parsing configuration attached one-to-one to a document type and
    reachable from it as ``parsing_settings``.
    """
    # A OneToOneField is always unique, so no explicit unique=True is
    # passed; this also mirrors the field definition in the migration.
    document_type = models.OneToOneField(
        on_delete=models.CASCADE, related_name='parsing_settings',
        to=DocumentType, verbose_name=_('Document type')
    )
    # Controls whether newly uploaded versions are queued for parsing
    # automatically.
    auto_parsing = models.BooleanField(
        default=True,
        verbose_name=_('Automatically queue newly created documents for parsing.')
    )

    class Meta:
        verbose_name = _('Document type settings')
        verbose_name_plural = _('Document types settings')
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class DocumentVersionParseError(models.Model):
|
||||
document_version = models.ForeignKey(
|
||||
|
||||
@@ -9,7 +9,10 @@ namespace = PermissionNamespace('document_parsing', _('Document parsing'))
|
||||
permission_content_view = namespace.add_permission(
|
||||
name='content_view', label=_('View the content of a document')
|
||||
)
|
||||
|
||||
# Permission required to change the parsing settings of a document type.
permission_document_type_parsing_setup = namespace.add_permission(
    label=_('Change document type parsing settings'),
    name='document_type_setup'
)
|
||||
permission_parse_document = namespace.add_permission(
|
||||
name='parse_document', label=_('Parse the content of a document')
|
||||
)
|
||||
|
||||
@@ -6,6 +6,12 @@ from smart_settings import Namespace
|
||||
|
||||
namespace = Namespace(name='document_parsing', label=_('Document parsing'))
|
||||
|
||||
# Default value of the auto parsing flag given to each new document type.
setting_auto_parsing = namespace.add_setting(
    default=True,
    global_name='DOCUMENT_PARSING_AUTO_PARSING',
    help_text=_(
        'Set new document types to perform parsing automatically by default.'
    )
)
|
||||
setting_pdftotext_path = namespace.add_setting(
|
||||
global_name='DOCUMENT_PARSING_PDFTOTEXT_PATH',
|
||||
default='/usr/bin/pdftotext',
|
||||
|
||||
23
mayan/apps/document_parsing/tests/test_models.py
Normal file
23
mayan/apps/document_parsing/tests/test_models.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.tests import GenericDocumentTestCase, TEST_DOCUMENT_PATH
|
||||
|
||||
|
||||
class DocumentAutoParsingTestCase(GenericDocumentTestCase):
    """
    Check that the DOCUMENT_PARSING_AUTO_PARSING setting decides whether
    a document uploaded to a newly created document type gets parsed.
    """
    test_document_filename = TEST_DOCUMENT_PATH
    # The document type is created inside each test so that it is created
    # while the overridden setting is active.
    auto_create_document_type = False

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=False)
    def test_disable_auto_parsing(self):
        self.create_document_type()
        self.document = self.upload_document()
        with self.assertRaises(StopIteration):
            # Builtin next() works on Python 2 and 3; the .next() method
            # only exists on Python 2 iterators.
            next(self.document.latest_version.content())

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
    def test_enabled_auto_parsing(self):
        self.create_document_type()
        self.document = self.upload_document()
        # assertIn reports both operands on failure.
        self.assertIn('Mayan', next(self.document.content()))
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.tests import (
|
||||
GenericDocumentViewTestCase, TEST_DOCUMENT_FILENAME,
|
||||
TEST_DOCUMENT_PATH
|
||||
@@ -9,6 +11,7 @@ from ..permissions import permission_content_view
|
||||
from ..utils import get_document_content
|
||||
|
||||
|
||||
@override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
|
||||
class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
|
||||
_skip_file_descriptor_test = True
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ from django.conf.urls import url
|
||||
from .api_views import APIDocumentPageContentView
|
||||
from .views import (
|
||||
DocumentContentView, DocumentContentDownloadView,
|
||||
DocumentParsingErrorsListView, DocumentSubmitView, DocumentTypeSubmitView,
|
||||
ParseErrorListView
|
||||
DocumentParsingErrorsListView, DocumentSubmitView,
|
||||
DocumentTypeSettingsEditView, DocumentTypeSubmitView, ParseErrorListView
|
||||
)
|
||||
|
||||
urlpatterns = [
|
||||
@@ -22,6 +22,11 @@ urlpatterns = [
|
||||
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
|
||||
name='document_type_submit'
|
||||
),
|
||||
url(
|
||||
r'^document_types/(?P<pk>\d+)/parsing/settings/$',
|
||||
DocumentTypeSettingsEditView.as_view(),
|
||||
name='document_type_parsing_settings'
|
||||
),
|
||||
url(
|
||||
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
|
||||
name='document_submit'
|
||||
|
||||
@@ -8,13 +8,16 @@ from django.utils.translation import ugettext_lazy as _, ungettext
|
||||
|
||||
from common.generics import (
|
||||
FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
|
||||
SingleObjectDownloadView, SingleObjectListView
|
||||
SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
|
||||
)
|
||||
from documents.models import Document
|
||||
from documents.models import Document, DocumentType
|
||||
|
||||
from .forms import DocumentContentForm, DocumentTypeSelectForm
|
||||
from .models import DocumentVersionParseError
|
||||
from .permissions import permission_content_view, permission_parse_document
|
||||
from .permissions import (
|
||||
permission_content_view, permission_document_type_parsing_setup,
|
||||
permission_parse_document
|
||||
)
|
||||
from .utils import get_document_content
|
||||
|
||||
|
||||
@@ -110,6 +113,23 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
|
||||
instance.submit_for_parsing()
|
||||
|
||||
|
||||
class DocumentTypeSettingsEditView(SingleObjectEditView):
    """
    Edit view for the parsing settings of the document type whose primary
    key is given in the URL.
    """
    fields = ('auto_parsing',)
    view_permission = permission_document_type_parsing_setup

    def get_extra_context(self):
        """Return the page title naming the document type being edited."""
        parsing_settings = self.get_object()
        title = _(
            'Edit parsing settings for document type: %s'
        ) % parsing_settings.document_type
        return {'title': title}

    def get_object(self, queryset=None):
        """
        Return the parsing settings of the requested document type, or
        raise a 404 when no document type has that primary key.
        """
        document_type = get_object_or_404(DocumentType, pk=self.kwargs['pk'])
        return document_type.parsing_settings
|
||||
|
||||
|
||||
class DocumentTypeSubmitView(FormView):
|
||||
form_class = DocumentTypeSelectForm
|
||||
extra_context = {
|
||||
|
||||
Reference in New Issue
Block a user