Add the option to enable or disable parsing when uploading a document for each document type. Add a new setting option to enable automatic parsing for each new document type created.
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -148,6 +148,8 @@
|
|||||||
- Add warning when using SQLite as the database backend.
|
- Add warning when using SQLite as the database backend.
|
||||||
- Use Mailgun's flanker library to process the email sources.
|
- Use Mailgun's flanker library to process the email sources.
|
||||||
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
|
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
|
||||||
|
- Add the option to enable or disable parsing when uploading a document for each document type.
|
||||||
|
- Add a new setting option to enable automatic parsing for each new document type created.
|
||||||
|
|
||||||
2.7.3 (2017-09-11)
|
2.7.3 (2017-09-11)
|
||||||
==================
|
==================
|
||||||
|
|||||||
@@ -467,6 +467,8 @@ Other changes worth mentioning
|
|||||||
- Improve rendering of the details form.
|
- Improve rendering of the details form.
|
||||||
- Update rendering of the readonly multiselect widget to conform to Django's updated field class interface.
|
- Update rendering of the readonly multiselect widget to conform to Django's updated field class interface.
|
||||||
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
|
- Add locking for interval sources. This reduces the chance of repeated documents from long running email downloads.
|
||||||
|
- Add the option to enable or disable parsing when uploading a document for each document type.
|
||||||
|
- Add a new setting option to enable automatic parsing for each new document type created.
|
||||||
|
|
||||||
|
|
||||||
Removals
|
Removals
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import logging
|
|||||||
from kombu import Exchange, Queue
|
from kombu import Exchange, Queue
|
||||||
|
|
||||||
from django.apps import apps
|
from django.apps import apps
|
||||||
|
from django.db.models.signals import post_save
|
||||||
from django.utils.timezone import now
|
from django.utils.timezone import now
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
@@ -22,13 +23,19 @@ from mayan.celery import app
|
|||||||
from navigation import SourceColumn
|
from navigation import SourceColumn
|
||||||
|
|
||||||
from .events import event_parsing_document_version_submit
|
from .events import event_parsing_document_version_submit
|
||||||
from .handlers import handler_parse_document_version
|
from .handlers import (
|
||||||
|
handler_initialize_new_parsing_settings, handler_parse_document_version
|
||||||
|
)
|
||||||
from .links import (
|
from .links import (
|
||||||
link_document_content, link_document_content_download,
|
link_document_content, link_document_content_download,
|
||||||
link_document_parsing_errors_list, link_document_submit_multiple,
|
link_document_parsing_errors_list, link_document_submit_multiple,
|
||||||
link_document_submit, link_document_type_submit, link_error_list
|
link_document_submit, link_document_type_parsing_settings,
|
||||||
|
link_document_type_submit, link_error_list
|
||||||
|
)
|
||||||
|
from .permissions import (
|
||||||
|
permission_content_view, permission_document_type_parsing_setup,
|
||||||
|
permission_parse_document
|
||||||
)
|
)
|
||||||
from .permissions import permission_content_view
|
|
||||||
from .utils import get_document_content
|
from .utils import get_document_content
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -66,7 +73,9 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
Document = apps.get_model(
|
Document = apps.get_model(
|
||||||
app_label='documents', model_name='Document'
|
app_label='documents', model_name='Document'
|
||||||
)
|
)
|
||||||
|
DocumentType = apps.get_model(
|
||||||
|
app_label='documents', model_name='DocumentType'
|
||||||
|
)
|
||||||
DocumentVersion = apps.get_model(
|
DocumentVersion = apps.get_model(
|
||||||
app_label='documents', model_name='DocumentVersion'
|
app_label='documents', model_name='DocumentVersion'
|
||||||
)
|
)
|
||||||
@@ -74,6 +83,9 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
DocumentVersionParseError = self.get_model('DocumentVersionParseError')
|
DocumentVersionParseError = self.get_model('DocumentVersionParseError')
|
||||||
|
|
||||||
Document.add_to_class('submit_for_parsing', document_parsing_submit)
|
Document.add_to_class('submit_for_parsing', document_parsing_submit)
|
||||||
|
Document.add_to_class(
|
||||||
|
'content', get_document_content
|
||||||
|
)
|
||||||
DocumentVersion.add_to_class(
|
DocumentVersion.add_to_class(
|
||||||
'content', get_document_content
|
'content', get_document_content
|
||||||
)
|
)
|
||||||
@@ -82,7 +94,14 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
)
|
)
|
||||||
|
|
||||||
ModelPermission.register(
|
ModelPermission.register(
|
||||||
model=Document, permissions=(permission_content_view,)
|
model=Document, permissions=(
|
||||||
|
permission_content_view, permission_parse_document
|
||||||
|
)
|
||||||
|
)
|
||||||
|
ModelPermission.register(
|
||||||
|
model=DocumentType, permissions=(
|
||||||
|
permission_document_type_parsing_setup,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
SourceColumn(
|
SourceColumn(
|
||||||
@@ -127,6 +146,10 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
menu_object.bind_links(
|
menu_object.bind_links(
|
||||||
links=(link_document_submit,), sources=(Document,)
|
links=(link_document_submit,), sources=(Document,)
|
||||||
)
|
)
|
||||||
|
menu_object.bind_links(
|
||||||
|
links=(link_document_type_parsing_settings,), sources=(DocumentType,),
|
||||||
|
position=99
|
||||||
|
)
|
||||||
menu_secondary.bind_links(
|
menu_secondary.bind_links(
|
||||||
links=(
|
links=(
|
||||||
link_document_content, link_document_parsing_errors_list,
|
link_document_content, link_document_parsing_errors_list,
|
||||||
@@ -143,7 +166,11 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
link_document_type_submit, link_error_list,
|
link_document_type_submit, link_error_list,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
post_save.connect(
|
||||||
|
dispatch_uid='handler_initialize_new_parsing_settings',
|
||||||
|
receiver=handler_initialize_new_parsing_settings,
|
||||||
|
sender=DocumentType
|
||||||
|
)
|
||||||
post_version_upload.connect(
|
post_version_upload.connect(
|
||||||
dispatch_uid='document_parsing_handler_parse_document_version',
|
dispatch_uid='document_parsing_handler_parse_document_version',
|
||||||
receiver=handler_parse_document_version,
|
receiver=handler_parse_document_version,
|
||||||
|
|||||||
@@ -2,8 +2,24 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from django.apps import apps
|
||||||
|
|
||||||
|
from .settings import setting_auto_parsing
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def handler_initialize_new_parsing_settings(sender, instance, **kwargs):
    """
    Signal receiver that attaches parsing settings to a new document type.

    Does nothing unless the signal reports the instance as newly created.
    The initial ``auto_parsing`` value comes from ``setting_auto_parsing``.
    """
    # Resolved through the app registry instead of a module level import.
    settings_model = apps.get_model(
        app_label='document_parsing', model_name='DocumentTypeSettings'
    )

    if not kwargs['created']:
        return

    settings_model.objects.create(
        auto_parsing=setting_auto_parsing.value, document_type=instance
    )
|
||||||
|
|
||||||
|
|
||||||
def handler_parse_document_version(sender, instance, **kwargs):
|
def handler_parse_document_version(sender, instance, **kwargs):
|
||||||
instance.submit_for_parsing()
|
if instance.document.document_type.parsing_settings.auto_parsing:
|
||||||
|
instance.submit_for_parsing()
|
||||||
|
|||||||
@@ -4,7 +4,10 @@ from django.utils.translation import ugettext_lazy as _
|
|||||||
|
|
||||||
from navigation import Link
|
from navigation import Link
|
||||||
|
|
||||||
from .permissions import permission_content_view, permission_parse_document
|
from .permissions import (
|
||||||
|
permission_content_view, permission_document_type_parsing_setup,
|
||||||
|
permission_parse_document
|
||||||
|
)
|
||||||
|
|
||||||
link_document_content = Link(
|
link_document_content = Link(
|
||||||
args='resolved_object.id', icon='fa fa-font',
|
args='resolved_object.id', icon='fa fa-font',
|
||||||
@@ -29,6 +32,10 @@ link_document_submit = Link(
|
|||||||
args='resolved_object.id', permissions=(permission_parse_document,),
|
args='resolved_object.id', permissions=(permission_parse_document,),
|
||||||
text=_('Submit for parsing'), view='document_parsing:document_submit'
|
text=_('Submit for parsing'), view='document_parsing:document_submit'
|
||||||
)
|
)
|
||||||
|
# Link to the parsing settings edit view of the resolved document type.
link_document_type_parsing_settings = Link(
    args='resolved_object.id',
    permissions=(permission_document_type_parsing_setup,),
    text=_('Setup parsing'),
    view='document_parsing:document_type_parsing_settings'
)
|
||||||
link_document_type_submit = Link(
|
link_document_type_submit = Link(
|
||||||
icon='fa fa-crosshairs', text=_('Parse documents per type'),
|
icon='fa fa-crosshairs', text=_('Parse documents per type'),
|
||||||
view='document_parsing:document_type_submit'
|
view='document_parsing:document_type_submit'
|
||||||
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.11.11 on 2018-04-10 06:39
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
|
||||||
|
|
||||||
|
def create_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Forward data migration: add a parsing settings row to every document type.

    Only ``document_type`` is passed to ``create()``, so ``auto_parsing``
    takes the default declared for the field in this migration.

    apps: registry used to look up the ``DocumentType`` and
        ``DocumentTypeSettings`` models.
    schema_editor: unused, part of the RunPython callable signature.
    """
    DocumentType = apps.get_model('documents', 'DocumentType')
    DocumentTypeSettings = apps.get_model('document_parsing', 'DocumentTypeSettings')

    # create() never raises DoesNotExist, so no exception handler is needed
    # here; any real database error should abort the migration.
    for document_type in DocumentType.objects.all():
        DocumentTypeSettings.objects.create(document_type=document_type)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_parsing_setting_for_existing_document_types(apps, schema_editor):
    """
    Reverse data migration: remove the parsing settings of every document type.

    Document types without a settings row are skipped.

    apps: registry used to look up the ``DocumentType`` and
        ``DocumentTypeSettings`` models.
    schema_editor: unused, part of the RunPython callable signature.
    """
    document_type_model = apps.get_model('documents', 'DocumentType')
    settings_model = apps.get_model('document_parsing', 'DocumentTypeSettings')

    for document_type in document_type_model.objects.all():
        try:
            parsing_settings = settings_model.objects.get(
                document_type=document_type
            )
        except settings_model.DoesNotExist:
            # Nothing to remove for this document type.
            continue

        parsing_settings.delete()
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Create the DocumentTypeSettings table and populate it."""

    dependencies = [
        ('documents', '0042_auto_20180403_0702'),
        ('document_parsing', '0002_auto_20170827_1617'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentTypeSettings',
            fields=[
                (
                    'id', models.AutoField(
                        auto_created=True, primary_key=True,
                        serialize=False, verbose_name='ID'
                    )
                ),
                (
                    'auto_parsing', models.BooleanField(
                        default=True,
                        verbose_name='Automatically queue newly created documents for parsing.'
                    )
                ),
                (
                    'document_type', models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='parsing_settings',
                        to='documents.DocumentType',
                        verbose_name='Document type'
                    )
                ),
            ],
            options={
                'verbose_name': 'Document type settings',
                'verbose_name_plural': 'Document types settings',
            },
        ),
        # Give every document type that already exists a settings row; the
        # reverse operation removes those rows again.
        migrations.RunPython(
            code=create_parsing_setting_for_existing_document_types,
            reverse_code=delete_parsing_setting_for_existing_document_types,
        )
    ]
|
||||||
@@ -4,7 +4,7 @@ from django.db import models
|
|||||||
from django.utils.encoding import force_text, python_2_unicode_compatible
|
from django.utils.encoding import force_text, python_2_unicode_compatible
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from documents.models import DocumentPage, DocumentVersion
|
from documents.models import DocumentPage, DocumentType, DocumentVersion
|
||||||
|
|
||||||
from .managers import DocumentPageContentManager
|
from .managers import DocumentPageContentManager
|
||||||
|
|
||||||
@@ -27,6 +27,21 @@ class DocumentPageContent(models.Model):
|
|||||||
return force_text(self.document_page)
|
return force_text(self.document_page)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeSettings(models.Model):
    """
    Parsing configuration attached to a single document type.

    Each document type has at most one settings row, reachable from the
    document type through the ``parsing_settings`` reverse accessor.
    """
    # A OneToOneField is unique by definition, so no explicit unique=True.
    document_type = models.OneToOneField(
        on_delete=models.CASCADE, related_name='parsing_settings',
        to=DocumentType, verbose_name=_('Document type')
    )
    # Flag controlling automatic queuing of new documents for parsing.
    auto_parsing = models.BooleanField(
        default=True,
        verbose_name=_('Automatically queue newly created documents for parsing.')
    )

    class Meta:
        verbose_name = _('Document type settings')
        verbose_name_plural = _('Document types settings')
|
||||||
|
|
||||||
|
|
||||||
@python_2_unicode_compatible
|
@python_2_unicode_compatible
|
||||||
class DocumentVersionParseError(models.Model):
|
class DocumentVersionParseError(models.Model):
|
||||||
document_version = models.ForeignKey(
|
document_version = models.ForeignKey(
|
||||||
|
|||||||
@@ -9,7 +9,10 @@ namespace = PermissionNamespace('document_parsing', _('Document parsing'))
|
|||||||
permission_content_view = namespace.add_permission(
|
permission_content_view = namespace.add_permission(
|
||||||
name='content_view', label=_('View the content of a document')
|
name='content_view', label=_('View the content of a document')
|
||||||
)
|
)
|
||||||
|
# Permission required to change the parsing settings of a document type.
permission_document_type_parsing_setup = namespace.add_permission(
    label=_('Change document type parsing settings'),
    name='document_type_setup'
)
|
||||||
permission_parse_document = namespace.add_permission(
|
permission_parse_document = namespace.add_permission(
|
||||||
name='parse_document', label=_('Parse the content of a document')
|
name='parse_document', label=_('Parse the content of a document')
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -6,6 +6,12 @@ from smart_settings import Namespace
|
|||||||
|
|
||||||
namespace = Namespace(name='document_parsing', label=_('Document parsing'))
|
namespace = Namespace(name='document_parsing', label=_('Document parsing'))
|
||||||
|
|
||||||
|
# Initial value of the automatic parsing flag for new document types.
setting_auto_parsing = namespace.add_setting(
    default=True,
    global_name='DOCUMENT_PARSING_AUTO_PARSING',
    help_text=_(
        'Set new document types to perform parsing automatically by default.'
    )
)
|
||||||
setting_pdftotext_path = namespace.add_setting(
|
setting_pdftotext_path = namespace.add_setting(
|
||||||
global_name='DOCUMENT_PARSING_PDFTOTEXT_PATH',
|
global_name='DOCUMENT_PARSING_PDFTOTEXT_PATH',
|
||||||
default='/usr/bin/pdftotext',
|
default='/usr/bin/pdftotext',
|
||||||
|
|||||||
23
mayan/apps/document_parsing/tests/test_models.py
Normal file
23
mayan/apps/document_parsing/tests/test_models.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.test import override_settings
|
||||||
|
|
||||||
|
from documents.tests import GenericDocumentTestCase, TEST_DOCUMENT_PATH
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentAutoParsingTestCase(GenericDocumentTestCase):
    """Check that the auto parsing setting controls parsing on upload."""
    test_document_filename = TEST_DOCUMENT_PATH
    # The document type is created inside each test, after the setting
    # override is active.
    auto_create_document_type = False

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=False)
    def test_disable_auto_parsing(self):
        """No content is available when auto parsing is disabled."""
        self.create_document_type()
        self.document = self.upload_document()
        # Built-in next() works on Python 2 and 3, unlike the .next() method.
        with self.assertRaises(StopIteration):
            next(self.document.latest_version.content())

    @override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
    def test_enabled_auto_parsing(self):
        """Parsed content is available when auto parsing is enabled."""
        self.create_document_type()
        self.document = self.upload_document()
        self.assertIn('Mayan', next(self.document.content()))
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.test import override_settings
|
||||||
|
|
||||||
from documents.tests import (
|
from documents.tests import (
|
||||||
GenericDocumentViewTestCase, TEST_DOCUMENT_FILENAME,
|
GenericDocumentViewTestCase, TEST_DOCUMENT_FILENAME,
|
||||||
TEST_DOCUMENT_PATH
|
TEST_DOCUMENT_PATH
|
||||||
@@ -9,6 +11,7 @@ from ..permissions import permission_content_view
|
|||||||
from ..utils import get_document_content
|
from ..utils import get_document_content
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(DOCUMENT_PARSING_AUTO_PARSING=True)
|
||||||
class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
|
class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
|
||||||
_skip_file_descriptor_test = True
|
_skip_file_descriptor_test = True
|
||||||
|
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ from django.conf.urls import url
|
|||||||
from .api_views import APIDocumentPageContentView
|
from .api_views import APIDocumentPageContentView
|
||||||
from .views import (
|
from .views import (
|
||||||
DocumentContentView, DocumentContentDownloadView,
|
DocumentContentView, DocumentContentDownloadView,
|
||||||
DocumentParsingErrorsListView, DocumentSubmitView, DocumentTypeSubmitView,
|
DocumentParsingErrorsListView, DocumentSubmitView,
|
||||||
ParseErrorListView
|
DocumentTypeSettingsEditView, DocumentTypeSubmitView, ParseErrorListView
|
||||||
)
|
)
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
@@ -22,6 +22,11 @@ urlpatterns = [
|
|||||||
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
|
r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
|
||||||
name='document_type_submit'
|
name='document_type_submit'
|
||||||
),
|
),
|
||||||
|
url(
|
||||||
|
r'^document_types/(?P<pk>\d+)/parsing/settings/$',
|
||||||
|
DocumentTypeSettingsEditView.as_view(),
|
||||||
|
name='document_type_parsing_settings'
|
||||||
|
),
|
||||||
url(
|
url(
|
||||||
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
|
r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
|
||||||
name='document_submit'
|
name='document_submit'
|
||||||
|
|||||||
@@ -8,13 +8,16 @@ from django.utils.translation import ugettext_lazy as _, ungettext
|
|||||||
|
|
||||||
from common.generics import (
|
from common.generics import (
|
||||||
FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
|
FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
|
||||||
SingleObjectDownloadView, SingleObjectListView
|
SingleObjectDownloadView, SingleObjectEditView, SingleObjectListView
|
||||||
)
|
)
|
||||||
from documents.models import Document
|
from documents.models import Document, DocumentType
|
||||||
|
|
||||||
from .forms import DocumentContentForm, DocumentTypeSelectForm
|
from .forms import DocumentContentForm, DocumentTypeSelectForm
|
||||||
from .models import DocumentVersionParseError
|
from .models import DocumentVersionParseError
|
||||||
from .permissions import permission_content_view, permission_parse_document
|
from .permissions import (
|
||||||
|
permission_content_view, permission_document_type_parsing_setup,
|
||||||
|
permission_parse_document
|
||||||
|
)
|
||||||
from .utils import get_document_content
|
from .utils import get_document_content
|
||||||
|
|
||||||
|
|
||||||
@@ -110,6 +113,23 @@ class DocumentSubmitView(MultipleObjectConfirmActionView):
|
|||||||
instance.submit_for_parsing()
|
instance.submit_for_parsing()
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeSettingsEditView(SingleObjectEditView):
    """Edit view for the parsing settings of one document type."""
    fields = ('auto_parsing',)
    view_permission = permission_document_type_parsing_setup

    def get_extra_context(self):
        """Return the page title naming the document type being edited."""
        parsing_settings = self.get_object()
        title = _(
            'Edit parsing settings for document type: %s'
        ) % parsing_settings.document_type

        return {'title': title}

    def get_object(self, queryset=None):
        """
        Return the parsing settings of the document type given by the
        ``pk`` URL argument, or a 404 if no such document type exists.
        """
        document_type = get_object_or_404(DocumentType, pk=self.kwargs['pk'])

        return document_type.parsing_settings
|
||||||
|
|
||||||
|
|
||||||
class DocumentTypeSubmitView(FormView):
|
class DocumentTypeSubmitView(FormView):
|
||||||
form_class = DocumentTypeSelectForm
|
form_class = DocumentTypeSelectForm
|
||||||
extra_context = {
|
extra_context = {
|
||||||
|
|||||||
Reference in New Issue
Block a user