diff --git a/mayan/apps/document_parsing/admin.py b/mayan/apps/document_parsing/admin.py
index 1bb19bf3ac..258da5ec3d 100644
--- a/mayan/apps/document_parsing/admin.py
+++ b/mayan/apps/document_parsing/admin.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
from django.contrib import admin
from .models import (
- DocumentPageContent, DocumentTypeSettings, DocumentVersionOCRError
+ DocumentPageContent, DocumentVersionParseError
)
@@ -12,12 +12,7 @@ class DocumentPageContentAdmin(admin.ModelAdmin):
list_display = ('document_page',)
-@admin.register(DocumentTypeSettings)
-class DocumentTypeSettingsAdmin(admin.ModelAdmin):
- list_display = ('document_type', 'auto_ocr')
-
-
-@admin.register(DocumentVersionOCRError)
-class DocumentVersionOCRErrorAdmin(admin.ModelAdmin):
+@admin.register(DocumentVersionParseError)
+class DocumentVersionParseErrorAdmin(admin.ModelAdmin):  # admin listing of parse-error records; the fields named below are read-only
list_display = ('document_version', 'datetime_submitted')
readonly_fields = ('document_version', 'datetime_submitted', 'result')
diff --git a/mayan/apps/document_parsing/api_views.py b/mayan/apps/document_parsing/api_views.py
index ded56e8ed8..a074fec935 100644
--- a/mayan/apps/document_parsing/api_views.py
+++ b/mayan/apps/document_parsing/api_views.py
@@ -1,75 +1,19 @@
from __future__ import absolute_import, unicode_literals
-from rest_framework import generics, status
+from rest_framework import generics
from rest_framework.response import Response
-from documents.models import Document, DocumentPage, DocumentVersion
+from documents.models import DocumentPage
from rest_api.permissions import MayanPermission
from .models import DocumentPageContent
-from .permissions import permission_ocr_content_view, permission_ocr_document
+from .permissions import permission_content_view
from .serializers import DocumentPageContentSerializer
-class APIDocumentOCRView(generics.GenericAPIView):
- mayan_object_permissions = {
- 'POST': (permission_ocr_document,)
- }
- permission_classes = (MayanPermission,)
- queryset = Document.objects.all()
-
- def get_serializer_class(self):
- return None
-
- def post(self, request, *args, **kwargs):
- """
- Submit a document for OCR.
- ---
- omit_serializer: true
- parameters:
- - name: pk
- paramType: path
- type: number
- responseMessages:
- - code: 202
- message: Accepted
- """
-
- self.get_object().submit_for_ocr()
- return Response(status=status.HTTP_202_ACCEPTED)
-
-
-class APIDocumentVersionOCRView(generics.GenericAPIView):
- mayan_object_permissions = {
- 'POST': (permission_ocr_document,)
- }
- permission_classes = (MayanPermission,)
- queryset = DocumentVersion.objects.all()
-
- def get_serializer_class(self):
- return None
-
- def post(self, request, *args, **kwargs):
- """
- Submit a document version for OCR.
- ---
- omit_serializer: true
- parameters:
- - name: pk
- paramType: path
- type: number
- responseMessages:
- - code: 202
- message: Accepted
- """
-
- self.get_object().submit_for_ocr()
- return Response(status=status.HTTP_202_ACCEPTED)
-
-
class APIDocumentPageContentView(generics.RetrieveAPIView):
"""
- Returns the OCR content of the selected document page.
+ Returns the content of the selected document page.
---
GET:
parameters:
@@ -79,7 +23,7 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
"""
mayan_object_permissions = {
- 'GET': (permission_ocr_content_view,),
+ 'GET': (permission_content_view,),
}
permission_classes = (MayanPermission,)
serializer_class = DocumentPageContentSerializer
@@ -89,9 +33,9 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
instance = self.get_object()
try:
- ocr_content = instance.ocr_content
+ content = instance.content
except DocumentPageContent.DoesNotExist:
- ocr_content = DocumentPageContent.objects.none()
+ content = DocumentPageContent.objects.none()
- serializer = self.get_serializer(ocr_content)
+ serializer = self.get_serializer(content)
return Response(serializer.data)
diff --git a/mayan/apps/document_parsing/apps.py b/mayan/apps/document_parsing/apps.py
index 6b9a68d499..d65953daf7 100644
--- a/mayan/apps/document_parsing/apps.py
+++ b/mayan/apps/document_parsing/apps.py
@@ -1,11 +1,12 @@
from __future__ import unicode_literals
+from datetime import timedelta
import logging
from kombu import Exchange, Queue
from django.apps import apps
-from django.db.models.signals import post_save
+from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
from acls import ModelPermission
@@ -21,16 +22,38 @@ from mayan.celery import app
from navigation import SourceColumn
from rest_api.classes import APIEndPoint
+from .events import event_parsing_document_version_submit
from .handlers import handler_parse_document_version
from .links import (
- link_document_content, link_entry_list, link_document_content_errors_list,
- link_document_content_download
+ link_document_content, link_document_content_download,
+ link_document_parsing_errors_list, link_document_submit_multiple,
+ link_document_submit, link_document_type_submit, link_error_list
)
from .permissions import permission_content_view
logger = logging.getLogger(__name__)
+def document_parsing_submit(self):  # attached to Document as submit_for_parsing() via add_to_class
+ latest_version = self.latest_version
+ # A document without any version is skipped silently instead of raising
+ if latest_version:
+ latest_version.submit_for_parsing()
+
+
+def document_version_parsing_submit(self):  # attached to DocumentVersion as submit_for_parsing() via add_to_class
+ from .tasks import task_parse_document_version  # imported at call time rather than at module load
+
+ event_parsing_document_version_submit.commit(  # the submit event is recorded before the task is queued
+ action_object=self.document, target=self
+ )
+
+ task_parse_document_version.apply_async(
+ eta=now() + timedelta(seconds=settings_db_sync_task_delay.value),  # NOTE(review): settings_db_sync_task_delay is not imported in any hunk shown here - confirm it is in scope
+ kwargs={'document_version_pk': self.pk},
+ )
+
+
class DocumentParsingApp(MayanAppConfig):
has_tests = True
name = 'document_parsing'
@@ -45,16 +68,17 @@ class DocumentParsingApp(MayanAppConfig):
app_label='documents', model_name='Document'
)
- DocumentType = apps.get_model(
- app_label='documents', model_name='DocumentType'
- )
-
DocumentVersion = apps.get_model(
app_label='documents', model_name='DocumentVersion'
)
DocumentVersionParseError = self.get_model('DocumentVersionParseError')
+ Document.add_to_class('submit_for_parsing', document_parsing_submit)
+ DocumentVersion.add_to_class(
+ 'submit_for_parsing', document_version_parsing_submit
+ )
+
ModelPermission.register(
model=Document, permissions=(permission_content_view,)
)
@@ -72,6 +96,18 @@ class DocumentParsingApp(MayanAppConfig):
attribute='result'
)
+ app.conf.CELERY_QUEUES.append(
+ Queue('parsing', Exchange('parsing'), routing_key='parsing'),
+ )
+
+ app.conf.CELERY_ROUTES.update(
+ {
+ 'document_parsing.tasks.task_parse_document_version': {
+ 'queue': 'parsing'
+ },
+ }
+ )
+
document_search.add_model_field(
field='versions__pages__content__content', label=_('Content')
)
@@ -89,32 +125,20 @@ class DocumentParsingApp(MayanAppConfig):
menu_object.bind_links(
links=(link_document_submit,), sources=(Document,)
)
- menu_object.bind_links(
- links=(link_document_type_ocr_settings,), sources=(DocumentType,)
- )
menu_secondary.bind_links(
links=(
- link_document_content, link_document_ocr_erros_list,
- link_document_ocr_download
+ link_document_content, link_document_parsing_errors_list,
+ link_document_content_download
),
sources=(
'document_parsing:document_content',
- 'document_parsing:document_ocr_error_list',
- 'document_parsing:document_ocr_download',
- )
- )
- menu_secondary.bind_links(
- links=(link_entry_list,),
- sources=(
- 'document_parsing:entry_list',
- 'document_parsing:entry_delete_multiple',
- 'document_parsing:entry_re_queue_multiple',
- DocumentVersionParseError
+ 'document_parsing:document_content_download',
+ 'document_parsing:document_parsing_error_list',
)
)
menu_tools.bind_links(
links=(
- link_entry_list
+ link_document_type_submit, link_error_list,
)
)
diff --git a/mayan/apps/document_parsing/events.py b/mayan/apps/document_parsing/events.py
new file mode 100644
index 0000000000..875527e911
--- /dev/null
+++ b/mayan/apps/document_parsing/events.py
@@ -0,0 +1,14 @@
+from __future__ import absolute_import, unicode_literals
+
+from django.utils.translation import ugettext_lazy as _
+
+from events.classes import Event
+
+event_parsing_document_version_submit = Event(  # committed when a document version is submitted for parsing
+ name='parsing_document_version_submit',
+ label=_('Document version submitted for parsing')
+)
+event_parsing_document_version_finish = Event(  # committed by DocumentPageContentManager.process_document_version
+ name='parsing_document_version_finish',
+ label=_('Document version parsing finished')
+)
diff --git a/mayan/apps/document_parsing/exceptions.py b/mayan/apps/document_parsing/exceptions.py
index 9fc7a9b90a..76f872cabd 100644
--- a/mayan/apps/document_parsing/exceptions.py
+++ b/mayan/apps/document_parsing/exceptions.py
@@ -1,13 +1,6 @@
from __future__ import unicode_literals
-class OCRError(Exception):
- """
- Raised by the OCR backend
- """
- pass
-
-
class ParserError(Exception):
"""
Base exception for file parsers
diff --git a/mayan/apps/document_parsing/forms.py b/mayan/apps/document_parsing/forms.py
index 0881a9185a..0a7fe44a5a 100644
--- a/mayan/apps/document_parsing/forms.py
+++ b/mayan/apps/document_parsing/forms.py
@@ -6,10 +6,12 @@ from django.utils.html import conditional_escape
from django.utils.safestring import mark_safe
from django.utils.translation import ugettext_lazy as _, ugettext
+from acls.models import AccessControlList
from common.widgets import TextAreaDiv
from documents.models import DocumentType
-from .models import DocumentPageContent, DocumentPageOCRContent
+from .models import DocumentPageContent
+from .permissions import permission_parse_document
class DocumentContentForm(forms.Form):
@@ -29,7 +31,7 @@ class DocumentContentForm(forms.Form):
for page in document_pages:
try:
- page_content = page.ocr_content.content
+ page_content = page.content.content
except DocumentPageContent.DoesNotExist:
pass
else:
@@ -55,50 +57,16 @@ class DocumentContentForm(forms.Form):
)
-class DocumentOCRContentForm(forms.Form):
- """
- Form that concatenates all of a document pages' text content into a
- single textarea widget
- """
- def __init__(self, *args, **kwargs):
- self.document = kwargs.pop('instance', None)
- super(DocumentContentForm, self).__init__(*args, **kwargs)
- content = []
- self.fields['contents'].initial = ''
- try:
- document_pages = self.document.pages.all()
- except AttributeError:
- document_pages = []
-
- for page in document_pages:
- try:
- page_content = page.ocr_content.content
- except DocumentPageOCRContent.DoesNotExist:
- pass
- else:
- content.append(conditional_escape(force_text(page_content)))
- content.append(
- '\n\n\n
- %s -
\n\n\n' % (
- ugettext(
- 'Page %(page_number)d'
- ) % {'page_number': page.page_number}
- )
- )
-
- self.fields['contents'].initial = mark_safe(''.join(content))
-
- contents = forms.CharField(
- label=_('Contents'),
- widget=TextAreaDiv(
- attrs={
- 'class': 'text_area_div full-height',
- 'data-height-difference': 360
- }
- )
- )
-
-
class DocumentTypeSelectForm(forms.Form):
document_type = forms.ModelChoiceField(
- queryset=DocumentType.objects.all(), label=('Document type')
+ queryset=DocumentType.objects.none(), label=_('Document type')  # label now goes through _() so it is translated; real choices are set per user in __init__
)
+
+ def __init__(self, *args, **kwargs):
+ user = kwargs.pop('user')  # mandatory keyword: the user whose access filters the choices (KeyError when omitted)
+ super(DocumentTypeSelectForm, self).__init__(*args, **kwargs)
+ queryset = AccessControlList.objects.filter_by_access(  # keep only document types this user may parse
+ permission=permission_parse_document,
+ queryset=DocumentType.objects.all(), user=user,
+ )
+ self.fields['document_type'].queryset = queryset
diff --git a/mayan/apps/document_parsing/handlers.py b/mayan/apps/document_parsing/handlers.py
index 618826246c..6e3338f484 100644
--- a/mayan/apps/document_parsing/handlers.py
+++ b/mayan/apps/document_parsing/handlers.py
@@ -2,14 +2,8 @@ from __future__ import unicode_literals
import logging
-from django.apps import apps
-
-from .settings import setting_auto_ocr
-from .parsers import Parser
-
logger = logging.getLogger(__name__)
def handler_parse_document_version(sender, instance, **kwargs):
- if kwargs['created']:
- Parser.parse_document_version(document_version=instance)
+ instance.submit_for_parsing()  # queues parsing instead of running it inline. NOTE(review): the kwargs['created'] guard is gone - confirm the connected signal fires only for new versions
diff --git a/mayan/apps/document_parsing/links.py b/mayan/apps/document_parsing/links.py
index cce30bcad5..ac84081841 100644
--- a/mayan/apps/document_parsing/links.py
+++ b/mayan/apps/document_parsing/links.py
@@ -4,24 +4,36 @@ from django.utils.translation import ugettext_lazy as _
from navigation import Link
-from .permissions import permission_content_view
+from .permissions import permission_content_view, permission_parse_document
link_document_content = Link(
args='resolved_object.id', icon='fa fa-font',
permissions=(permission_content_view,), text=_('Content'),
view='document_parsing:document_content',
)
-link_entry_list = Link(
- icon='fa fa-file-text-o', permissions=(permission_ocr_document,),
- text=_('Parsing errors'), view='document_parsing:entry_list'
-)
-link_document_content_errors_list = Link(
+link_document_parsing_errors_list = Link(
args='resolved_object.id', icon='fa fa-file-text-o',
- permissions=(permission_ocr_content_view,), text=_('Parsing errors'),
- view='document_parsing:document_page_parsing_error_list'
+ permissions=(permission_content_view,), text=_('Parsing errors'),
+ view='document_parsing:document_parsing_error_list'
)
link_document_content_download = Link(
args='resolved_object.id', icon='fa fa-file-text-o',
- permissions=(permission_ocr_content_view,), text=_('Download content'),
+ permissions=(permission_content_view,), text=_('Download content'),
view='document_parsing:document_content_download'
)
+link_document_submit_multiple = Link(  # bulk action: declared without args or permissions
+ text=_('Submit for parsing'),
+ view='document_parsing:document_submit_multiple'
+)
+link_document_submit = Link(  # single-document action, gated by permission_parse_document
+ args='resolved_object.id', permissions=(permission_parse_document,),
+ text=_('Submit for parsing'), view='document_parsing:document_submit'
+)
+link_document_type_submit = Link(  # bound to the tools menu; no permissions declared on the link itself
+ icon='fa fa-crosshairs', text=_('Parse documents per type'),
+ view='document_parsing:document_type_submit'
+)
+link_error_list = Link(  # bound to the tools menu; its view takes no object argument
+ icon='fa fa-file-text-o', permissions=(permission_content_view,),
+ text=_('Parsing errors'), view='document_parsing:error_list'
+)
diff --git a/mayan/apps/document_parsing/managers.py b/mayan/apps/document_parsing/managers.py
index 2e17131486..34f50be8da 100644
--- a/mayan/apps/document_parsing/managers.py
+++ b/mayan/apps/document_parsing/managers.py
@@ -1,14 +1,50 @@
from __future__ import unicode_literals
-from datetime import timedelta
import logging
+import sys
+import traceback
-from django.apps import apps
+from django.conf import settings
from django.db import models
-from django.utils.timezone import now
+
+from .events import event_parsing_document_version_finish
+from .parsers import Parser
logger = logging.getLogger(__name__)
class DocumentPageContentManager(models.Manager):
- pass
+ def process_document_version(self, document_version):  # parse one version; failures are stored as parsing_errors rows
+ logger.info(
+ 'Starting parsing for document version: %s', document_version
+ )
+ logger.debug('document version: %d', document_version.pk)
+
+ try:
+ Parser.parse_document_version(document_version=document_version)
+ except Exception as exception:  # deliberately broad: any parser failure is stored, not re-raised
+ logger.exception(
+ 'Parsing error for document version: %d; %s',
+ document_version.pk, exception,
+ )
+
+ if settings.DEBUG:  # with DEBUG on, the stored result also carries the traceback
+ result = []
+ exc_type, value, tb = sys.exc_info()  # named exc_type so the builtin type() is not shadowed
+ result.append('%s: %s' % (exc_type.__name__, value))
+ result.extend(traceback.format_tb(tb))
+ document_version.parsing_errors.create(
+ result='\n'.join(result)
+ )
+ else:
+ document_version.parsing_errors.create(result=exception)
+ else:
+ logger.info(
+ 'Parsing complete for document version: %s', document_version
+ )
+ document_version.parsing_errors.all().delete()  # a clean run clears errors left by earlier attempts
+
+ event_parsing_document_version_finish.commit(
+ action_object=document_version.document,
+ target=document_version
+ )
diff --git a/mayan/apps/document_parsing/migrations/0001_initial.py b/mayan/apps/document_parsing/migrations/0001_initial.py
new file mode 100644
index 0000000000..d421f6cd7b
--- /dev/null
+++ b/mayan/apps/document_parsing/migrations/0001_initial.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.10.7 on 2017-08-23 18:55
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):  # initial schema: DocumentPageContent and DocumentVersionParseError
+
+ initial = True
+
+ dependencies = [
+ ('documents', '0041_auto_20170823_1855'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='DocumentPageContent',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('content', models.TextField(blank=True, verbose_name='Content')),
+ ('document_page', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='content', to='documents.DocumentPage', verbose_name='Document page')),
+ ],
+ options={
+ 'verbose_name': 'Document page content',
+ 'verbose_name_plural': 'Document pages contents',
+ },
+ ),
+ migrations.CreateModel(
+ name='DocumentVersionParseError',  # related_name below must match models.py ('parsing_errors')
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('datetime_submitted', models.DateTimeField(auto_now_add=True, db_index=True, verbose_name='Date time submitted')),
+ ('result', models.TextField(blank=True, null=True, verbose_name='Result')),
+ ('document_version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='parsing_errors', to='documents.DocumentVersion', verbose_name='Document version')),
+ ],
+ options={
+ 'ordering': ('datetime_submitted',),
+ 'verbose_name': 'Document version parse error',
+ 'verbose_name_plural': 'Document version parse errors',
+ },
+ ),
+ ]
diff --git a/mayan/apps/document_parsing/migrations/__init__.py b/mayan/apps/document_parsing/migrations/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/mayan/apps/document_parsing/models.py b/mayan/apps/document_parsing/models.py
index 38dc9ff7f1..27d3a21266 100644
--- a/mayan/apps/document_parsing/models.py
+++ b/mayan/apps/document_parsing/models.py
@@ -4,7 +4,7 @@ from django.db import models
from django.utils.encoding import force_text, python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
-from documents.models import DocumentPage, DocumentType, DocumentVersion
+from documents.models import DocumentPage, DocumentVersion
from .managers import DocumentPageContentManager
@@ -30,11 +30,11 @@ class DocumentPageContent(models.Model):
@python_2_unicode_compatible
class DocumentVersionParseError(models.Model):
document_version = models.ForeignKey(
- DocumentVersion, on_delete=models.CASCADE, related_name='parse_errors',
- verbose_name=_('Document version')
+ DocumentVersion, on_delete=models.CASCADE,
+ related_name='parsing_errors', verbose_name=_('Document version')
)
datetime_submitted = models.DateTimeField(
- auto_add_now=True, db_index=True, verbose_name=_('Date time submitted')
+ auto_now_add=True, db_index=True, verbose_name=_('Date time submitted')
)
result = models.TextField(blank=True, null=True, verbose_name=_('Result'))
diff --git a/mayan/apps/document_parsing/parsers.py b/mayan/apps/document_parsing/parsers.py
index 87570afa1f..977b83f437 100644
--- a/mayan/apps/document_parsing/parsers.py
+++ b/mayan/apps/document_parsing/parsers.py
@@ -1,20 +1,15 @@
from __future__ import unicode_literals
-from io import BytesIO
import logging
import os
-from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
-from pdfminer.pdfpage import PDFPage
-from pdfminer.converter import TextConverter
-from pdfminer.layout import LAParams
import subprocess
+from django.apps import apps
from django.utils.translation import ugettext_lazy as _
from common.utils import copyfile, fs_cleanup, mkstemp
from .exceptions import ParserError, NoMIMETypeMatch
-from .models import DocumentPageContent
from .settings import setting_pdftotext_path
logger = logging.getLogger(__name__)
@@ -82,6 +77,10 @@ class Parser(object):
self.process_document_page(document_page=document_page)
def process_document_page(self, document_page):
+ DocumentPageContent = apps.get_model(
+ app_label='document_parsing', model_name='DocumentPageContent'
+ )
+
logger.info(
'Processing page: %d of document version: %s',
document_page.page_number, document_page.document_version
@@ -171,32 +170,7 @@ class PopplerParser(Parser):
return output
-class PDFMinerParser(Parser):
- """
- Parser for PDF files using the PDFMiner library for Python
- """
-
- def execute(self, file_object, page_number):
- logger.debug('Parsing PDF page: %d', page_number)
-
- with BytesIO() as string_buffer:
- rsrcmgr = PDFResourceManager()
- device = TextConverter(
- rsrcmgr, outfp=string_buffer, laparams=LAParams()
- )
- interpreter = PDFPageInterpreter(rsrcmgr, device)
- page = PDFPage.get_pages(
- file_object, maxpages=1, pagenos=(page_number - 1,)
- )
- interpreter.process_page(page.next())
- device.close()
-
- logger.debug('Finished parsing PDF: %d', page_number)
-
- return string_buffer.getvalue()
-
-
Parser.register(
mimetypes=('application/pdf',),
- parser_classes=(PopplerParser, PDFMinerParser)
+ parser_classes=(PopplerParser,)
)
diff --git a/mayan/apps/document_parsing/permissions.py b/mayan/apps/document_parsing/permissions.py
index fd003b8f35..f58deedfcb 100644
--- a/mayan/apps/document_parsing/permissions.py
+++ b/mayan/apps/document_parsing/permissions.py
@@ -9,3 +9,7 @@ namespace = PermissionNamespace('document_parsing', _('Document parsing'))
permission_content_view = namespace.add_permission(
name='content_view', label=_('View the content of a document')
)
+
+permission_parse_document = namespace.add_permission(  # required by the "Submit for parsing" link and the document type select form
+ name='parse_document', label=_('Parse the content of a document')
+)
diff --git a/mayan/apps/document_parsing/queues.py b/mayan/apps/document_parsing/queues.py
index 92297a2524..42036d2420 100644
--- a/mayan/apps/document_parsing/queues.py
+++ b/mayan/apps/document_parsing/queues.py
@@ -4,7 +4,8 @@ from django.utils.translation import ugettext_lazy as _
from task_manager.classes import CeleryQueue
-queue_ocr = CeleryQueue(name='ocr', label=_('OCR'))
+queue_ocr = CeleryQueue(name='parsing', label=_('Parsing'))  # NOTE(review): variable name still says "ocr" although the queue is 'parsing' - rename once importers are confirmed
queue_ocr.add_task_type(
- name='ocr.tasks.task_do_ocr', label=_('Document version OCR')
+ name='document_parsing.tasks.task_parse_document_version',
+ label=_('Document version parsing')
)
diff --git a/mayan/apps/document_parsing/tasks.py b/mayan/apps/document_parsing/tasks.py
new file mode 100644
index 0000000000..4debffbc60
--- /dev/null
+++ b/mayan/apps/document_parsing/tasks.py
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+import logging
+
+from django.apps import apps
+
+from mayan.celery import app
+
+logger = logging.getLogger(__name__)
+
+
+@app.task(ignore_result=True)
+def task_parse_document_version(document_version_pk):  # Celery task; apps.py routes it to the 'parsing' queue
+ DocumentVersion = apps.get_model(  # models are looked up through the app registry, not imported
+ app_label='documents', model_name='DocumentVersion'
+ )
+ DocumentPageContent = apps.get_model(
+ app_label='document_parsing', model_name='DocumentPageContent'
+ )
+
+ document_version = DocumentVersion.objects.get(  # raises DocumentVersion.DoesNotExist when the pk no longer exists
+ pk=document_version_pk
+ )
+ logger.info(
+ 'Starting parsing for document version: %s', document_version
+ )
+ DocumentPageContent.objects.process_document_version(
+ document_version=document_version
+ )
diff --git a/mayan/apps/document_parsing/tests/test_api.py b/mayan/apps/document_parsing/tests/test_api.py
deleted file mode 100644
index fb73bef98d..0000000000
--- a/mayan/apps/document_parsing/tests/test_api.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from django.contrib.auth import get_user_model
-from django.urls import reverse
-
-from rest_framework import status
-
-from documents.models import DocumentType
-from documents.tests import TEST_DOCUMENT_TYPE_LABEL, TEST_SMALL_DOCUMENT_PATH
-from rest_api.tests import BaseAPITestCase
-from user_management.tests import (
- TEST_ADMIN_EMAIL, TEST_ADMIN_PASSWORD, TEST_ADMIN_USERNAME
-)
-
-
-class OCRAPITestCase(BaseAPITestCase):
- """
- Test the OCR app API endpoints
- """
-
- def setUp(self):
- super(OCRAPITestCase, self).setUp()
-
- self.admin_user = get_user_model().objects.create_superuser(
- username=TEST_ADMIN_USERNAME, email=TEST_ADMIN_EMAIL,
- password=TEST_ADMIN_PASSWORD
- )
-
- self.client.login(
- username=TEST_ADMIN_USERNAME, password=TEST_ADMIN_PASSWORD
- )
-
- self.document_type = DocumentType.objects.create(
- label=TEST_DOCUMENT_TYPE_LABEL
- )
-
- with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
- self.document = self.document_type.new_document(
- file_object=file_object,
- )
-
- def tearDown(self):
- self.document_type.delete()
- super(OCRAPITestCase, self).tearDown()
-
- def test_submit_document(self):
- response = self.client.post(
- reverse(
- 'rest_api:document-ocr-submit-view',
- args=(self.document.pk,)
- )
- )
-
- self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
-
- content = self.document.pages.first().ocr_content.content
-
- self.assertTrue('Mayan EDMS Documentation' in content)
-
- def test_submit_document_version(self):
- response = self.client.post(
- reverse(
- 'rest_api:document-version-ocr-submit-view',
- args=(self.document.latest_version.pk,)
- )
- )
-
- self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
-
- content = self.document.pages.first().ocr_content.content
-
- self.assertTrue('Mayan EDMS Documentation' in content)
-
- def test_get_document_version_page_content(self):
- response = self.client.get(
- reverse(
- 'rest_api:document-page-content-view',
- args=(self.document.latest_version.pages.first().pk,)
- ),
- )
-
- self.assertEqual(response.status_code, status.HTTP_200_OK)
-
- self.assertTrue(
- 'Mayan EDMS Documentation' in json.loads(response.content)['content']
- )
diff --git a/mayan/apps/document_parsing/tests/test_events.py b/mayan/apps/document_parsing/tests/test_events.py
index dc366623d2..073cc90663 100644
--- a/mayan/apps/document_parsing/tests/test_events.py
+++ b/mayan/apps/document_parsing/tests/test_events.py
@@ -2,40 +2,38 @@ from __future__ import unicode_literals
from actstream.models import Action
+from documents.tests.literals import TEST_DOCUMENT_FILENAME
from documents.tests.test_models import GenericDocumentTestCase
from ..events import (
- event_ocr_document_version_submit, event_ocr_document_version_finish
+ event_parsing_document_version_submit,
+ event_parsing_document_version_finish
)
-class OCREventsTestCase(GenericDocumentTestCase):
+class DocumentParsingEventsTestCase(GenericDocumentTestCase):
+ # Ensure we use a PDF file
+ test_document_filename = TEST_DOCUMENT_FILENAME
+
def test_document_version_submit_event(self):
Action.objects.all().delete()
- self.document.submit_for_ocr()
-
- self.assertEqual(
- Action.objects.first().target, self.document.latest_version
- )
- self.assertEqual(
- Action.objects.first().verb,
- event_ocr_document_version_submit.name
- )
-
- def test_document_version_finish_event(self):
- Action.objects.all().delete()
- self.document.submit_for_ocr()
- from ..models import DocumentVersionOCRError, DocumentPageContent
- #print DocumentVersionOCRError.objects.all()
- print DocumentPageContent.objects.all()
-
- for a in Action.objects.all():
- print a
+ self.document.submit_for_parsing()
self.assertEqual(
Action.objects.last().target, self.document.latest_version
)
self.assertEqual(
Action.objects.last().verb,
- event_ocr_document_version_finish.name
+ event_parsing_document_version_submit.name
+ )
+
+ def test_document_version_finish_event(self):
+ Action.objects.all().delete()  # start from an empty action stream
+ self.document.submit_for_parsing()  # assumes the Celery task runs eagerly under test - TODO confirm
+ self.assertEqual(
+ Action.objects.first().target, self.document.latest_version  # presumably first() is the newest action, i.e. the finish event - verify Action ordering
+ )
+ self.assertEqual(
+ Action.objects.first().verb,
+ event_parsing_document_version_finish.name
+ )
diff --git a/mayan/apps/document_parsing/tests/test_models.py b/mayan/apps/document_parsing/tests/test_models.py
deleted file mode 100644
index 36dbb57f67..0000000000
--- a/mayan/apps/document_parsing/tests/test_models.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import unicode_literals
-
-from common.tests import BaseTestCase
-from documents.models import DocumentType
-from documents.settings import setting_language_choices
-from documents.tests import (
- TEST_DEU_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL, TEST_SMALL_DOCUMENT_PATH
-)
-
-
-class DocumentOCRTestCase(BaseTestCase):
- # PyOCR's leak descriptor in get_available_languages and image_to_string
- # Disable descriptor leak test until fixed in upstream
- _skip_file_descriptor_test = True
-
- def setUp(self):
- super(DocumentOCRTestCase, self).setUp()
-
- self.document_type = DocumentType.objects.create(
- label=TEST_DOCUMENT_TYPE_LABEL
- )
-
- with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
- self.document = self.document_type.new_document(
- file_object=file_object,
- )
-
- def tearDown(self):
- self.document.delete()
- self.document_type.delete()
- super(DocumentOCRTestCase, self).tearDown()
-
- def test_ocr_language_backends_end(self):
- content = self.document.pages.first().ocr_content.content
- self.assertTrue('Mayan EDMS Documentation' in content)
-
-
-class GermanOCRSupportTestCase(BaseTestCase):
- # PyOCR's leak descriptor in get_available_languages and image_to_string
- # Disable descriptor leak test until fixed in upstream
- _skip_file_descriptor_test = True
-
- def setUp(self):
- super(GermanOCRSupportTestCase, self).setUp()
-
- self.document_type = DocumentType.objects.create(
- label=TEST_DOCUMENT_TYPE_LABEL
- )
-
- # Get corresponding language code for German from the default language
- # choices list
- language_code = [
- language for language in setting_language_choices.value if language[1] == 'German'
- ][0][0]
-
- self.assertEqual('deu', language_code)
-
- with open(TEST_DEU_DOCUMENT_PATH) as file_object:
- self.document = self.document_type.new_document(
- file_object=file_object, language=language_code
- )
-
- def tearDown(self):
- self.document_type.delete()
- super(GermanOCRSupportTestCase, self).tearDown()
-
- def test_ocr_language_backends_end(self):
- content = self.document.pages.first().ocr_content.content
-
- self.assertTrue(
- 'Repository für elektronische Dokumente.' in content
- )
- self.assertTrue(
- 'Es bietet einen' in content
- )
diff --git a/mayan/apps/document_parsing/tests/test_parsers.py b/mayan/apps/document_parsing/tests/test_parsers.py
index 9d500a572a..a84f180bae 100644
--- a/mayan/apps/document_parsing/tests/test_parsers.py
+++ b/mayan/apps/document_parsing/tests/test_parsers.py
@@ -5,12 +5,9 @@ from django.test import override_settings
from common.tests import BaseTestCase
from documents.models import DocumentType
-from documents.tests import (
- TEST_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL, TEST_HYBRID_DOCUMENT_PATH
-)
+from documents.tests import TEST_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL
-from ..classes import TextExtractor
-from ..parsers import PDFMinerParser, PopplerParser
+from ..parsers import PopplerParser
@override_settings(OCR_AUTO_OCR=False)
@@ -30,54 +27,11 @@ class ParserTestCase(BaseTestCase):
self.document_type.delete()
super(ParserTestCase, self).tearDown()
- def test_pdfminer_parser(self):
- parser = PDFMinerParser()
-
- parser.process_document_version(self.document.latest_version)
-
- self.assertTrue(
- 'Mayan EDMS Documentation' in self.document.pages.first().ocr_content.content
- )
-
def test_poppler_parser(self):
parser = PopplerParser()
parser.process_document_version(self.document.latest_version)
self.assertTrue(
- 'Mayan EDMS Documentation' in self.document.pages.first().ocr_content.content
- )
-
-
-@override_settings(OCR_AUTO_OCR=False)
-class TextExtractorTestCase(BaseTestCase):
- def setUp(self):
- super(TextExtractorTestCase, self).setUp()
-
- self.document_type = DocumentType.objects.create(
- label=TEST_DOCUMENT_TYPE_LABEL
- )
-
- with open(TEST_HYBRID_DOCUMENT_PATH) as file_object:
- self.document = self.document_type.new_document(
- file_object=File(file_object)
- )
-
- def tearDown(self):
- self.document_type.delete()
- super(TextExtractorTestCase, self).tearDown()
-
- def test_text_extractor(self):
- TextExtractor.process_document_version(
- document_version=self.document.latest_version
- )
-
- self.assertEqual(
- self.document.latest_version.pages.first().ocr_content.content,
- 'Sample text',
- )
-
- self.assertEqual(
- self.document.latest_version.pages.last().ocr_content.content,
- 'Sample text in image form',
+ 'Mayan EDMS Documentation' in self.document.pages.first().content.content
)
diff --git a/mayan/apps/document_parsing/tests/test_views.py b/mayan/apps/document_parsing/tests/test_views.py
index 41b0462103..995250e09e 100644
--- a/mayan/apps/document_parsing/tests/test_views.py
+++ b/mayan/apps/document_parsing/tests/test_views.py
@@ -1,26 +1,25 @@
from __future__ import unicode_literals
-from django.test import override_settings
-
+from documents.tests.literals import TEST_DOCUMENT_FILENAME
from documents.tests.test_views import GenericDocumentViewTestCase
-from ..permissions import permission_ocr_content_view
-from ..utils import get_document_ocr_content
+from ..permissions import permission_content_view
+from ..utils import get_document_content
-@override_settings(OCR_AUTO_OCR=True)
-class OCRViewsTestCase(GenericDocumentViewTestCase):
- # PyOCR's leak descriptor in get_available_languages and image_to_string
- # Disable descriptor leak test until fixed in upstream
+class DocumentContentViewsTestCase(GenericDocumentViewTestCase):
_skip_file_descriptor_test = True
+ # Ensure we use a PDF file
+ test_document_filename = TEST_DOCUMENT_FILENAME
+
def setUp(self):
- super(OCRViewsTestCase, self).setUp()
+ super(DocumentContentViewsTestCase, self).setUp()
self.login_user()
def _document_content_view(self):
return self.get(
- 'ocr:document_content', args=(self.document.pk,)
+ 'document_parsing:document_content', args=(self.document.pk,)
)
def test_document_content_view_no_permissions(self):
@@ -29,7 +28,7 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
self.assertEqual(response.status_code, 403)
def test_document_content_view_with_permission(self):
- self.grant_permission(permission=permission_ocr_content_view)
+ self.grant_permission(permission=permission_content_view)
response = self._document_content_view()
@@ -37,25 +36,25 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
response, 'Mayan EDMS Documentation', status_code=200
)
- def test_document_ocr_download_view_no_permission(self):
+ def test_document_parsing_download_view_no_permission(self):
response = self.get(
- 'ocr:document_ocr_download', args=(self.document.pk,)
+ 'document_parsing:document_content_download', args=(self.document.pk,)
)
self.assertEqual(response.status_code, 403)
- def test_document_download_view_with_permission(self):
+ def test_download_view_with_permission(self):
self.expected_content_type = 'application/octet-stream; charset=utf-8'
- self.grant_permission(permission=permission_ocr_content_view)
+ self.grant_permission(permission=permission_content_view)
response = self.get(
- 'ocr:document_ocr_download', args=(self.document.pk,)
+ 'document_parsing:document_content_download', args=(self.document.pk,)
)
self.assertEqual(response.status_code, 200)
self.assert_download_response(
response, content=(
- ''.join(get_document_ocr_content(document=self.document))
+ ''.join(get_document_content(document=self.document))
),
)
diff --git a/mayan/apps/document_parsing/urls.py b/mayan/apps/document_parsing/urls.py
index 30f1b59359..334babf9bd 100644
--- a/mayan/apps/document_parsing/urls.py
+++ b/mayan/apps/document_parsing/urls.py
@@ -2,62 +2,43 @@ from __future__ import unicode_literals
from django.conf.urls import url
-from .api_views import (
- APIDocumentOCRView, APIDocumentPageContentView, APIDocumentVersionOCRView
-)
+from .api_views import APIDocumentPageContentView
from .views import (
- DocumentAllSubmitView, DocumentOCRContent, DocumentOCRDownloadView,
- DocumentOCRErrorsListView, DocumentSubmitView, DocumentSubmitManyView,
- DocumentTypeSettingsEditView, DocumentTypeSubmitView, EntryListView
+ DocumentContentView, DocumentContentDownloadView,
+ DocumentParsingErrorsListView, DocumentSubmitView, DocumentTypeSubmitView,
+ ParseErrorListView
)
urlpatterns = [
url(
- r'^(?P<pk>\d+)/content/$', DocumentOCRContent.as_view(),
+ r'^documents/(?P<pk>\d+)/content/$', DocumentContentView.as_view(),
name='document_content'
),
url(
- r'^document/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
- name='document_submit'
+ r'^documents/(?P<pk>\d+)/content/download/$',
+ DocumentContentDownloadView.as_view(), name='document_content_download'
),
url(
- r'^document/all/submit/$', DocumentAllSubmitView.as_view(),
- name='document_submit_all'
- ),
- url(
- r'^document/type/submit/$', DocumentTypeSubmitView.as_view(),
+ r'^document_types/submit/$', DocumentTypeSubmitView.as_view(),
name='document_type_submit'
),
url(
- r'^document/multiple/submit/$', DocumentSubmitManyView.as_view(),
+ r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
+ name='document_submit'
+ ),
+ url(
+ r'^documents/multiple/submit/$', DocumentSubmitView.as_view(),
name='document_submit_multiple'
),
url(
- r'^document_type/(?P<pk>\d+)/ocr/settings/$',
- DocumentTypeSettingsEditView.as_view(),
- name='document_type_ocr_settings'
+ r'^documents/(?P<pk>\d+)/errors/$',
+ DocumentParsingErrorsListView.as_view(),
+ name='document_parsing_error_list'
),
- url(
- r'^documents/(?P<pk>\d+)/ocr/errors/$',
- DocumentOCRErrorsListView.as_view(), name='document_ocr_error_list'
- ),
- url(
- r'^documents/(?P<pk>\d+)/ocr/download/$',
- DocumentOCRDownloadView.as_view(), name='document_ocr_download'
- ),
- url(r'^all/$', EntryListView.as_view(), name='entry_list'),
+ url(r'^errors/all/$', ParseErrorListView.as_view(), name='error_list'),
]
api_urls = [
- url(
- r'^document/(?P<pk>\d+)/submit/$', APIDocumentOCRView.as_view(),
- name='document-ocr-submit-view'
- ),
- url(
- r'^document_version/(?P<pk>\d+)/submit/$',
- APIDocumentVersionOCRView.as_view(),
- name='document-version-ocr-submit-view'
- ),
url(
 r'^page/(?P<pk>\d+)/content/$', APIDocumentPageContentView.as_view(),
name='document-page-content-view'
diff --git a/mayan/apps/document_parsing/utils.py b/mayan/apps/document_parsing/utils.py
index 8175c3040e..bbb498af20 100644
--- a/mayan/apps/document_parsing/utils.py
+++ b/mayan/apps/document_parsing/utils.py
@@ -6,10 +6,10 @@ from django.utils.html import conditional_escape
from .models import DocumentPageContent
-def get_document_ocr_content(document):
+def get_document_content(document):
for page in document.pages.all():
try:
- page_content = page.ocr_content.content
+ page_content = page.content.content
except DocumentPageContent.DoesNotExist:
pass
else:
diff --git a/mayan/apps/document_parsing/views.py b/mayan/apps/document_parsing/views.py
index fa0871735b..032371d853 100644
--- a/mayan/apps/document_parsing/views.py
+++ b/mayan/apps/document_parsing/views.py
@@ -4,137 +4,27 @@ from django.contrib import messages
from django.http import HttpResponseRedirect
from django.shortcuts import get_object_or_404
from django.urls import reverse
-from django.utils.translation import ugettext_lazy as _
+from django.utils.translation import ugettext_lazy as _, ungettext
-from acls.models import AccessControlList
from common.generics import (
- ConfirmView, FormView, SingleObjectDetailView, SingleObjectDownloadView,
- SingleObjectEditView, SingleObjectListView
+ FormView, MultipleObjectConfirmActionView, SingleObjectDetailView,
+ SingleObjectDownloadView, SingleObjectListView
)
-from common.mixins import MultipleInstanceActionMixin
-from documents.models import Document, DocumentType
+from documents.models import Document
from .forms import DocumentContentForm, DocumentTypeSelectForm
-from .models import DocumentVersionOCRError
-from .permissions import (
- permission_ocr_content_view, permission_ocr_document,
- permission_document_type_ocr_setup
-)
-from .utils import get_document_ocr_content
+from .models import DocumentVersionParseError
+from .permissions import permission_content_view, permission_parse_document
+from .utils import get_document_content
-class DocumentAllSubmitView(ConfirmView):
- extra_context = {'title': _('Submit all documents for OCR?')}
-
- def get_post_action_redirect(self):
- return reverse('common:tools_list')
-
- def view_action(self):
- count = 0
- for document in Document.objects.all():
- document.submit_for_ocr()
- count += 1
-
- messages.success(
- self.request, _('%d documents added to the OCR queue.') % count
- )
-
-
-class DocumentSubmitView(ConfirmView):
- def get_extra_context(self):
- return {
- 'object': self.get_object(),
- 'title': _('Submit "%s" to the OCR queue?') % self.get_object()
- }
-
- def get_object(self):
- return Document.objects.get(pk=self.kwargs['pk'])
-
- def object_action(self, instance):
- AccessControlList.objects.check_access(
- permissions=permission_ocr_document, user=self.request.user,
- obj=instance
- )
-
- instance.submit_for_ocr()
-
- def view_action(self):
- instance = self.get_object()
-
- self.object_action(instance=instance)
-
- messages.success(
- self.request,
- _('Document: %(document)s was added to the OCR queue.') % {
- 'document': instance
- }
- )
-
-
-class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView):
- model = Document
- success_message = '%(count)d document submitted to the OCR queue.'
- success_message_plural = '%(count)d documents submitted to the OCR queue.'
-
- def get_extra_context(self):
- # Override the base class method
- return {
- 'title': _('Submit the selected documents to the OCR queue?')
- }
-
-
-class DocumentTypeSubmitView(FormView):
- form_class = DocumentTypeSelectForm
- extra_context = {
- 'title': _('Submit all documents of a type for OCR')
- }
-
- def get_post_action_redirect(self):
- return reverse('common:tools_list')
-
- def form_valid(self, form):
- count = 0
- for document in form.cleaned_data['document_type'].documents.all():
- document.submit_for_ocr()
- count += 1
-
- messages.success(
- self.request, _(
- '%(count)d documents of type "%(document_type)s" added to the '
- 'OCR queue.'
- ) % {
- 'count': count,
- 'document_type': form.cleaned_data['document_type']
- }
- )
-
- return HttpResponseRedirect(self.get_success_url())
-
-
-class DocumentTypeSettingsEditView(SingleObjectEditView):
- fields = ('auto_ocr',)
- view_permission = permission_document_type_ocr_setup
-
- def get_object(self, queryset=None):
- return get_object_or_404(
- DocumentType, pk=self.kwargs['pk']
- ).ocr_settings
-
- def get_extra_context(self):
- return {
- 'title': _(
- 'Edit OCR settings for document type: %s'
- ) % self.get_object().document_type
- }
-
-
-class DocumentOCRContent(SingleObjectDetailView):
+class DocumentContentView(SingleObjectDetailView):
form_class = DocumentContentForm
model = Document
- object_permission = permission_ocr_content_view
+ object_permission = permission_content_view
def dispatch(self, request, *args, **kwargs):
- result = super(DocumentOCRContent, self).dispatch(
+ result = super(DocumentContentView, self).dispatch(
request, *args, **kwargs
)
self.get_object().add_as_recent_document_for_user(request.user)
@@ -145,23 +35,25 @@ class DocumentOCRContent(SingleObjectDetailView):
'document': self.get_object(),
'hide_labels': True,
'object': self.get_object(),
- 'title': _('OCR result for document: %s') % self.get_object(),
+ 'title': _('Content for document: %s') % self.get_object(),
}
-class EntryListView(SingleObjectListView):
- extra_context = {
- 'hide_object': True,
- 'title': _('OCR errors'),
- }
- view_permission = permission_ocr_document
+class DocumentContentDownloadView(SingleObjectDownloadView):
+ model = Document
+ object_permission = permission_content_view
- def get_object_list(self):
- return DocumentVersionOCRError.objects.all()
+ def get_file(self):
+ file_object = DocumentContentDownloadView.TextIteratorIO(
+ iterator=get_document_content(document=self.get_object())
+ )
+ return DocumentContentDownloadView.VirtualFile(
+ file=file_object, name='{}-content'.format(self.get_object())
+ )
-class DocumentOCRErrorsListView(SingleObjectListView):
- view_permission = permission_ocr_document
+class DocumentParsingErrorsListView(SingleObjectListView):
+ view_permission = permission_content_view
def get_document(self):
return get_object_or_404(Document, pk=self.kwargs['pk'])
@@ -170,21 +62,93 @@ class DocumentOCRErrorsListView(SingleObjectListView):
return {
'hide_object': True,
'object': self.get_document(),
- 'title': _('OCR errors for document: %s') % self.get_document(),
+ 'title': _(
+ 'Parsing errors for document: %s'
+ ) % self.get_document(),
}
def get_object_list(self):
- return self.get_document().latest_version.ocr_errors.all()
+ return self.get_document().latest_version.parsing_errors.all()
-class DocumentOCRDownloadView(SingleObjectDownloadView):
+class DocumentSubmitView(MultipleObjectConfirmActionView):
model = Document
- object_permission = permission_ocr_content_view
+ object_permission = permission_parse_document
+ success_message = _(
+ '%(count)d document added to the parsing queue'
+ )
+ success_message_plural = _(
+ '%(count)d documents added to the parsing queue'
+ )
- def get_file(self):
- file_object = DocumentOCRDownloadView.TextIteratorIO(
- iterator=get_document_ocr_content(document=self.get_object())
- )
- return DocumentOCRDownloadView.VirtualFile(
- file=file_object, name='{}-OCR'.format(self.get_object())
+ def get_extra_context(self):
+ queryset = self.get_queryset()
+
+ result = {
+ 'title': ungettext(
+ singular='Submit %(count)d document to the parsing queue?',
+ plural='Submit %(count)d documents to the parsing queue',
+ number=queryset.count()
+ ) % {
+ 'count': queryset.count(),
+ }
+ }
+
+ if queryset.count() == 1:
+ result.update(
+ {
+ 'object': queryset.first(),
+ 'title': _(
+ 'Submit document "%s" to the parsing queue'
+ ) % queryset.first()
+ }
+ )
+
+ return result
+
+ def object_action(self, instance, form=None):
+ instance.submit_for_parsing()
+
+
+class DocumentTypeSubmitView(FormView):
+ form_class = DocumentTypeSelectForm
+ extra_context = {
+ 'title': _('Submit all documents of a type for parsing')
+ }
+
+ def get_form_extra_kwargs(self):
+ return {
+ 'user': self.request.user
+ }
+
+ def get_post_action_redirect(self):
+ return reverse('common:tools_list')
+
+ def form_valid(self, form):
+ count = 0
+ for document in form.cleaned_data['document_type'].documents.all():
+ document.submit_for_parsing()
+ count += 1
+
+ messages.success(
+ self.request, _(
+ '%(count)d documents of type "%(document_type)s" added to the '
+ 'parsing queue.'
+ ) % {
+ 'count': count,
+ 'document_type': form.cleaned_data['document_type']
+ }
)
+
+ return HttpResponseRedirect(self.get_success_url())
+
+
+class ParseErrorListView(SingleObjectListView):
+ extra_context = {
+ 'hide_object': True,
+ 'title': _('Parsing errors'),
+ }
+ view_permission = permission_content_view
+
+ def get_object_list(self):
+ return DocumentVersionParseError.objects.all()
diff --git a/mayan/apps/documents/migrations/0041_auto_20170823_1855.py b/mayan/apps/documents/migrations/0041_auto_20170823_1855.py
new file mode 100644
index 0000000000..fcc01e0c6f
--- /dev/null
+++ b/mayan/apps/documents/migrations/0041_auto_20170823_1855.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.10.7 on 2017-08-23 18:55
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import uuid
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('documents', '0040_auto_20170725_1111'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='document',
+ name='uuid',
+ field=models.UUIDField(default=uuid.uuid4, editable=False),
+ ),
+ ]