diff --git a/mayan/apps/document_parsing/apps.py b/mayan/apps/document_parsing/apps.py
index 7def216213..504e5efc3d 100644
--- a/mayan/apps/document_parsing/apps.py
+++ b/mayan/apps/document_parsing/apps.py
@@ -45,7 +45,7 @@ from .permissions import (
permission_parse_document
)
from .signals import post_document_version_parsing
-from .utils import get_document_content
+from .utils import get_document_content, get_document_version_content
logger = logging.getLogger(__name__)
@@ -65,7 +65,7 @@ class DocumentParsingApp(MayanAppConfig):
app_label='documents', model_name='Document'
)
DocumentPage = apps.get_model(
- app_label='documents', model_name='DocumentVersionPage'
+ app_label='documents', model_name='DocumentPage'
)
DocumentType = apps.get_model(
app_label='documents', model_name='DocumentType'
@@ -76,6 +76,9 @@ class DocumentParsingApp(MayanAppConfig):
DocumentVersion = apps.get_model(
app_label='documents', model_name='DocumentVersion'
)
+ DocumentVersionPage = apps.get_model(
+ app_label='documents', model_name='DocumentVersionPage'
+ )
DocumentVersionParseError = self.get_model(
model_name='DocumentVersionParseError'
)
@@ -87,7 +90,7 @@ class DocumentParsingApp(MayanAppConfig):
name='content', value=get_document_content
)
DocumentVersion.add_to_class(
- name='content', value=get_document_content
+ name='content', value=get_document_version_content
)
DocumentVersion.add_to_class(
name='submit_for_parsing',
diff --git a/mayan/apps/document_parsing/forms.py b/mayan/apps/document_parsing/forms.py
index 12778c6acb..2803c9cd59 100644
--- a/mayan/apps/document_parsing/forms.py
+++ b/mayan/apps/document_parsing/forms.py
@@ -26,9 +26,9 @@ class DocumentContentForm(forms.Form):
except AttributeError:
document_pages = []
- for page in document_pages:
+ for document_page in document_pages:
try:
- page_content = page.content.content
+ page_content = document_page.content_object.content.content
except DocumentVersionPageContent.DoesNotExist:
pass
else:
@@ -37,7 +37,7 @@ class DocumentContentForm(forms.Form):
'\n\n\n
- %s -
\n\n\n' % (
ugettext(
'Page %(page_number)d'
- ) % {'page_number': page.page_number}
+ ) % {'page_number': document_page.page_number}
)
)
@@ -72,7 +72,7 @@ class DocumentPageContentForm(forms.Form):
self.fields['contents'].initial = ''
try:
- page_content = document_page.content.content
+ page_content = document_page.content_object.content.content
except DocumentVersionPageContent.DoesNotExist:
pass
else:
diff --git a/mayan/apps/document_parsing/managers.py b/mayan/apps/document_parsing/managers.py
index 1669f89e70..7748596f2e 100644
--- a/mayan/apps/document_parsing/managers.py
+++ b/mayan/apps/document_parsing/managers.py
@@ -22,7 +22,9 @@ class DocumentPageContentManager(models.Manager):
def delete_content_for(self, document, user=None):
with transaction.atomic():
for document_page in document.pages.all():
- self.filter(document_page=document_page).delete()
+ self.filter(
+ document_version_page=document_page.content_object
+ ).delete()
event_parsing_document_content_deleted.commit(
actor=user, target=document
diff --git a/mayan/apps/document_parsing/models.py b/mayan/apps/document_parsing/models.py
index 9c7483f448..05811b63c1 100644
--- a/mayan/apps/document_parsing/models.py
+++ b/mayan/apps/document_parsing/models.py
@@ -11,32 +11,6 @@ from mayan.apps.documents.models import (
from .managers import DocumentPageContentManager, DocumentTypeSettingsManager
-@python_2_unicode_compatible
-class DocumentVersionPageContent(models.Model):
- """
- This model store's the parsed content of a document page.
- """
- document_version_page = models.OneToOneField(
- on_delete=models.CASCADE, related_name='content',
- to=DocumentVersionPage, verbose_name=_('Document version page')
- )
- content = models.TextField(
- blank=True, help_text=_(
- 'The actual text content as extracted by the document '
- 'parsing backend.'
- ), verbose_name=_('Content')
- )
-
- objects = DocumentPageContentManager()
-
- class Meta:
- verbose_name = _('Document version page content')
- verbose_name_plural = _('Document version pages contents')
-
- def __str__(self):
- return force_text(self.document_page)
-
-
class DocumentTypeSettings(models.Model):
"""
This model stores the parsing settings for a document type.
@@ -62,6 +36,32 @@ class DocumentTypeSettings(models.Model):
verbose_name_plural = _('Document types settings')
+@python_2_unicode_compatible
+class DocumentVersionPageContent(models.Model):
+    """
+    This model stores the parsed content of a document version page.
+    """
+    document_version_page = models.OneToOneField(
+        on_delete=models.CASCADE, related_name='content',
+        to=DocumentVersionPage, verbose_name=_('Document version page')
+    )
+    content = models.TextField(
+        blank=True, help_text=_(
+            'The actual text content as extracted by the document '
+            'parsing backend.'
+        ), verbose_name=_('Content')
+    )
+
+    objects = DocumentPageContentManager()
+
+    class Meta:
+        verbose_name = _('Document version page content')
+        verbose_name_plural = _('Document version pages contents')
+
+    def __str__(self):
+        return force_text(self.document_version_page)
+
+
@python_2_unicode_compatible
class DocumentVersionParseError(models.Model):
"""
diff --git a/mayan/apps/document_parsing/parsers.py b/mayan/apps/document_parsing/parsers.py
index 8ad24b4115..469b974377 100644
--- a/mayan/apps/document_parsing/parsers.py
+++ b/mayan/apps/document_parsing/parsers.py
@@ -23,11 +23,13 @@ class Parser(object):
_registry = {}
@classmethod
- def parse_document_page(cls, document_page):
- for parser_class in cls._registry.get(document_page.document_version.mimetype, ()):
+ def parse_document_version_page(cls, document_version_page):
+ for parser_class in cls._registry.get(document_version_page.document_version.mimetype, ()):
try:
parser = parser_class()
- parser.process_document_page(document_page)
+                parser.process_document_version_page(
+                    document_version_page=document_version_page
+                )
except ParserError:
# If parser raises error, try next parser in the list
pass
@@ -41,7 +43,9 @@ class Parser(object):
for parser_class in cls._registry.get(document_version.mimetype, ()):
try:
parser = parser_class()
- parser.process_document_version(document_version)
+ parser.process_document_version(
+ document_version=document_version
+ )
except ParserError:
# If parser raises error, try next parser in the list
pass
@@ -64,10 +68,12 @@ class Parser(object):
)
logger.debug('document version: %d', document_version.pk)
- for document_page in document_version.pages.all():
- self.process_document_page(document_page=document_page)
+ for document_version_page in document_version.pages.all():
+ self.process_document_version_page(
+ document_version_page=document_version_page
+ )
- def process_document_page(self, document_page):
+ def process_document_version_page(self, document_version_page):
DocumentVersionPageContent = apps.get_model(
app_label='document_parsing',
model_name='DocumentVersionPageContent'
@@ -75,19 +81,20 @@ class Parser(object):
logger.info(
'Processing page: %d of document version: %s',
- document_page.page_number, document_page.document_version
+ document_version_page.page_number,
+ document_version_page.document_version
)
- file_object = document_page.document_version.get_intermediate_file()
+ file_object = document_version_page.document_version.get_intermediate_file()
try:
- document_page_content, created = DocumentVersionPageContent.objects.get_or_create(
- document_page=document_page
+ document_version_page_content, created = DocumentVersionPageContent.objects.get_or_create(
+ document_version_page=document_version_page
)
- document_page_content.content = self.execute(
- file_object=file_object, page_number=document_page.page_number
+ document_version_page_content.content = self.execute(
+ file_object=file_object, page_number=document_version_page.page_number
)
- document_page_content.save()
+ document_version_page_content.save()
except Exception as exception:
error_message = _('Exception parsing page; %s') % exception
logger.error(error_message)
@@ -97,7 +104,8 @@ class Parser(object):
logger.info(
'Finished processing page: %d of document version: %s',
- document_page.page_number, document_page.document_version
+ document_version_page.page_number,
+ document_version_page.document_version
)
def execute(self, file_object, page_number):
diff --git a/mayan/apps/document_parsing/tasks.py b/mayan/apps/document_parsing/tasks.py
index 4debffbc60..653552f741 100644
--- a/mayan/apps/document_parsing/tasks.py
+++ b/mayan/apps/document_parsing/tasks.py
@@ -14,8 +14,8 @@ def task_parse_document_version(document_version_pk):
DocumentVersion = apps.get_model(
app_label='documents', model_name='DocumentVersion'
)
- DocumentPageContent = apps.get_model(
- app_label='document_parsing', model_name='DocumentPageContent'
+ DocumentVersionPageContent = apps.get_model(
+ app_label='document_parsing', model_name='DocumentVersionPageContent'
)
document_version = DocumentVersion.objects.get(
@@ -24,6 +24,6 @@ def task_parse_document_version(document_version_pk):
logger.info(
'Starting parsing for document version: %s', document_version
)
- DocumentPageContent.objects.process_document_version(
+ DocumentVersionPageContent.objects.process_document_version(
document_version=document_version
)
diff --git a/mayan/apps/document_parsing/tests/test_events.py b/mayan/apps/document_parsing/tests/test_events.py
index 1a2860305d..17bbe198fb 100644
--- a/mayan/apps/document_parsing/tests/test_events.py
+++ b/mayan/apps/document_parsing/tests/test_events.py
@@ -10,7 +10,7 @@ from ..events import (
event_parsing_document_version_submit,
event_parsing_document_version_finish
)
-from ..models import DocumentPageContent
+from ..models import DocumentVersionPageContent
class DocumentParsingEventsTestCase(GenericDocumentTestCase):
@@ -19,7 +19,7 @@ class DocumentParsingEventsTestCase(GenericDocumentTestCase):
def test_document_content_deleted_event(self):
Action.objects.all().delete()
- DocumentPageContent.objects.delete_content_for(
+ DocumentVersionPageContent.objects.delete_content_for(
document=self.test_document
)
diff --git a/mayan/apps/document_parsing/tests/test_parsers.py b/mayan/apps/document_parsing/tests/test_parsers.py
index 237bccc567..2eeb8703a6 100644
--- a/mayan/apps/document_parsing/tests/test_parsers.py
+++ b/mayan/apps/document_parsing/tests/test_parsers.py
@@ -18,5 +18,5 @@ class ParserTestCase(DocumentTestMixin, BaseTestCase):
parser.process_document_version(self.test_document.latest_version)
self.assertTrue(
- TEST_DOCUMENT_CONTENT in self.test_document.pages.first().content.content
+ TEST_DOCUMENT_CONTENT in self.test_document.pages.first().content_object.content.content
)
diff --git a/mayan/apps/document_parsing/tests/test_views.py b/mayan/apps/document_parsing/tests/test_views.py
index bb88c1817b..21ca0ef808 100644
--- a/mayan/apps/document_parsing/tests/test_views.py
+++ b/mayan/apps/document_parsing/tests/test_views.py
@@ -5,7 +5,7 @@ from django.test import override_settings
from mayan.apps.documents.tests.base import GenericDocumentViewTestCase
from mayan.apps.documents.tests.literals import TEST_HYBRID_DOCUMENT
-from ..models import DocumentPageContent
+from ..models import DocumentVersionPageContent
from ..permissions import (
permission_content_view, permission_document_type_parsing_setup,
permission_parse_document
@@ -72,8 +72,8 @@ class DocumentContentViewsTestCase(
self.assertEqual(response.status_code, 404)
self.assertTrue(
- DocumentPageContent.objects.filter(
- document_page=self.test_document.pages.first()
+ DocumentVersionPageContent.objects.filter(
+ document_version_page=self.test_document.pages.first().content_object
).exists()
)
@@ -86,8 +86,8 @@ class DocumentContentViewsTestCase(
self.assertEqual(response.status_code, 302)
self.assertFalse(
- DocumentPageContent.objects.filter(
- document_page=self.test_document.pages.first()
+ DocumentVersionPageContent.objects.filter(
+ document_version_page=self.test_document.pages.first().content_object
).exists()
)
diff --git a/mayan/apps/document_parsing/utils.py b/mayan/apps/document_parsing/utils.py
index ab8e049450..5086d5cf02 100644
--- a/mayan/apps/document_parsing/utils.py
+++ b/mayan/apps/document_parsing/utils.py
@@ -6,14 +6,28 @@ from django.utils.html import conditional_escape
def get_document_content(document):
- DocumentPageContent = apps.get_model(
- app_label='document_parsing', model_name='DocumentPageContent'
+ DocumentVersionPageContent = apps.get_model(
+ app_label='document_parsing', model_name='DocumentVersionPageContent'
)
- for page in document.pages.all():
+ for document_page in document.pages.all():
try:
- page_content = page.content.content
- except DocumentPageContent.DoesNotExist:
+ page_content = document_page.content_object.content.content
+ except DocumentVersionPageContent.DoesNotExist:
+ pass
+ else:
+ yield conditional_escape(force_text(page_content))
+
+
+def get_document_version_content(document_version):
+ DocumentVersionPageContent = apps.get_model(
+ app_label='document_parsing', model_name='DocumentVersionPageContent'
+ )
+
+ for document_version_page in document_version.pages.all():
+ try:
+ page_content = document_version_page.content.content
+ except DocumentVersionPageContent.DoesNotExist:
pass
else:
yield conditional_escape(force_text(page_content))
diff --git a/mayan/apps/documents/apps.py b/mayan/apps/documents/apps.py
index 4faae2f8bd..541134fb4b 100644
--- a/mayan/apps/documents/apps.py
+++ b/mayan/apps/documents/apps.py
@@ -235,7 +235,7 @@ class DocumentsApp(MayanAppConfig):
model=DocumentPage, manager_name='passthrough'
)
ModelPermission.register_inheritance(
- model=DocumentPageResult, related='document_version__document',
+ model=DocumentPageResult, related='document',
)
ModelPermission.register_manager(
model=DocumentPageResult, manager_name='passthrough'
diff --git a/mayan/apps/ocr/apps.py b/mayan/apps/ocr/apps.py
index 1d0410bd44..772ec287fa 100644
--- a/mayan/apps/ocr/apps.py
+++ b/mayan/apps/ocr/apps.py
@@ -66,7 +66,7 @@ class OCRApp(MayanAppConfig):
app_label='documents', model_name='Document'
)
DocumentPage = apps.get_model(
- app_label='documents', model_name='DocumentVersionPage'
+ app_label='documents', model_name='DocumentPage'
)
DocumentType = apps.get_model(
app_label='documents', model_name='DocumentType'