Files
mayan-edms/mayan/apps/document_parsing/models.py
Roberto Rosario 0699ad0556 Add support for new document page structure
Documents now have their own dedicated DocumentPage
submodel. The old DocumentPage is now called DocumentVersionPage.
This allows mappings between document pages and document version
pages, which makes renumbering and appending pages possible.
DocumentPages have a content_object to map them to any other
object. For now they only map to DocumentVersionPages.
New option added to the version upload form to append the
pages of the new version.
A new view was added to just append new pages; it wraps the
new document version upload form and hides the append pages
checkbox, which is set to True.
Add a new action, reset_pages, to reset the pages of the
document to those of the latest version.

Missing: appending tests, checks for proper content_object in OCR and
document parsing.

Author: Roberto Rosario <roberto.rosario@mayan-edms.com>
Date:   Thu Oct 10 12:00:25 2019 -0400
2019-10-10 11:55:42 -04:00

89 lines
2.8 KiB
Python

from __future__ import unicode_literals
from django.db import models
from django.utils.encoding import force_text, python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
from mayan.apps.documents.models import (
DocumentPage, DocumentType, DocumentVersion, DocumentVersionPage
)
from .managers import (
DocumentVersionPageContentManager, DocumentTypeSettingsManager
)
class DocumentTypeSettings(models.Model):
    """
    Parsing configuration attached to a single document type.

    The one-to-one link means a document type has at most one settings
    row, reachable from the document type as ``parsing_settings``.
    """
    # NOTE(review): unique=True looks redundant on a OneToOneField; it is
    # kept so the field definition stays exactly as it was. Confirm before
    # removing.
    document_type = models.OneToOneField(
        to=DocumentType, related_name='parsing_settings',
        on_delete=models.CASCADE, unique=True,
        verbose_name=_('Document type')
    )
    auto_parsing = models.BooleanField(
        verbose_name=_(
            'Automatically queue newly created documents for parsing.'
        ), default=True
    )

    objects = DocumentTypeSettingsManager()

    class Meta:
        verbose_name = _('Document type settings')
        verbose_name_plural = _('Document types settings')

    def natural_key(self):
        """
        Return the natural key of the related document type, which also
        serves as the natural key of this settings instance.
        """
        return self.document_type.natural_key()
    # Declares that DocumentType objects must be serialized before the
    # objects of this model when natural keys are in use.
    natural_key.dependencies = ['documents.DocumentType']
@python_2_unicode_compatible
class DocumentVersionPageContent(models.Model):
    """
    This model stores the parsed content of a document version page.

    Each document version page has at most one content row, reachable
    from the page through the ``content`` related name.
    """
    document_version_page = models.OneToOneField(
        on_delete=models.CASCADE, related_name='content',
        to=DocumentVersionPage, verbose_name=_('Document version page')
    )
    content = models.TextField(
        blank=True, help_text=_(
            'The actual text content as extracted by the document '
            'parsing backend.'
        ), verbose_name=_('Content')
    )

    objects = DocumentVersionPageContentManager()

    class Meta:
        verbose_name = _('Document version page content')
        verbose_name_plural = _('Document version pages contents')

    def __str__(self):
        """
        Return the text representation of the related document version
        page.
        """
        # The relation field is named `document_version_page`; the model
        # has no `document_page` attribute, so referencing it raised
        # AttributeError whenever the instance was converted to text.
        return force_text(self.document_version_page)
@python_2_unicode_compatible
class DocumentVersionParseError(models.Model):
    """
    This model stores the errors captured when attempting to parse a
    document version.

    Entries are reachable from the document version through the
    ``parsing_errors`` related name and are ordered by submission time.
    """
    document_version = models.ForeignKey(
        to=DocumentVersion, related_name='parsing_errors',
        on_delete=models.CASCADE, verbose_name=_('Document version')
    )
    # Filled in automatically when the row is first created.
    datetime_submitted = models.DateTimeField(
        verbose_name=_('Date time submitted'), db_index=True,
        auto_now_add=True
    )
    result = models.TextField(
        verbose_name=_('Result'), null=True, blank=True
    )

    class Meta:
        ordering = ('datetime_submitted',)
        verbose_name = _('Document version parse error')
        verbose_name_plural = _('Document version parse errors')

    def __str__(self):
        """
        Return the text representation of the related document version.
        """
        return force_text(self.document_version)