Files
mayan-edms/mayan/apps/ocr/models.py
Roberto Rosario acd8fd2a3e PEP8 cleanup
Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
2019-10-11 10:22:42 -04:00

87 lines
2.7 KiB
Python

from __future__ import unicode_literals
from django.db import models
from django.utils.encoding import force_text, python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
from mayan.apps.documents.models import (
DocumentType, DocumentVersion, DocumentVersionPage
)
from .managers import (
DocumentVesionPageOCRContentManager, DocumentTypeSettingsManager
)
class DocumentTypeSettings(models.Model):
    """
    Per-document-type OCR configuration.

    Each row is tied one-to-one to a ``DocumentType`` and is reachable from
    the document type through the ``ocr_settings`` reverse accessor.
    """
    # Owning document type; the settings row is removed together with it.
    document_type = models.OneToOneField(
        to=DocumentType, on_delete=models.CASCADE,
        related_name='ocr_settings', unique=True,
        verbose_name=_('Document type')
    )
    # Flag controlling automatic OCR queuing; enabled by default.
    auto_ocr = models.BooleanField(
        verbose_name=_(
            'Automatically queue newly created documents for OCR.'
        ),
        default=True
    )

    objects = DocumentTypeSettingsManager()

    class Meta:
        verbose_name_plural = _('Document types settings')
        verbose_name = _('Document type settings')

    def natural_key(self):
        """
        Return the natural key of the related document type, which is used
        as the natural key of this settings instance too.
        """
        related_document_type = self.document_type
        return related_document_type.natural_key()
    # Serialization dependency declaration: the referenced model is
    # 'documents.DocumentType'.
    natural_key.dependencies = ['documents.DocumentType']
@python_2_unicode_compatible
class DocumentVersionPageOCRContent(models.Model):
    """
    Stores the OCR result for a single document version page.

    Each row is tied one-to-one to a ``DocumentVersionPage`` and is
    reachable from the page through the ``ocr_content`` reverse accessor.
    """
    # Page the text belongs to; the content is removed together with it.
    document_version_page = models.OneToOneField(
        on_delete=models.CASCADE, related_name='ocr_content',
        to=DocumentVersionPage, verbose_name=_('Document version page')
    )
    # Text may be blank, e.g. when nothing was extracted from the page.
    content = models.TextField(
        blank=True, help_text=_(
            'The actual text content extracted by the OCR backend.'
        ), verbose_name=_('Content')
    )

    objects = DocumentVesionPageOCRContentManager()

    class Meta:
        verbose_name = _('Document version page OCR content')
        verbose_name_plural = _('Document version pages OCR contents')

    def __str__(self):
        """
        Return the text representation of the related document version
        page.
        """
        # The relation field is named `document_version_page`; the previous
        # `self.document_page` reference pointed at an attribute this model
        # does not define and raised AttributeError.
        return force_text(self.document_version_page)
@python_2_unicode_compatible
class DocumentVersionOCRError(models.Model):
    """
    Record of an error captured while performing OCR on a document version.

    Entries are reachable from the document version through the
    ``ocr_errors`` reverse accessor and are ordered by submission time.
    """
    # Document version the error refers to; errors are removed with it.
    document_version = models.ForeignKey(
        to=DocumentVersion, on_delete=models.CASCADE,
        related_name='ocr_errors', verbose_name=_('Document version')
    )
    # Set once when the row is created; indexed.
    datetime_submitted = models.DateTimeField(
        verbose_name=_('Date time submitted'), db_index=True,
        auto_now_add=True
    )
    # Optional error text.
    result = models.TextField(
        verbose_name=_('Result'), null=True, blank=True
    )

    class Meta:
        verbose_name = _('Document version OCR error')
        verbose_name_plural = _('Document version OCR errors')
        ordering = ('datetime_submitted',)

    def __str__(self):
        """
        Return the text representation of the related document version.
        """
        version = self.document_version
        return force_text(version)