Refactor OCR app. Remove document parsing. Move OCR processing to
the model manager. Add submit and finish events.
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -6,6 +6,8 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from documents.models import DocumentPage, DocumentType, DocumentVersion
|
||||
|
||||
from .managers import DocumentPageOCRContentManager
|
||||
|
||||
|
||||
class DocumentTypeSettings(models.Model):
|
||||
"""
|
||||
@@ -25,6 +27,24 @@ class DocumentTypeSettings(models.Model):
|
||||
verbose_name_plural = _('Document types settings')
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class DocumentPageOCRContent(models.Model):
|
||||
document_page = models.OneToOneField(
|
||||
DocumentPage, on_delete=models.CASCADE, related_name='ocr_content',
|
||||
verbose_name=_('Document page')
|
||||
)
|
||||
content = models.TextField(blank=True, verbose_name=_('Content'))
|
||||
|
||||
objects = DocumentPageOCRContentManager()
|
||||
|
||||
def __str__(self):
|
||||
return force_text(self.document_page)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Document page OCR content')
|
||||
verbose_name_plural = _('Document pages OCR contents')
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class DocumentVersionOCRError(models.Model):
|
||||
document_version = models.ForeignKey(
|
||||
@@ -32,7 +52,7 @@ class DocumentVersionOCRError(models.Model):
|
||||
verbose_name=_('Document version')
|
||||
)
|
||||
datetime_submitted = models.DateTimeField(
|
||||
auto_now=True, db_index=True, verbose_name=_('Date time submitted')
|
||||
auto_now_add=True, db_index=True, verbose_name=_('Date time submitted')
|
||||
)
|
||||
result = models.TextField(blank=True, null=True, verbose_name=_('Result'))
|
||||
|
||||
@@ -41,24 +61,5 @@ class DocumentVersionOCRError(models.Model):
|
||||
|
||||
class Meta:
|
||||
ordering = ('datetime_submitted',)
|
||||
verbose_name = _('Document Version OCR Error')
|
||||
verbose_name_plural = _('Document Version OCR Errors')
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class DocumentPageContent(models.Model):
|
||||
"""
|
||||
Model that describes a document page content
|
||||
"""
|
||||
document_page = models.OneToOneField(
|
||||
DocumentPage, on_delete=models.CASCADE, related_name='ocr_content',
|
||||
verbose_name=_('Document page')
|
||||
)
|
||||
content = models.TextField(blank=True, verbose_name=_('Content'))
|
||||
|
||||
def __str__(self):
|
||||
return force_text(self.document_page)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Document page content')
|
||||
verbose_name_plural = _('Document pages contents')
|
||||
verbose_name = _('Document version OCR error')
|
||||
verbose_name_plural = _('Document version OCR errors')
|
||||
|
||||
Reference in New Issue
Block a user