Update the OCR app to work on document versions instead of documents; document versions are the model that holds the document page instances. Remove the old OCR document queue and replace it with a single model for OCR processing error entries. Increase compatibility with Django 1.7 and Python 3.

This commit is contained in:
Roberto Rosario
2015-01-15 03:01:43 -04:00
parent 2371d3a49d
commit e6754c9a6f
24 changed files with 375 additions and 328 deletions

View File

@@ -1,39 +1,22 @@
from __future__ import absolute_import
from __future__ import unicode_literals
from django.db import models
from django.core.exceptions import ObjectDoesNotExist
from django.utils.translation import ugettext
from django.utils.encoding import python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
from documents.models import Document
from documents.models import DocumentVersion
class DocumentQueue(models.Model):
name = models.CharField(max_length=64, unique=True, verbose_name=_(u'Name'))
label = models.CharField(max_length=64, verbose_name=_(u'Label'))
@python_2_unicode_compatible
class DocumentVersionOCRError(models.Model):
document_version = models.ForeignKey(DocumentVersion, verbose_name=_('Document version'))
datetime_submitted = models.DateTimeField(verbose_name=_('Date time submitted'), auto_now=True, db_index=True)
result = models.TextField(blank=True, null=True, verbose_name=_('Result'))
class Meta:
verbose_name = _(u'Document queue')
verbose_name_plural = _(u'Document queues')
# Text representation of this object: the value of its ``label`` field.
def __unicode__(self):
return self.label
class QueueDocument(models.Model):
document_queue = models.ForeignKey(DocumentQueue, related_name='documents', verbose_name=_(u'Document queue'))
document = models.ForeignKey(Document, verbose_name=_(u'Document'))
datetime_submitted = models.DateTimeField(verbose_name=_(u'Date time submitted'), auto_now=True, db_index=True)
result = models.TextField(blank=True, null=True, verbose_name=_(u'Result'))
node_name = models.CharField(max_length=256, verbose_name=_(u'Node name'), blank=True, null=True)
def __str__(self):
    """Return the text representation of the related document version.

    The enclosing class is decorated with ``python_2_unicode_compatible``,
    so this method must return text on both Python 2 and Python 3.
    """
    # The ``unicode`` builtin does not exist on Python 3 and would raise
    # NameError there. Formatting into a text literal yields the same
    # result on Python 2 (the module enables ``unicode_literals``, so the
    # literal is a unicode string) and a ``str`` on Python 3.
    return '%s' % (self.document_version,)
class Meta:
ordering = ('datetime_submitted',)
verbose_name = _(u'Queue document')
verbose_name_plural = _(u'Queue documents')
def __unicode__(self):
    """Return the related document as text, or a placeholder message.

    The placeholder is used when reading or rendering ``self.document``
    raises ``ObjectDoesNotExist``.
    """
    try:
        text = unicode(self.document)
    except ObjectDoesNotExist:
        # Fall back to a translated placeholder instead of propagating.
        text = ugettext(u'Missing document.')
    return text
verbose_name = _('Document Version OCR Error')
verbose_name_plural = _('Document Version OCR Errors')