Issue #75, move OCR queueing from a setting to a DocumentType model field

This commit is contained in:
Roberto Rosario
2014-10-21 16:53:42 -04:00
parent 3c72d62087
commit 549f0fdc87
6 changed files with 127 additions and 15 deletions

View File

@@ -288,16 +288,6 @@ Default: ``eng``
Language code passed to the ``tesseract`` executable.
.. setting:: OCR_AUTOMATIC_OCR
**OCR_AUTOMATIC_OCR**
Default: ``True``
Automatically queue newly created documents or newly uploaded versions
of existing documents for OCR.
.. setting:: OCR_UNPAPER_PATH
**OCR_UNPAPER_PATH**

View File

@@ -57,6 +57,7 @@ class DocumentType(models.Model):
properties can be attached
"""
name = models.CharField(max_length=32, verbose_name=_(u'Name'), unique=True)
ocr = models.BooleanField(default=True, verbose_name=_(u'Automatically queue newly created documents for OCR.'))
objects = DocumentTypeManager()

View File

@@ -0,0 +1,120 @@
# -*- coding: utf-8 -*-
from south.utils import datetime_utils as datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
def forwards(self, orm):
# Adding field 'DocumentType.ocr'
db.add_column(u'documents_documenttype', 'ocr',
self.gf('django.db.models.fields.BooleanField')(default=True),
keep_default=False)
def backwards(self, orm):
# Deleting field 'DocumentType.ocr'
db.delete_column(u'documents_documenttype', 'ocr')
models = {
u'auth.group': {
'Meta': {'object_name': 'Group'},
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
},
u'auth.permission': {
'Meta': {'ordering': "(u'content_type__app_label', u'content_type__model', u'codename')", 'unique_together': "((u'content_type', u'codename'),)", 'object_name': 'Permission'},
'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['contenttypes.ContentType']"}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
},
u'auth.user': {
'Meta': {'object_name': 'User'},
'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
'groups': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Group']"}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Permission']"}),
'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
},
u'contenttypes.contenttype': {
'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
},
u'documents.document': {
'Meta': {'ordering': "['-date_added']", 'object_name': 'Document'},
'date_added': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'document_type': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'documents'", 'to': u"orm['documents.DocumentType']"}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'uuid': ('django.db.models.fields.CharField', [], {'max_length': '48', 'blank': 'True'})
},
u'documents.documentpage': {
'Meta': {'ordering': "['page_number']", 'object_name': 'DocumentPage'},
'content': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'document_version': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'pages'", 'to': u"orm['documents.DocumentVersion']"}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'page_label': ('django.db.models.fields.CharField', [], {'max_length': '40', 'null': 'True', 'blank': 'True'}),
'page_number': ('django.db.models.fields.PositiveIntegerField', [], {'default': '1', 'db_index': 'True'})
},
u'documents.documentpagetransformation': {
'Meta': {'ordering': "('order',)", 'object_name': 'DocumentPageTransformation'},
'arguments': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'document_page': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentPage']"}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'order': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
'transformation': ('django.db.models.fields.CharField', [], {'max_length': '128'})
},
u'documents.documenttype': {
'Meta': {'ordering': "['name']", 'object_name': 'DocumentType'},
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}),
'ocr': ('django.db.models.fields.BooleanField', [], {'default': 'True'})
},
u'documents.documenttypefilename': {
'Meta': {'ordering': "['filename']", 'object_name': 'DocumentTypeFilename'},
'document_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentType']"}),
'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
'filename': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
},
u'documents.documentversion': {
'Meta': {'unique_together': "(('document', 'major', 'minor', 'micro'),)", 'object_name': 'DocumentVersion'},
'checksum': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'comment': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'versions'", 'to': u"orm['documents.Document']"}),
'encoding': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}),
'file': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}),
'filename': ('django.db.models.fields.CharField', [], {'default': "u''", 'max_length': '255', 'db_index': 'True'}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'major': ('django.db.models.fields.PositiveIntegerField', [], {'default': '1'}),
'micro': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0'}),
'mimetype': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'minor': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0'}),
'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'})
},
u'documents.recentdocument': {
'Meta': {'ordering': "('-datetime_accessed',)", 'object_name': 'RecentDocument'},
'datetime_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'}),
'document': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.Document']"}),
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'user': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['auth.User']"})
}
}
complete_apps = ['documents']

View File

@@ -808,6 +808,10 @@ def document_type_list(request):
'title': _(u'Document types'),
'hide_link': True,
'list_object_variable_name': 'document_type',
'extra_columns': [
{'name': _('OCR'), 'attribute': 'ocr'},
{'name': _('Documents'), 'attribute': encapsulate(lambda x: x.documents.count())}
]
}
return render_to_response('main/generic_list.html', context,
@@ -853,7 +857,6 @@ def document_type_delete(request, document_type_id):
if request.method == 'POST':
try:
Document.objects.filter(document_type=document_type).update(document_type=None)
document_type.delete()
messages.success(request, _(u'Document type: %s deleted successfully.') % document_type)
except Exception as exception:
@@ -870,7 +873,7 @@ def document_type_delete(request, document_type_id):
'object_name': _(u'Document type'),
'previous': previous,
'title': _(u'Are you sure you wish to delete the document type: %s?') % document_type,
'message': _(u'The document type of all documents using this document type will be set to none.'),
'message': _(u'All documents of this type will be deleted too.'),
'form_icon': u'layout_delete.png',
}

View File

@@ -20,7 +20,6 @@ from .links import (all_document_ocr_cleanup, ocr_tool_link,
submit_document_multiple)
from .models import DocumentQueue
from .permissions import PERMISSION_OCR_DOCUMENT
from .settings import AUTOMATIC_OCR
from .tasks import task_do_ocr
logger = logging.getLogger(__name__)
@@ -42,7 +41,7 @@ def document_post_save(sender, instance, **kwargs):
logger.debug('received post save signal')
logger.debug('instance: %s' % instance)
if kwargs.get('created', False):
if AUTOMATIC_OCR:
if instance.document.document_type.ocr:
instance.document.submit_for_ocr()

View File

@@ -10,7 +10,6 @@ register_settings(
settings=[
{'name': u'TESSERACT_PATH', 'global_name': u'OCR_TESSERACT_PATH', 'default': u'/usr/bin/tesseract', 'exists': True},
{'name': u'LANGUAGE', 'global_name': u'OCR_LANGUAGE', 'default': u'eng'},
{'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': True, 'description': _(u'Automatically queue newly created documents for OCR.')},
{'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
{'name': u'PDFTOTEXT_PATH', 'global_name': u'OCR_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'description': _(u'File path to poppler\'s pdftotext program used to extract text from PDF files.'), 'exists': True},
{'name': u'BACKEND', 'global_name': u'OCR_BACKEND', 'default': u'ocr.backends.tesseract.Tesseract', 'description': _(u'Full path to the backend to be used to do OCR.')},