From 549f0fdc8795a7f6c56e47c95e6caac2a5baf24b Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 21 Oct 2014 16:53:42 -0400 Subject: [PATCH] Issue #75, move OCR queueing from a setting to a DocumentType model field --- docs/topics/settings.rst | 10 -- mayan/apps/documents/models.py | 1 + .../0024_auto__add_field_documenttype_ocr.py | 120 ++++++++++++++++++ mayan/apps/documents/views.py | 7 +- mayan/apps/ocr/__init__.py | 3 +- mayan/apps/ocr/settings.py | 1 - 6 files changed, 127 insertions(+), 15 deletions(-) create mode 100644 mayan/apps/documents/south_migrations/0024_auto__add_field_documenttype_ocr.py diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst index 80f5990856..8f002de789 100644 --- a/docs/topics/settings.rst +++ b/docs/topics/settings.rst @@ -288,16 +288,6 @@ Default: ``eng`` Language code passed to the ``tesseract`` executable. -.. setting:: OCR_AUTOMATIC_OCR - -**OCR_AUTOMATIC_OCR** - -Default: ``True`` - -Automatically queue newly created documents or newly uploaded versions -of existing documents for OCR. - - .. setting:: OCR_UNPAPER_PATH **OCR_UNPAPER_PATH** diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py index d9f5b23d3e..0fbb42f94b 100644 --- a/mayan/apps/documents/models.py +++ b/mayan/apps/documents/models.py @@ -57,6 +57,7 @@ class DocumentType(models.Model): properties can be attached """ name = models.CharField(max_length=32, verbose_name=_(u'Name'), unique=True) + ocr = models.BooleanField(default=True, verbose_name=_(u'Automatically queue newly created documents for OCR.')) objects = DocumentTypeManager() diff --git a/mayan/apps/documents/south_migrations/0024_auto__add_field_documenttype_ocr.py b/mayan/apps/documents/south_migrations/0024_auto__add_field_documenttype_ocr.py new file mode 100644 index 0000000000..baffa55cd1 --- /dev/null +++ b/mayan/apps/documents/south_migrations/0024_auto__add_field_documenttype_ocr.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +from south.utils import datetime_utils as datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding field 'DocumentType.ocr' + db.add_column(u'documents_documenttype', 'ocr', + self.gf('django.db.models.fields.BooleanField')(default=True), + keep_default=False) + + + def backwards(self, orm): + # Deleting field 'DocumentType.ocr' + db.delete_column(u'documents_documenttype', 'ocr') + + + models = { + u'auth.group': { + 'Meta': {'object_name': 'Group'}, + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), + 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) + }, + u'auth.permission': { + 'Meta': {'ordering': "(u'content_type__app_label', u'content_type__model', u'codename')", 'unique_together': "((u'content_type', u'codename'),)", 'object_name': 'Permission'}, + 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['contenttypes.ContentType']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) + }, + u'auth.user': { + 'Meta': {'object_name': 'User'}, + 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), + 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Group']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Permission']"}), + 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) + }, + u'contenttypes.contenttype': { + 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, + 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) + }, + u'documents.document': { + 'Meta': {'ordering': "['-date_added']", 'object_name': 'Document'}, + 'date_added': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'document_type': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'documents'", 'to': u"orm['documents.DocumentType']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'uuid': ('django.db.models.fields.CharField', [], {'max_length': '48', 'blank': 'True'}) + }, + u'documents.documentpage': { + 'Meta': {'ordering': "['page_number']", 'object_name': 'DocumentPage'}, + 'content': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'document_version': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'pages'", 'to': u"orm['documents.DocumentVersion']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'page_label': ('django.db.models.fields.CharField', [], {'max_length': '40', 'null': 'True', 'blank': 'True'}), + 'page_number': ('django.db.models.fields.PositiveIntegerField', [], {'default': '1', 'db_index': 'True'}) + }, + u'documents.documentpagetransformation': { + 'Meta': {'ordering': "('order',)", 'object_name': 'DocumentPageTransformation'}, + 'arguments': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'document_page': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentPage']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'order': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'null': 'True', 'db_index': 'True', 'blank': 'True'}), + 'transformation': ('django.db.models.fields.CharField', [], {'max_length': '128'}) + }, + u'documents.documenttype': { + 'Meta': {'ordering': "['name']", 'object_name': 'DocumentType'}, + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), + 'ocr': ('django.db.models.fields.BooleanField', [], {'default': 'True'}) + }, + u'documents.documenttypefilename': { + 'Meta': {'ordering': "['filename']", 'object_name': 'DocumentTypeFilename'}, + 'document_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentType']"}), + 'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'filename': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}) + }, + u'documents.documentversion': { + 'Meta': {'unique_together': "(('document', 'major', 'minor', 'micro'),)", 'object_name': 'DocumentVersion'}, + 'checksum': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'comment': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'versions'", 'to': u"orm['documents.Document']"}), + 'encoding': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}), + 'file': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}), + 'filename': ('django.db.models.fields.CharField', [], {'default': "u''", 'max_length': '255', 'db_index': 'True'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'major': ('django.db.models.fields.PositiveIntegerField', [], {'default': '1'}), + 'micro': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0'}), + 'mimetype': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}), + 'minor': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0'}), + 'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}) + }, + u'documents.recentdocument': { + 'Meta': {'ordering': "('-datetime_accessed',)", 'object_name': 'RecentDocument'}, + 'datetime_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'}), + 'document': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.Document']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['auth.User']"}) + } + } + + complete_apps = ['documents'] \ No newline at end of file diff --git a/mayan/apps/documents/views.py b/mayan/apps/documents/views.py index f95308247f..63b75ba10c 100644 --- a/mayan/apps/documents/views.py +++ b/mayan/apps/documents/views.py @@ -808,6 +808,10 @@ def document_type_list(request): 'title': _(u'Document types'), 'hide_link': True, 'list_object_variable_name': 'document_type', + 'extra_columns': [ + {'name': _('OCR'), 'attribute': 'ocr'}, + {'name': _('Documents'), 'attribute': encapsulate(lambda x: x.documents.count())} + ] } return render_to_response('main/generic_list.html', context, @@ -853,7 +857,6 @@ def document_type_delete(request, document_type_id): if request.method == 'POST': try: - Document.objects.filter(document_type=document_type).update(document_type=None) document_type.delete() messages.success(request, _(u'Document type: %s deleted successfully.') % document_type) except Exception as exception: @@ -870,7 +873,7 @@ def document_type_delete(request, document_type_id): 'object_name': _(u'Document type'), 'previous': previous, 'title': _(u'Are you sure you wish to delete the document type: %s?') % document_type, - 'message': _(u'The document type of all documents using this document type will be set to none.'), + 'message': _(u'All documents of this type will be deleted too.'), 'form_icon': u'layout_delete.png', } diff --git a/mayan/apps/ocr/__init__.py b/mayan/apps/ocr/__init__.py index a8406673ec..5cbb099ad6 100644 --- a/mayan/apps/ocr/__init__.py +++ b/mayan/apps/ocr/__init__.py @@ -20,7 +20,6 @@ from .links import (all_document_ocr_cleanup, ocr_tool_link, submit_document_multiple) from .models import DocumentQueue from .permissions import PERMISSION_OCR_DOCUMENT -from .settings import AUTOMATIC_OCR from .tasks import task_do_ocr logger = logging.getLogger(__name__) @@ -42,7 +41,7 @@ def document_post_save(sender, instance, **kwargs): logger.debug('received post save signal') logger.debug('instance: %s' % instance) if kwargs.get('created', False): - if AUTOMATIC_OCR: + if instance.document.document_type.ocr: instance.document.submit_for_ocr() diff --git a/mayan/apps/ocr/settings.py b/mayan/apps/ocr/settings.py index b33e3801fe..068c11f913 100644 --- a/mayan/apps/ocr/settings.py +++ b/mayan/apps/ocr/settings.py @@ -10,7 +10,6 @@ register_settings( settings=[ {'name': u'TESSERACT_PATH', 'global_name': u'OCR_TESSERACT_PATH', 'default': u'/usr/bin/tesseract', 'exists': True}, {'name': u'LANGUAGE', 'global_name': u'OCR_LANGUAGE', 'default': u'eng'}, - {'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': True, 'description': _(u'Automatically queue newly created documents for OCR.')}, {'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True}, {'name': u'PDFTOTEXT_PATH', 'global_name': u'OCR_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'description': _(u'File path to poppler\'s pdftotext program used to extract text from PDF files.'), 'exists': True}, {'name': u'BACKEND', 'global_name': u'OCR_BACKEND', 'default': u'ocr.backends.tesseract.Tesseract', 'description': _(u'Full path to the backend to be used to do OCR.')},