Made automatic OCR a function of the OCR app and not of Documents app (via signals)

Renamed setup option DOCUMENTS_AUTOMATIC_OCR to OCR_AUTOMATIC_OCR
2011-04-04 15:36:00 -04:00
parent 664ece7a60
commit 283df926d1
8 changed files with 17 additions and 11 deletions
--- a/apps/documents/conf/settings.py
+++ b/apps/documents/conf/settings.py
@@ -29,7 +29,6 @@ USE_STAGING_DIRECTORY = getattr(settings, 'DOCUMENTS_USE_STAGING_DIRECTORY', Fal
 STAGING_DIRECTORY = getattr(settings, 'DOCUMENTS_STAGING_DIRECTORY', u'/tmp/mayan/staging')
 DELETE_STAGING_FILE_AFTER_UPLOAD = getattr(settings, 'DOCUMENTS_DELETE_STAGING_FILE_AFTER_UPLOAD', False)
 STAGING_FILES_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_STAGING_FILES_PREVIEW_SIZE', '640x480')
-AUTOMATIC_OCR = getattr(settings, 'DOCUMENTS_AUTOMATIC_OCR', False)
 ENABLE_SINGLE_DOCUMENT_UPLOAD = getattr(settings, 'DOCUMENTS_ENABLE_SINGLE_DOCUMENT_UPLOAD', True)
 UNCOMPRESS_COMPRESSED_LOCAL_FILES = getattr(settings, 'DOCUMENTS_UNCOMPRESS_COMPRESSED_LOCAL_FILES', True)
 UNCOMPRESS_COMPRESSED_STAGING_FILES = getattr(settings, 'DOCUMENTS_UNCOMPRESS_COMPRESSED_STAGING_FILES', True)
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -24,7 +24,6 @@ from filetransfers.api import serve_file
 from filesystem_serving.api import document_create_fs_links, document_delete_fs_links
 from filesystem_serving.conf.settings import FILESERVING_ENABLE
 from permissions.api import check_permissions
-from ocr.views import submit_document_to_queue

 from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD
 from documents.conf.settings import USE_STAGING_DIRECTORY
@@ -36,7 +35,6 @@ from documents.conf.settings import GROUP_MAX_RESULTS
 from documents.conf.settings import GROUP_SHOW_EMPTY
 from documents.conf.settings import GROUP_SHOW_THUMBNAIL
 from documents.conf.settings import DEFAULT_TRANSFORMATIONS
-from documents.conf.settings import AUTOMATIC_OCR
 from documents.conf.settings import UNCOMPRESS_COMPRESSED_LOCAL_FILES
 from documents.conf.settings import UNCOMPRESS_COMPRESSED_STAGING_FILES
 from documents.conf.settings import STORAGE_BACKEND
@@ -115,10 +113,6 @@ def document_create_sibling(request, document_id, multiple=True):


 def _handle_save_document(request, document, form=None):
-    #TODO: move this to OCR app as a post_save signal on create==True
-    if AUTOMATIC_OCR:
-        submit_document_to_queue(request, document)
-    
    if form and 'document_type_available_filenames' in form.cleaned_data:
        if form.cleaned_data['document_type_available_filenames']:
            document.file_filename = form.cleaned_data['document_type_available_filenames'].filename
--- a/apps/main/views.py
+++ b/apps/main/views.py
@@ -37,7 +37,6 @@ def check_settings(request):
        {'name':'DOCUMENTS_THUMBNAIL_SIZE', 'value':documents_settings.THUMBNAIL_SIZE},
        {'name':'DOCUMENTS_DISPLAY_SIZE', 'value':documents_settings.DISPLAY_SIZE},
        {'name':'DOCUMENTS_TRANFORMATION_PREVIEW_SIZE', 'value':documents_settings.TRANFORMATION_PREVIEW_SIZE},
-        {'name':'DOCUMENTS_AUTOMATIC_OCR', 'value':documents_settings.AUTOMATIC_OCR},
        {'name':'DOCUMENTS_ENABLE_SINGLE_DOCUMENT_UPLOAD', 'value':documents_settings.ENABLE_SINGLE_DOCUMENT_UPLOAD},
        {'name':'DOCUMENTS_UNCOMPRESS_COMPRESSED_LOCAL_FILES', 'value':documents_settings.UNCOMPRESS_COMPRESSED_LOCAL_FILES},
        {'name':'DOCUMENTS_UNCOMPRESS_COMPRESSED_STAGING_FILES', 'value':documents_settings.UNCOMPRESS_COMPRESSED_STAGING_FILES},
@@ -84,6 +83,7 @@ def check_settings(request):
        {'name':'CONVERTER_HIGH_QUALITY_OPTIONS', 'value':converter_settings.HIGH_QUALITY_OPTIONS},

        # OCR
+        {'name':'OCR_AUTOMATIC_OCR', 'value':ocr_settings.AUTOMATIC_OCR},
        {'name':'OCR_TESSERACT_PATH', 'value':ocr_settings.TESSERACT_PATH, 'exists':True},
        {'name':'OCR_TESSERACT_LANGUAGE', 'value':ocr_settings.TESSERACT_LANGUAGE},
        {'name':'OCR_NODE_CONCURRENT_EXECUTION', 'value':ocr_settings.NODE_CONCURRENT_EXECUTION},
--- a/apps/ocr/init.py
+++ b/apps/ocr/init.py
@@ -3,12 +3,13 @@ from multiprocessing import Queue
 from django.utils.translation import ugettext_lazy as _
 from django.utils.translation import ugettext
 from django.db.utils import DatabaseError
+from django.db.models.signals import post_save

 from navigation.api import register_links, register_menu, register_multi_item_links
 from permissions.api import register_permissions
-
 from documents.models import Document

+from ocr.conf.settings import AUTOMATIC_OCR
 from models import DocumentQueue, QueueDocument
 from literals import QUEUEDOCUMENT_STATE_PROCESSING, \
    QUEUEDOCUMENT_STATE_PENDING, DOCUMENTQUEUE_STATE_STOPPED, \
@@ -55,3 +56,11 @@ try:
 except DatabaseError:
    #syncdb
    pass
+
+
+def document_post_save(sender, instance, **kwargs):
+    if kwargs.get('created', False):
+        if AUTOMATIC_OCR:
+            DocumentQueue.objects.queue_document(instance)
+
+post_save.connect(document_post_save, sender=Document)
--- a/apps/ocr/conf/settings.py
+++ b/apps/ocr/conf/settings.py
@@ -4,3 +4,4 @@ TESSERACT_PATH = getattr(settings, 'OCR_TESSERACT_PATH', u'/usr/bin/tesseract')
 TESSERACT_LANGUAGE = getattr(settings, 'OCR_TESSERACT_LANGUAGE', None)
 REPLICATION_DELAY = getattr(settings, 'OCR_REPLICATION_DELAY', 10) #In seconds
 NODE_CONCURRENT_EXECUTION = getattr(settings, 'OCR_NODE_CONCURRENT_EXECUTION', 1)
+AUTOMATIC_OCR = getattr(settings, 'OCR_AUTOMATIC_OCR', False)
--- a/docs/Changelog.txt
+++ b/docs/Changelog.txt
@@ -2,6 +2,9 @@
 * Added a new setup option:
    FILESYSTEM_INDEXING_AVAILABLE_FUNCTIONS - a dictionary to allow users
    to add custom functions
+* Made automatic OCR a function of the OCR app and not of Documents app
+  (via signals)
+  Renamed setup option DOCUMENTS_AUTOMATIC_OCR to OCR_AUTOMATIC_OCR

 2011-Apr-01
 * Added support for editing the metadata of multiple documents at the
--- a/docs/TODO
+++ b/docs/TODO
@@ -179,7 +179,7 @@ OCR
 * Don't allow duplicate documents in queues                            - DONE
 * OCR queue schedule support
 * Make automatic OCR a function of OCR app and not of Documents app
-  (via signals)
+  (via signals)                                                        - DONE
 * Two types of OCR nodes: thin, fat (thin = document file is passed
  serialized to node, fat = has direct access to document storage read
  document file)
--- a/settings.py
+++ b/settings.py
@@ -166,7 +166,6 @@ TEMPLATE_CONTEXT_PROCESSORS = (
 #DOCUMENTS_STAGING_DIRECTORY = u'/tmp/mayan/staging'
 #DOCUMENTS_DELETE_STAGING_FILE_AFTER_UPLOAD = False
 #DOCUMENTS_STAGING_FILES_PREVIEW_SIZE = '640x480'
-#DOCUMENTS_AUTOMATIC_OCR = False
 #DOCUMENTS_ENABLE_SINGLE_DOCUMENT_UPLOAD = True
 #DOCUMENTS_UNCOMPRESS_COMPRESSED_LOCAL_FILES = True
 #DOCUMENTS_UNCOMPRESS_COMPRESSED_STAGING_FILES = True
@@ -224,6 +223,7 @@ TEMPLATE_CONTEXT_PROCESSORS = (
 #OCR_NODE_CONCURRENT_EXECUTION = 1
 #OCR_TESSERACT_LANGUAGE = None
 #OCR_REPLICATION_DELAY = 10
+#OCR_AUTOMATIC_OCR = False

 # Permissions
 #ROLES_DEFAULT_ROLES = []