diff --git a/apps/documents/conf/settings.py b/apps/documents/conf/settings.py index 9c91c9b03d..1dc2bc77a7 100644 --- a/apps/documents/conf/settings.py +++ b/apps/documents/conf/settings.py @@ -29,7 +29,6 @@ USE_STAGING_DIRECTORY = getattr(settings, 'DOCUMENTS_USE_STAGING_DIRECTORY', Fal STAGING_DIRECTORY = getattr(settings, 'DOCUMENTS_STAGING_DIRECTORY', u'/tmp/mayan/staging') DELETE_STAGING_FILE_AFTER_UPLOAD = getattr(settings, 'DOCUMENTS_DELETE_STAGING_FILE_AFTER_UPLOAD', False) STAGING_FILES_PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_STAGING_FILES_PREVIEW_SIZE', '640x480') -AUTOMATIC_OCR = getattr(settings, 'DOCUMENTS_AUTOMATIC_OCR', False) ENABLE_SINGLE_DOCUMENT_UPLOAD = getattr(settings, 'DOCUMENTS_ENABLE_SINGLE_DOCUMENT_UPLOAD', True) UNCOMPRESS_COMPRESSED_LOCAL_FILES = getattr(settings, 'DOCUMENTS_UNCOMPRESS_COMPRESSED_LOCAL_FILES', True) UNCOMPRESS_COMPRESSED_STAGING_FILES = getattr(settings, 'DOCUMENTS_UNCOMPRESS_COMPRESSED_STAGING_FILES', True) diff --git a/apps/documents/views.py b/apps/documents/views.py index aa10d9adc0..7b13b3d4e3 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -24,7 +24,6 @@ from filetransfers.api import serve_file from filesystem_serving.api import document_create_fs_links, document_delete_fs_links from filesystem_serving.conf.settings import FILESERVING_ENABLE from permissions.api import check_permissions -from ocr.views import submit_document_to_queue from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD from documents.conf.settings import USE_STAGING_DIRECTORY @@ -36,7 +35,6 @@ from documents.conf.settings import GROUP_MAX_RESULTS from documents.conf.settings import GROUP_SHOW_EMPTY from documents.conf.settings import GROUP_SHOW_THUMBNAIL from documents.conf.settings import DEFAULT_TRANSFORMATIONS -from documents.conf.settings import AUTOMATIC_OCR from documents.conf.settings import UNCOMPRESS_COMPRESSED_LOCAL_FILES from documents.conf.settings import UNCOMPRESS_COMPRESSED_STAGING_FILES 
from documents.conf.settings import STORAGE_BACKEND @@ -115,10 +113,6 @@ def document_create_sibling(request, document_id, multiple=True): def _handle_save_document(request, document, form=None): - #TODO: move this to OCR app as a post_save signal on create==True - if AUTOMATIC_OCR: - submit_document_to_queue(request, document) - if form and 'document_type_available_filenames' in form.cleaned_data: if form.cleaned_data['document_type_available_filenames']: document.file_filename = form.cleaned_data['document_type_available_filenames'].filename diff --git a/apps/main/views.py b/apps/main/views.py index c34e4074e7..58482d6a01 100644 --- a/apps/main/views.py +++ b/apps/main/views.py @@ -37,7 +37,6 @@ def check_settings(request): {'name':'DOCUMENTS_THUMBNAIL_SIZE', 'value':documents_settings.THUMBNAIL_SIZE}, {'name':'DOCUMENTS_DISPLAY_SIZE', 'value':documents_settings.DISPLAY_SIZE}, {'name':'DOCUMENTS_TRANFORMATION_PREVIEW_SIZE', 'value':documents_settings.TRANFORMATION_PREVIEW_SIZE}, - {'name':'DOCUMENTS_AUTOMATIC_OCR', 'value':documents_settings.AUTOMATIC_OCR}, {'name':'DOCUMENTS_ENABLE_SINGLE_DOCUMENT_UPLOAD', 'value':documents_settings.ENABLE_SINGLE_DOCUMENT_UPLOAD}, {'name':'DOCUMENTS_UNCOMPRESS_COMPRESSED_LOCAL_FILES', 'value':documents_settings.UNCOMPRESS_COMPRESSED_LOCAL_FILES}, {'name':'DOCUMENTS_UNCOMPRESS_COMPRESSED_STAGING_FILES', 'value':documents_settings.UNCOMPRESS_COMPRESSED_STAGING_FILES}, @@ -84,6 +83,7 @@ def check_settings(request): {'name':'CONVERTER_HIGH_QUALITY_OPTIONS', 'value':converter_settings.HIGH_QUALITY_OPTIONS}, # OCR + {'name':'OCR_AUTOMATIC_OCR', 'value':ocr_settings.AUTOMATIC_OCR}, {'name':'OCR_TESSERACT_PATH', 'value':ocr_settings.TESSERACT_PATH, 'exists':True}, {'name':'OCR_TESSERACT_LANGUAGE', 'value':ocr_settings.TESSERACT_LANGUAGE}, {'name':'OCR_NODE_CONCURRENT_EXECUTION', 'value':ocr_settings.NODE_CONCURRENT_EXECUTION}, diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index 5f22710a51..d9796f3518 100644 --- 
a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -3,12 +3,13 @@ from multiprocessing import Queue from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext from django.db.utils import DatabaseError +from django.db.models.signals import post_save from navigation.api import register_links, register_menu, register_multi_item_links from permissions.api import register_permissions - from documents.models import Document +from ocr.conf.settings import AUTOMATIC_OCR from models import DocumentQueue, QueueDocument from literals import QUEUEDOCUMENT_STATE_PROCESSING, \ QUEUEDOCUMENT_STATE_PENDING, DOCUMENTQUEUE_STATE_STOPPED, \ @@ -55,3 +56,11 @@ try: except DatabaseError: #syncdb pass + + +def document_post_save(sender, instance, **kwargs): + if kwargs.get('created', False): + if AUTOMATIC_OCR: + DocumentQueue.objects.queue_document(instance) + +post_save.connect(document_post_save, sender=Document) diff --git a/apps/ocr/conf/settings.py b/apps/ocr/conf/settings.py index 4f6c00ccba..a0f0a80612 100644 --- a/apps/ocr/conf/settings.py +++ b/apps/ocr/conf/settings.py @@ -4,3 +4,4 @@ TESSERACT_PATH = getattr(settings, 'OCR_TESSERACT_PATH', u'/usr/bin/tesseract') TESSERACT_LANGUAGE = getattr(settings, 'OCR_TESSERACT_LANGUAGE', None) REPLICATION_DELAY = getattr(settings, 'OCR_REPLICATION_DELAY', 10) #In seconds NODE_CONCURRENT_EXECUTION = getattr(settings, 'OCR_NODE_CONCURRENT_EXECUTION', 1) +AUTOMATIC_OCR = getattr(settings, 'OCR_AUTOMATIC_OCR', False) diff --git a/docs/Changelog.txt b/docs/Changelog.txt index 5a30a79fab..5197030bda 100644 --- a/docs/Changelog.txt +++ b/docs/Changelog.txt @@ -2,6 +2,9 @@ * Added a new setup option: FILESYSTEM_INDEXING_AVAILABLE_FUNCTIONS - a dictionary to allow users to add custom functions +* Made automatic OCR a function of the OCR app and not of Documents app + (via signals) + Renamed setup option DOCUMENTS_AUTOMATIC_OCR to OCR_AUTOMATIC_OCR 2011-Apr-01 * Added support for editing the metadata of 
multiple documents at the diff --git a/docs/TODO b/docs/TODO index 4a97384adc..2d23c32cd2 100644 --- a/docs/TODO +++ b/docs/TODO @@ -179,7 +179,7 @@ OCR * Don't allow duplicate documents in queues - DONE * OCR queue schedule support * Make automatic OCR a function of OCR app and not of Documents app - (via signals) + (via signals) - DONE * Two types of OCR nodes: thin, fat (thin = document file is passed serialized to node, fat = has direct access to document storage read document file) diff --git a/settings.py b/settings.py index e37d276e77..bfdbb4159a 100644 --- a/settings.py +++ b/settings.py @@ -166,7 +166,6 @@ TEMPLATE_CONTEXT_PROCESSORS = ( #DOCUMENTS_STAGING_DIRECTORY = u'/tmp/mayan/staging' #DOCUMENTS_DELETE_STAGING_FILE_AFTER_UPLOAD = False #DOCUMENTS_STAGING_FILES_PREVIEW_SIZE = '640x480' -#DOCUMENTS_AUTOMATIC_OCR = False #DOCUMENTS_ENABLE_SINGLE_DOCUMENT_UPLOAD = True #DOCUMENTS_UNCOMPRESS_COMPRESSED_LOCAL_FILES = True #DOCUMENTS_UNCOMPRESS_COMPRESSED_STAGING_FILES = True @@ -224,6 +223,7 @@ TEMPLATE_CONTEXT_PROCESSORS = ( #OCR_NODE_CONCURRENT_EXECUTION = 1 #OCR_TESSERACT_LANGUAGE = None #OCR_REPLICATION_DELAY = 10 +#OCR_AUTOMATIC_OCR = False # Permissions #ROLES_DEFAULT_ROLES = []