Update and re-enable ocr app
This commit is contained in:
@@ -1,75 +1 @@
|
|||||||
from __future__ import absolute_import
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from django.db import transaction
|
|
||||||
from django.utils.translation import ugettext_lazy as _
|
|
||||||
from django.utils.translation import ugettext
|
|
||||||
from django.db.models.signals import post_save, post_syncdb
|
|
||||||
from django.dispatch import receiver
|
|
||||||
from django.db.utils import DatabaseError
|
|
||||||
|
|
||||||
#from navigation.api import (bind_links, register_multi_item_links,
|
|
||||||
# register_multi_item_links)
|
|
||||||
#from documents.models import Document, DocumentVersion
|
|
||||||
from maintenance.api import MaintenanceNamespace
|
|
||||||
from project_tools.api import register_tool
|
|
||||||
from acls.api import class_permissions
|
|
||||||
from job_processor.models import JobQueue, JobType
|
|
||||||
from job_processor.exceptions import JobQueuePushError
|
|
||||||
|
|
||||||
#from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL)
|
|
||||||
#from .models import OCRProcessingSingleton
|
|
||||||
#from .api import do_document_ocr
|
|
||||||
from .permissions import PERMISSION_OCR_DOCUMENT
|
|
||||||
from .exceptions import AlreadyQueued
|
|
||||||
from . import models as ocr_models
|
|
||||||
from .literals import OCR_QUEUE_NAME
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
ocr_job_queue = None
|
|
||||||
|
|
||||||
from .links import (submit_document, ocr_disable,
|
|
||||||
ocr_enable, all_document_ocr_cleanup, ocr_log,
|
|
||||||
ocr_tool_link, submit_document_multiple)
|
|
||||||
|
|
||||||
bind_links([Document], [submit_document])
|
|
||||||
bind_links([OCRProcessingSingleton], [ocr_disable, ocr_enable])
|
|
||||||
|
|
||||||
namespace = MaintenanceNamespace(label=_(u'OCR'))
|
|
||||||
namespace.create_tool(all_document_ocr_cleanup)
|
|
||||||
register_multi_item_links(['folder_view', 'search', 'results', 'index_instance_node_view', 'document_find_duplicates', 'document_type_document_list', 'document_group_view', 'document_list', 'document_list_recent'], [submit_document_multiple])
|
|
||||||
|
|
||||||
|
|
||||||
@transaction.commit_on_success
def create_ocr_job_queue():
    """
    Fetch or create the OCR job queue and store it in ``ocr_job_queue``.

    The queue is looked up by ``OCR_QUEUE_NAME`` and, when missing, created
    with the label 'OCR' and unique jobs enabled.  If the database raises
    ``DatabaseError`` the transaction is rolled back, the module-level
    ``ocr_job_queue`` keeps its previous value and the failure is logged
    instead of being dropped silently.
    """
    global ocr_job_queue
    try:
        # Only the queue is needed; the "created" flag is discarded by
        # indexing rather than by binding ``_``, which is the translation
        # function used on this very call.
        ocr_job_queue = JobQueue.objects.get_or_create(
            name=OCR_QUEUE_NAME,
            defaults={'label': _('OCR'), 'unique_jobs': True}
        )[0]
    except DatabaseError as exception:
        transaction.rollback()
        # NOTE(review): presumably expected while the tables do not exist
        # yet -- confirm; logged so a missing queue can be diagnosed.
        logger.warning('Unable to get or create the OCR job queue: %s', exception)
|
|
||||||
|
|
||||||
|
|
||||||
@receiver(post_save, dispatch_uid='document_post_save', sender=DocumentVersion)
def document_post_save(sender, instance, **kwargs):
    """
    Submit a newly created document version for OCR.

    Connected to ``post_save`` with ``DocumentVersion`` as sender.  Saves
    where ``created`` is false or absent are ignored.  A
    ``JobQueuePushError`` raised while queueing is logged and suppressed so
    that it does not propagate out of the signal handler.
    """
    logger.debug('received post save signal')
    # Lazy %-arguments: the instance is only rendered when debug is enabled.
    logger.debug('instance: %s', instance)

    if not kwargs.get('created', False):
        return

    #if AUTOMATIC_OCR:
    try:
        instance.submit_for_ocr()
    except JobQueuePushError as exception:
        # Best effort by design; leave a trace instead of a bare ``pass``.
        logger.debug('unable to queue %s for OCR: %s', instance, exception)
|
|
||||||
|
|
||||||
|
|
||||||
register_tool(ocr_tool_link)
|
|
||||||
|
|
||||||
class_permissions(Document, [
|
|
||||||
PERMISSION_OCR_DOCUMENT,
|
|
||||||
])
|
|
||||||
|
|
||||||
create_ocr_job_queue()
|
|
||||||
ocr_job_type = JobType('ocr', _(u'OCR'), do_document_ocr)
|
|
||||||
|
|
||||||
Document.add_to_class('submit_for_ocr', lambda document: ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk))
|
|
||||||
DocumentVersion.add_to_class('submit_for_ocr', lambda document_version: ocr_job_queue.push(ocr_job_type, document_version_pk=document_version.pk))
|
|
||||||
|
|||||||
@@ -10,11 +10,11 @@ import sys
|
|||||||
from django.utils.translation import ugettext as _
|
from django.utils.translation import ugettext as _
|
||||||
from django.utils.importlib import import_module
|
from django.utils.importlib import import_module
|
||||||
|
|
||||||
#from common.settings import TEMPORARY_DIRECTORY
|
from common.settings import TEMPORARY_DIRECTORY
|
||||||
#from converter.api import convert
|
from converter.api import convert
|
||||||
from documents.models import DocumentPage, DocumentVersion
|
from documents.models import DocumentPage, DocumentVersion
|
||||||
|
|
||||||
#from .conf.settings import (TESSERACT_PATH, TESSERACT_LANGUAGE, UNPAPER_PATH)
|
from .settings import TESSERACT_PATH, TESSERACT_LANGUAGE, UNPAPER_PATH
|
||||||
from .exceptions import TesseractError, UnpaperError
|
from .exceptions import TesseractError, UnpaperError
|
||||||
from .parsers import parse_document_page
|
from .parsers import parse_document_page
|
||||||
from .parsers.exceptions import ParserError, ParserUnknownFile
|
from .parsers.exceptions import ParserError, ParserUnknownFile
|
||||||
|
|||||||
@@ -1,10 +1,3 @@
|
|||||||
class AlreadyQueued(Exception):
    """
    Raised when trying to queue a document that is already in the queue
    """
    pass
|
|
||||||
|
|
||||||
|
|
||||||
class TesseractError(Exception):
|
class TesseractError(Exception):
|
||||||
"""
|
"""
|
||||||
Raised by tesseract
|
Raised by tesseract
|
||||||
@@ -17,15 +10,3 @@ class UnpaperError(Exception):
|
|||||||
Raised by unpaper
|
Raised by unpaper
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ReQueueError(Exception):
    # NOTE(review): no raise site is visible alongside this definition;
    # presumably signals a failed attempt to re-queue a document -- confirm.
    pass
|
|
||||||
|
|
||||||
|
|
||||||
class OCRProcessingAlreadyDisabled(Exception):
    """
    Raised by OCRProcessingSingleton.disable() when the state is already
    disabled
    """
    pass
|
|
||||||
|
|
||||||
|
|
||||||
class OCRProcessingAlreadyEnabled(Exception):
    """
    Raised by OCRProcessingSingleton.enable() when the state is already
    enabled
    """
    pass
|
|
||||||
|
|||||||
@@ -7,22 +7,9 @@ from navigation import Link
|
|||||||
from .permissions import (PERMISSION_OCR_DOCUMENT,
|
from .permissions import (PERMISSION_OCR_DOCUMENT,
|
||||||
PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE,
|
PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE,
|
||||||
PERMISSION_OCR_CLEAN_ALL_PAGES)
|
PERMISSION_OCR_CLEAN_ALL_PAGES)
|
||||||
from .models import OCRProcessingSingleton
|
|
||||||
from .icons import icon_submit_document, icon_ocr_cleanup
|
from .icons import icon_submit_document, icon_ocr_cleanup
|
||||||
|
|
||||||
def is_enabled(context):
    """
    Tell whether OCR processing is enabled according to the
    OCRProcessingSingleton instance; ``context`` is accepted but not read.
    """
    processing = OCRProcessingSingleton.get()
    return processing.is_enabled()
|
|
||||||
|
|
||||||
def is_disabled(context):
    """
    Tell whether OCR processing is not enabled according to the
    OCRProcessingSingleton instance; ``context`` is accepted but not read.
    """
    processing = OCRProcessingSingleton.get()
    enabled = processing.is_enabled()
    return not enabled
|
|
||||||
|
|
||||||
|
|
||||||
#ocr_log = Link(text=_(u'queue document list'), view='ocr_log', sprite='text', permissions=[PERMISSION_OCR_DOCUMENT])
|
|
||||||
#ocr_disable = Link(text=_(u'disable OCR processing'), view='ocr_disable', sprite='control_stop_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE], conditional_disable=is_disabled)
|
|
||||||
#ocr_enable = Link(text=_(u'enable OCR processing'), view='ocr_enable', sprite='control_play_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE], conditional_disable=is_enabled)
|
|
||||||
submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', icon=icon_submit_document, permissions=[PERMISSION_OCR_DOCUMENT])
|
submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', icon=icon_submit_document, permissions=[PERMISSION_OCR_DOCUMENT])
|
||||||
submit_document_multiple = Link(text=_('submit to OCR queue'), view='submit_document_multiple', icon=icon_submit_document, permissions=[PERMISSION_OCR_DOCUMENT])
|
submit_document_multiple = Link(text=_('submit to OCR queue'), view='submit_document_multiple', icon=icon_submit_document, permissions=[PERMISSION_OCR_DOCUMENT])
|
||||||
|
|
||||||
all_document_ocr_cleanup = Link(text=_(u'clean up pages content'), view='all_document_ocr_cleanup', icon=icon_ocr_cleanup, permissions=[PERMISSION_OCR_CLEAN_ALL_PAGES], description=_(u'Runs a language filter to remove common OCR mistakes from document pages content.'))
|
all_document_ocr_cleanup = Link(text=_(u'clean up pages content'), view='all_document_ocr_cleanup', icon=icon_ocr_cleanup, permissions=[PERMISSION_OCR_CLEAN_ALL_PAGES], description=_(u'Runs a language filter to remove common OCR mistakes from document pages content.'))
|
||||||
|
|
||||||
#ocr_tool_link = Link(text=_(u'OCR'), view='ocr_log', sprite='hourglass', icon='text.png', permissions=[PERMISSION_OCR_DOCUMENT]) # children_view_regex=[r'queue_', r'document_queue'])
|
|
||||||
|
|||||||
@@ -1,16 +1,12 @@
|
|||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
|
|
||||||
OCR_STATE_DISABLED = 'd'
|
|
||||||
OCR_STATE_ENABLED = 'e'
|
|
||||||
|
|
||||||
OCR_STATE_CHOICES = (
|
|
||||||
(OCR_STATE_DISABLED, _(u'disabled')),
|
|
||||||
(OCR_STATE_ENABLED, _(u'enabled')),
|
|
||||||
)
|
|
||||||
|
|
||||||
DEFAULT_OCR_FILE_FORMAT = u'tiff'
|
DEFAULT_OCR_FILE_FORMAT = u'tiff'
|
||||||
DEFAULT_OCR_FILE_EXTENSION = u'tif'
|
DEFAULT_OCR_FILE_EXTENSION = u'tif'
|
||||||
UNPAPER_FILE_FORMAT = u'ppm'
|
UNPAPER_FILE_FORMAT = u'ppm'
|
||||||
|
|
||||||
OCR_QUEUE_NAME = 'ocr'
|
OCR_QUEUE_NAME = 'ocr'
|
||||||
|
|
||||||
|
DEFAULT_TESSERACT_PATH = u'/usr/bin/tesseract'
|
||||||
|
DEFAULT_UNPAPER_PATH = u'/usr/bin/unpaper'
|
||||||
|
DEFAULT_PDFTOTEXT_PATH = u'/usr/bin/pdftotext'
|
||||||
|
DEFAULT_TESSERACT_LANGUAGE = u'eng'
|
||||||
|
DEFAULT_REPLICATION_DELAY = 0
|
||||||
|
|||||||
@@ -1,54 +0,0 @@
|
|||||||
from __future__ import absolute_import
|
|
||||||
|
|
||||||
from ast import literal_eval
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
from django.db import models
|
|
||||||
from django.utils.translation import ugettext_lazy as _
|
|
||||||
from django.utils.translation import ugettext
|
|
||||||
from django.core.exceptions import ObjectDoesNotExist
|
|
||||||
from django.contrib.contenttypes.models import ContentType
|
|
||||||
from django.contrib.contenttypes import generic
|
|
||||||
from django.core.exceptions import ValidationError
|
|
||||||
|
|
||||||
from common.models import Singleton
|
|
||||||
from documents.models import Document, DocumentVersion
|
|
||||||
from converter.api import get_available_transformations_choices
|
|
||||||
from sources.managers import SourceTransformationManager
|
|
||||||
|
|
||||||
from .literals import (OCR_STATE_CHOICES, OCR_STATE_ENABLED,
|
|
||||||
OCR_STATE_DISABLED)
|
|
||||||
from .exceptions import (ReQueueError, OCRProcessingAlreadyDisabled,
|
|
||||||
OCRProcessingAlreadyEnabled)
|
|
||||||
|
|
||||||
|
|
||||||
class OCRProcessingSingleton(Singleton):
    """
    Singleton model that stores whether OCR processing is enabled or
    disabled.
    """
    # One of the OCR_STATE_CHOICES values; new instances start enabled.
    state = models.CharField(
        verbose_name=_(u'state'),
        default=OCR_STATE_ENABLED,
        choices=OCR_STATE_CHOICES,
        max_length=4
    )

    #objects = AnonymousUserSingletonManager()

    class Meta:
        verbose_name = verbose_name_plural = _(u'OCR processing properties')

    def __unicode__(self):
        return ugettext('OCR processing')

    def _switch_state(self, target_state, already_set_error):
        """
        Persist ``target_state``; raise ``already_set_error`` when the
        instance is already in that state.
        """
        if self.state == target_state:
            raise already_set_error

        self.state = target_state
        self.save()

    def disable(self):
        """
        Switch to the disabled state and save.

        Raises OCRProcessingAlreadyDisabled if already disabled.
        """
        self._switch_state(OCR_STATE_DISABLED, OCRProcessingAlreadyDisabled)

    def enable(self):
        """
        Switch to the enabled state and save.

        Raises OCRProcessingAlreadyEnabled if already enabled.
        """
        self._switch_state(OCR_STATE_ENABLED, OCRProcessingAlreadyEnabled)

    def is_enabled(self):
        """
        Return True when the stored state is the enabled one.
        """
        current_state = self.state
        return current_state == OCR_STATE_ENABLED
|
|
||||||
@@ -6,16 +6,16 @@ import subprocess
|
|||||||
|
|
||||||
from django.utils.translation import ugettext as _
|
from django.utils.translation import ugettext as _
|
||||||
|
|
||||||
#from converter import office_converter
|
from converter import office_converter
|
||||||
#from converter.office_converter import OfficeConverter
|
from converter.office_converter import OfficeConverter
|
||||||
#from converter.exceptions import OfficeConversionError
|
from converter.exceptions import OfficeConversionError
|
||||||
#from documents.utils import document_save_to_temp_dir
|
from documents.utils import document_save_to_temp_dir
|
||||||
#from common.utils import copyfile
|
from common.utils import copyfile
|
||||||
#from common.settings import TEMPORARY_DIRECTORY
|
from common.settings import TEMPORARY_DIRECTORY
|
||||||
#from common.textparser import TextParser as OriginalTextParser, TEXT_PARSER_MIMETYPES
|
from common.textparser import TextParser as OriginalTextParser, TEXT_PARSER_MIMETYPES
|
||||||
|
|
||||||
from ocr.parsers.exceptions import ParserError, ParserUnknownFile
|
from ocr.parsers.exceptions import ParserError, ParserUnknownFile
|
||||||
#from ocr.settings import PDFTOTEXT_PATH
|
from ocr.settings import PDFTOTEXT_PATH
|
||||||
|
|
||||||
|
|
||||||
mimetype_registry = {}
|
mimetype_registry = {}
|
||||||
|
|||||||
71
apps/ocr/post_init.py
Normal file
71
apps/ocr/post_init.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.db import transaction
|
||||||
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
from django.utils.translation import ugettext
|
||||||
|
from django.db.models.signals import post_save, post_syncdb
|
||||||
|
from django.dispatch import receiver
|
||||||
|
from django.db.utils import DatabaseError
|
||||||
|
|
||||||
|
from navigation.api import (bind_links, register_multi_item_links,
|
||||||
|
register_multi_item_links)
|
||||||
|
from documents.models import Document, DocumentVersion
|
||||||
|
from maintenance.api import MaintenanceNamespace
|
||||||
|
from acls.api import class_permissions
|
||||||
|
from job_processor.models import JobQueue, JobType
|
||||||
|
from job_processor.exceptions import JobQueuePushError
|
||||||
|
|
||||||
|
from .settings import AUTOMATIC_OCR
|
||||||
|
from .api import do_document_ocr
|
||||||
|
from .permissions import PERMISSION_OCR_DOCUMENT
|
||||||
|
from .exceptions import AlreadyQueued
|
||||||
|
from .literals import OCR_QUEUE_NAME
|
||||||
|
from .links import (submit_document, ocr_disable,
|
||||||
|
ocr_enable, all_document_ocr_cleanup, ocr_log,
|
||||||
|
ocr_tool_link, submit_document_multiple)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
ocr_job_queue = None
|
||||||
|
|
||||||
|
|
||||||
|
@transaction.commit_on_success
def create_ocr_job_queue():
    """
    Fetch or create the OCR job queue and store it in ``ocr_job_queue``.

    The queue is looked up by ``OCR_QUEUE_NAME`` and, when missing, created
    with the label 'OCR' and unique jobs enabled.  If the database raises
    ``DatabaseError`` the transaction is rolled back, the module-level
    ``ocr_job_queue`` keeps its previous value and the failure is logged
    instead of being dropped silently.
    """
    global ocr_job_queue
    try:
        # Only the queue is needed; the "created" flag is discarded by
        # indexing rather than by binding ``_``, which is the translation
        # function used on this very call.
        ocr_job_queue = JobQueue.objects.get_or_create(
            name=OCR_QUEUE_NAME,
            defaults={'label': _('OCR'), 'unique_jobs': True}
        )[0]
    except DatabaseError as exception:
        transaction.rollback()
        # NOTE(review): presumably expected while the tables do not exist
        # yet -- confirm; logged so a missing queue can be diagnosed.
        logger.warning('Unable to get or create the OCR job queue: %s', exception)
|
||||||
|
|
||||||
|
|
||||||
|
@receiver(post_save, dispatch_uid='document_post_save', sender=DocumentVersion)
def document_post_save(sender, instance, **kwargs):
    """
    Submit a newly created document version for OCR when AUTOMATIC_OCR is on.

    Connected to ``post_save`` with ``DocumentVersion`` as sender.  Saves
    where ``created`` is false or absent are ignored, as is every save while
    ``AUTOMATIC_OCR`` is falsy.  A ``JobQueuePushError`` raised while
    queueing is logged and suppressed so that it does not propagate out of
    the signal handler.
    """
    logger.debug('received post save signal')
    # Lazy %-arguments: the instance is only rendered when debug is enabled.
    logger.debug('instance: %s', instance)

    if not kwargs.get('created', False):
        return

    if not AUTOMATIC_OCR:
        return

    try:
        instance.submit_for_ocr()
    except JobQueuePushError as exception:
        # Best effort by design; leave a trace instead of a bare ``pass``.
        logger.debug('unable to queue %s for OCR: %s', instance, exception)
|
||||||
|
|
||||||
|
|
||||||
|
def init_ocr_app():
    """
    Register the OCR app's navigation links and class permissions, create
    the OCR job queue and job type, and attach a ``submit_for_ocr`` method
    to ``Document`` and ``DocumentVersion``.
    """
    bind_links([Document], [submit_document])
    # The former ``bind_links([OCRProcessingSingleton], [ocr_disable,
    # ocr_enable])`` call is dropped: OCRProcessingSingleton is never
    # imported into this module, so the call raised NameError and aborted
    # the rest of the initialization.
    # NOTE(review): the module-level ``from .links import`` still lists
    # ocr_disable, ocr_enable, ocr_log and ocr_tool_link -- confirm that
    # links.py still defines them, otherwise that import needs pruning too.

    #namespace = MaintenanceNamespace(label=_(u'OCR'))
    #namespace.create_tool(all_document_ocr_cleanup)

    register_multi_item_links(
        [
            'folder_view', 'search', 'results', 'index_instance_node_view',
            'document_find_duplicates', 'document_type_document_list',
            'document_group_view', 'document_list', 'document_list_recent'
        ],
        [submit_document_multiple]
    )

    class_permissions(Document, [
        PERMISSION_OCR_DOCUMENT,
    ])

    create_ocr_job_queue()
    ocr_job_type = JobType('ocr', _(u'OCR'), do_document_ocr)

    # ``ocr_job_queue`` is resolved when the lambdas run, so they use
    # whatever create_ocr_job_queue() stored in the module-level name.
    Document.add_to_class('submit_for_ocr', lambda document: ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk))
    DocumentVersion.add_to_class('submit_for_ocr', lambda document_version: ocr_job_queue.push(ocr_job_type, document_version_pk=document_version.pk))
|
||||||
@@ -2,60 +2,52 @@ from __future__ import absolute_import
|
|||||||
|
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from smart_settings import LocalScope
|
from smart_settings import LocalScope, ClusterScope
|
||||||
|
|
||||||
from .icons import icon_submit_document
|
from .icons import icon_submit_document
|
||||||
|
from .literals import (DEFAULT_TESSERACT_PATH, DEFAULT_TESSERACT_LANGUAGE,
|
||||||
|
DEFAULT_REPLICATION_DELAY, DEFAULT_UNPAPER_PATH, DEFAULT_PDFTOTEXT_PATH)
|
||||||
|
from .links import all_document_ocr_cleanup
|
||||||
|
|
||||||
label = _(u'OCR')
|
label = _(u'OCR')
|
||||||
description = _(u'Handles optical character recognition.')
|
description = _(u'Handles optical character recognition.')
|
||||||
icon = icon_submit_document
|
icon = icon_submit_document
|
||||||
dependencies = ['app_registry', 'icons', 'navigation']
|
dependencies = ['app_registry', 'icons', 'navigation']
|
||||||
|
#maintenance_links = [all_document_ocr_cleanup]
|
||||||
settings = [
|
settings = [
|
||||||
|
{
|
||||||
|
'name': 'AUTOMATIC_OCR',
|
||||||
|
'default': True,
|
||||||
|
'description': _(u'Automatically queue newly created documents for OCR.'),
|
||||||
|
'scopes': [ClusterScope()]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'name': 'TESSERACT_PATH',
|
'name': 'TESSERACT_PATH',
|
||||||
'default': u'/usr/bin/tesseract',
|
'default': DEFAULT_TESSERACT_PATH,
|
||||||
'exists': True,
|
'exists': True,
|
||||||
'scopes': [LocalScope()]
|
'scopes': [LocalScope()]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'name': 'TESSERACT_LANGUAGE',
|
'name': 'TESSERACT_LANGUAGE',
|
||||||
'default': u'eng',
|
'default': DEFAULT_TESSERACT_LANGUAGE,
|
||||||
'scopes': [LocalScope()]
|
'scopes': [ClusterScope()]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'name': 'REPLICATION_DELAY',
|
'name': 'REPLICATION_DELAY',
|
||||||
'default': 0,
|
'default': DEFAULT_REPLICATION_DELAY,
|
||||||
'description': _(u'Amount of seconds to delay OCR of documents to allow for the node\'s storage replication overhead.'),
|
'description': _(u'Amount of seconds to delay OCR of documents to allow for the node\'s storage replication overhead.'),
|
||||||
'scopes': [LocalScope()]
|
'scopes': [LocalScope()]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'name': 'NODE_CONCURRENT_EXECUTION',
|
|
||||||
'default': 1,
|
|
||||||
'description': _(u'Maximum amount of concurrent document OCRs a node can perform.'),
|
|
||||||
'scopes': [LocalScope()]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'name': 'AUTOMATIC_OCR',
|
|
||||||
'default': True,
|
|
||||||
'description': _(u'Automatically queue newly created documents for OCR.'),
|
|
||||||
'scopes': [LocalScope()]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'name': 'QUEUE_PROCESSING_INTERVAL',
|
|
||||||
'default': 10,
|
|
||||||
'description': _(u'Automatically queue newly created documents for OCR.'),
|
|
||||||
'scopes': [LocalScope()]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
'name': 'UNPAPER_PATH',
|
'name': 'UNPAPER_PATH',
|
||||||
'default': u'/usr/bin/unpaper',
|
'default': DEFAULT_UNPAPER_PATH,
|
||||||
'description': _(u'File path to unpaper program.'),
|
'description': _(u'File path to unpaper program.'),
|
||||||
'exists': True,
|
'exists': True,
|
||||||
'scopes': [LocalScope()]
|
'scopes': [LocalScope()]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'name': 'PDFTOTEXT_PATH',
|
'name': 'PDFTOTEXT_PATH',
|
||||||
'default': u'/usr/bin/pdftotext',
|
'default': DEFAULT_PDFTOTEXT_PATH,
|
||||||
'description': _(u'File path to poppler\'s pdftotext program used to extract text from PDF files.'),
|
'description': _(u'File path to poppler\'s pdftotext program used to extract text from PDF files.'),
|
||||||
'exists': True,
|
'exists': True,
|
||||||
'scopes': [LocalScope()]
|
'scopes': [LocalScope()]
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
from django.conf.urls.defaults import patterns, url
|
from django.conf.urls.defaults import patterns, url
|
||||||
|
|
||||||
urlpatterns = patterns('ocr.views',
|
urlpatterns = patterns('ocr.views',
|
||||||
url(r'^processing/enable/$', 'ocr_enable', (), 'ocr_enable'),
|
|
||||||
url(r'^processing/disable/$', 'ocr_disable', (), 'ocr_disable'),
|
|
||||||
|
|
||||||
url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
|
url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
|
||||||
url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'),
|
url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'),
|
||||||
|
|
||||||
|
|||||||
@@ -19,11 +19,7 @@ from job_processor.exceptions import JobQueuePushError
|
|||||||
from .permissions import (PERMISSION_OCR_DOCUMENT,
|
from .permissions import (PERMISSION_OCR_DOCUMENT,
|
||||||
PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE,
|
PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE,
|
||||||
PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_QUEUE_EDIT)
|
PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_QUEUE_EDIT)
|
||||||
from .models import OCRProcessingSingleton
|
|
||||||
from .exceptions import (AlreadyQueued, ReQueueError, OCRProcessingAlreadyDisabled,
|
|
||||||
OCRProcessingAlreadyEnabled)
|
|
||||||
from .api import clean_pages
|
from .api import clean_pages
|
||||||
from . import ocr_job_queue, ocr_job_type
|
|
||||||
|
|
||||||
|
|
||||||
# {'name': _(u'document'), 'attribute': encapsulate(lambda x: document_link(x.document_version.document) if hasattr(x, 'document_version') else _(u'Missing document.'))},
|
# {'name': _(u'document'), 'attribute': encapsulate(lambda x: document_link(x.document_version.document) if hasattr(x, 'document_version') else _(u'Missing document.'))},
|
||||||
@@ -32,58 +28,6 @@ from . import ocr_job_queue, ocr_job_type
|
|||||||
# {'name': _('submitted'), 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True},
|
# {'name': _('submitted'), 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True},
|
||||||
|
|
||||||
|
|
||||||
def ocr_disable(request):
    """
    Confirmation view that disables OCR processing.

    Requires PERMISSION_OCR_QUEUE_ENABLE_DISABLE.  Any non-POST request
    renders the generic confirmation template; a POST disables processing
    and redirects to ``next``, or to ``previous`` with a warning when
    processing was already disabled.
    """
    Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE])

    # Both targets fall back to the referer when neither POST nor GET
    # carries them.
    next_url = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
    previous_url = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))

    if request.method != 'POST':
        confirmation_context = {
            'queue': OCRProcessingSingleton.get(),
            'navigation_object_name': 'queue',
            'title': _(u'Are you sure you wish to disable OCR processing?'),
            'next': next_url,
            'previous': previous_url,
            'form_icon': u'control_stop_blue.png',
        }
        return render_to_response('generic_confirm.html', confirmation_context, context_instance=RequestContext(request))

    try:
        OCRProcessingSingleton.get().disable()
    except OCRProcessingAlreadyDisabled:
        messages.warning(request, _(u'OCR processing already disabled.'))
        return HttpResponseRedirect(previous_url)

    messages.success(request, _(u'OCR processing disabled successfully.'))
    return HttpResponseRedirect(next_url)
|
|
||||||
|
|
||||||
|
|
||||||
def ocr_enable(request):
    """
    Confirmation view that enables OCR processing.

    Requires PERMISSION_OCR_QUEUE_ENABLE_DISABLE.  Any non-POST request
    renders the generic confirmation template; a POST enables processing
    and redirects to ``next``, or to ``previous`` with a warning when
    processing was already enabled.
    """
    Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE])

    # Both targets fall back to the referer when neither POST nor GET
    # carries them.
    next_url = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
    previous_url = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))

    if request.method == 'POST':
        try:
            OCRProcessingSingleton.get().enable()
        except OCRProcessingAlreadyEnabled:
            # enable() raises OCRProcessingAlreadyEnabled; catching the
            # "AlreadyDisabled" exception here left this branch unreachable
            # and let the real exception escape the view.
            messages.warning(request, _(u'OCR processing already enabled.'))
            return HttpResponseRedirect(previous_url)
        else:
            messages.success(request, _(u'OCR processing enabled successfully.'))
            return HttpResponseRedirect(next_url)

    return render_to_response('generic_confirm.html', {
        'queue': OCRProcessingSingleton.get(),
        'navigation_object_name': 'queue',
        'title': _(u'Are you sure you wish to enable OCR processing?'),
        'next': next_url,
        'previous': previous_url,
        'form_icon': u'control_play_blue.png',
    }, context_instance=RequestContext(request))
|
|
||||||
|
|
||||||
|
|
||||||
def submit_document_multiple(request):
|
def submit_document_multiple(request):
|
||||||
for item_id in request.GET.get('id_list', '').split(','):
|
for item_id in request.GET.get('id_list', '').split(','):
|
||||||
submit_document(request, item_id)
|
submit_document(request, item_id)
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ Overview
|
|||||||
#TODO: add clustering
|
#TODO: add clustering
|
||||||
#TODO: add local jobs & distributed job processing
|
#TODO: add local jobs & distributed job processing
|
||||||
#TODO: removal of DISABLE_HOME_VIEW
|
#TODO: removal of DISABLE_HOME_VIEW
|
||||||
|
#TODO: removal of OCR_DEFAULT_NODE_CONCURRENT_EXECUTION, OCR_QUEUE_PROCESSING_INTERVAL
|
||||||
|
|
||||||
What's new in Mayan EDMS v0.13
|
What's new in Mayan EDMS v0.13
|
||||||
==============================
|
==============================
|
||||||
|
|||||||
@@ -178,7 +178,7 @@ INSTALLED_APPS = (
|
|||||||
#'document_signatures',
|
#'document_signatures',
|
||||||
'linking',
|
'linking',
|
||||||
'metadata',
|
'metadata',
|
||||||
#'ocr',
|
'ocr',
|
||||||
'main',
|
'main',
|
||||||
#'installation',
|
#'installation',
|
||||||
#'document_indexing',
|
#'document_indexing',
|
||||||
|
|||||||
2
urls.py
2
urls.py
@@ -15,7 +15,7 @@ urlpatterns = patterns('',
|
|||||||
(r'^documents/', include('documents.urls')),
|
(r'^documents/', include('documents.urls')),
|
||||||
(r'^folders/', include('folders.urls')),
|
(r'^folders/', include('folders.urls')),
|
||||||
(r'^search/', include('dynamic_search.urls')),
|
(r'^search/', include('dynamic_search.urls')),
|
||||||
#(r'^ocr/', include('ocr.urls')),
|
(r'^ocr/', include('ocr.urls')),
|
||||||
(r'^tags/', include('tags.urls')),
|
(r'^tags/', include('tags.urls')),
|
||||||
(r'^comments/', include('document_comments.urls')),
|
(r'^comments/', include('document_comments.urls')),
|
||||||
(r'^user_management/', include('user_management.urls')),
|
(r'^user_management/', include('user_management.urls')),
|
||||||
|
|||||||
Reference in New Issue
Block a user