diff --git a/mayan/apps/ocr/__init__.py b/mayan/apps/ocr/__init__.py index c4be0a4539..4f4e79d184 100644 --- a/mayan/apps/ocr/__init__.py +++ b/mayan/apps/ocr/__init__.py @@ -1,61 +1,75 @@ -from __future__ import absolute_import +from __future__ import unicode_literals import logging from django.dispatch import receiver from django.utils.translation import ugettext_lazy as _ -from south.signals import post_migrate - from acls.api import class_permissions +from common.utils import encapsulate from documents.models import Document, DocumentVersion from documents.signals import post_version_upload +from documents.widgets import document_link from main.api import register_maintenance_links -from navigation.api import register_links +from navigation.api import register_links, register_model_list_columns from navigation.links import link_spacer from project_tools.api import register_tool from rest_api.classes import APIEndPoint -from .links import (all_document_ocr_cleanup, ocr_tool_link, - queue_document_list, queue_document_multiple_delete, - re_queue_multiple_document, submit_document, - submit_document_multiple) -from .models import DocumentQueue +from .links import ( + link_document_all_ocr_cleanup, link_document_submit, + link_document_submit_multiple, link_entry_delete, + link_entry_delete_multiple, link_entry_list, link_entry_re_queue, + link_entry_re_queue_multiple +) +from .models import DocumentVersionOCRError from .permissions import PERMISSION_OCR_DOCUMENT from .tasks import task_do_ocr logger = logging.getLogger(__name__) -register_links(Document, [submit_document]) -register_links([Document], [submit_document_multiple, link_spacer], menu_name='multi_item_links') -register_links(['ocr:queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete]) -register_links(['ocr:queue_document_list'], [queue_document_list], menu_name='secondary_menu') +register_links(Document, [link_document_submit]) +register_links([Document], [link_document_submit_multiple, link_spacer], menu_name='multi_item_links') -register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR')) +register_links([DocumentVersionOCRError], [link_entry_re_queue_multiple, link_entry_delete_multiple, link_spacer], menu_name='multi_item_links') +register_links([DocumentVersionOCRError], [link_entry_re_queue, link_entry_delete]) +register_links(['ocr:entry_list', 'ocr:entry_delete_multiple', 'ocr:entry_re_queue_multiple', DocumentVersionOCRError], [link_entry_list], menu_name='secondary_menu') +register_maintenance_links([link_document_all_ocr_cleanup], namespace='ocr', title=_('OCR')) def document_ocr_submit(self): task_do_ocr.apply_async(args=[self.pk], queue='ocr') +def document_version_ocr_submit(self): + task_do_ocr.apply_async(args=[self.document.pk], queue='ocr') + + @receiver(post_version_upload, dispatch_uid='post_version_upload_ocr', sender=DocumentVersion) def post_version_upload_ocr(sender, instance, **kwargs): logger.debug('received post_version_upload') - logger.debug('instance.document: %s', instance.document) + logger.debug('instance pk: %s', instance.pk) if instance.document.document_type.ocr: - instance.document.submit_for_ocr() - - -@receiver(post_migrate, dispatch_uid='create_default_queue') -def create_default_queue_signal_handler(sender, **kwargs): - if kwargs['app'] == 'ocr': - DocumentQueue.objects.get_or_create(name='default') + instance.submit_for_ocr() Document.add_to_class('submit_for_ocr', document_ocr_submit) +DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit) class_permissions(Document, [PERMISSION_OCR_DOCUMENT]) -register_tool(ocr_tool_link) +register_tool(link_entry_list) APIEndPoint('ocr') + +register_model_list_columns(DocumentVersionOCRError, [ + { + 'name': _('Document'), 'attribute': encapsulate(lambda entry: document_link(entry.document_version.document)) + }, + { + 'name': _('Added'), 'attribute': 'datetime_submitted' + }, + { + 'name': _('Result'), 'attribute': 'result' + }, +]) diff --git a/mayan/apps/ocr/admin.py b/mayan/apps/ocr/admin.py index 7cae1df462..cd434cc85f 100644 --- a/mayan/apps/ocr/admin.py +++ b/mayan/apps/ocr/admin.py @@ -1,20 +1,13 @@ -from __future__ import absolute_import +from __future__ import unicode_literals from django.contrib import admin -from .models import DocumentQueue, QueueDocument +from .models import DocumentVersionOCRError -class QueueDocumentInline(admin.StackedInline): - model = QueueDocument - extra = 1 - classes = ('collapse-open',) - allow_add = True +class DocumentVersionOCRErrorAdmin(admin.ModelAdmin): + list_display = ('document_version', 'datetime_submitted') + readonly_fields = ('document_version', 'datetime_submitted', 'result') -class DocumentQueueAdmin(admin.ModelAdmin): - inlines = [QueueDocumentInline] - list_display = ('name', 'label') - - -admin.site.register(DocumentQueue, DocumentQueueAdmin) +admin.site.register(DocumentVersionOCRError, DocumentVersionOCRErrorAdmin) diff --git a/mayan/apps/ocr/api.py b/mayan/apps/ocr/api.py index f0189a09df..b5e772019a 100644 --- a/mayan/apps/ocr/api.py +++ b/mayan/apps/ocr/api.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import unicode_literals import logging import os @@ -30,14 +30,14 @@ except sh.CommandNotFound: UNPAPER = None -def do_document_ocr(document): +def do_document_ocr(document_version): """ Try first to extract text from document pages using the registered parser, if the parser fails or if there is no parser registered for the document mimetype do a visual OCR by calling the corresponding OCR backend """ - for document_page in document.pages.all(): + for document_page in document_version.pages.all(): try: # Try to extract text by means of a parser parse_document_page(document_page) @@ -68,10 +68,10 @@ def do_document_ocr(document): os.rename(pre_ocr_filepath, pre_ocr_filepath_w_ext) try: - ocr_text = ocr_backend.execute(pre_ocr_filepath_w_ext, document.language) + ocr_text = ocr_backend.execute(pre_ocr_filepath_w_ext, document_version.document.language) - document_page.content = ocr_cleanup(document.language, ocr_text) - document_page.page_label = _(u'Text from OCR') + document_page.content = ocr_cleanup(document_version.document.language, ocr_text) + document_page.page_label = _('Text from OCR') document_page.save() finally: fs_cleanup(pre_ocr_filepath_w_ext) @@ -86,7 +86,7 @@ def ocr_cleanup(language, text): cleanup filter """ try: - language_backend = load_backend(u'.'.join([u'ocr', u'lang', language, u'LanguageBackend']))() + language_backend = load_backend('.'.join(['ocr', 'lang', language, 'LanguageBackend']))() except ImportError: language_backend = None @@ -104,9 +104,9 @@ def ocr_cleanup(language, text): result = word if result: output.append(result) - output.append(u'\n') + output.append('\n') - return u' '.join(output) + return ' '.join(output) def clean_pages(): diff --git a/mayan/apps/ocr/api_views.py b/mayan/apps/ocr/api_views.py index 220339e54f..75d5bb338a 100644 --- a/mayan/apps/ocr/api_views.py +++ b/mayan/apps/ocr/api_views.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals from django.core.exceptions import PermissionDenied from django.shortcuts import get_object_or_404 @@ -8,33 +8,33 @@ from rest_framework.response import Response from rest_framework.settings import api_settings from acls.models import AccessEntry -from documents.models import Document +from documents.models import DocumentVersion from permissions.models import Permission from rest_api.permissions import MayanPermission from .permissions import PERMISSION_OCR_DOCUMENT -from .serializers import DocumentOCRSerializer +from .serializers import DocumentVersionOCRSerializer -class DocumentOCRView(generics.GenericAPIView): - serializer_class = DocumentOCRSerializer +class DocumentVersionOCRView(generics.GenericAPIView): + serializer_class = DocumentVersionOCRSerializer permission_classes = (MayanPermission,) def post(self, request, *args, **kwargs): - """Submit document OCR queue.""" + """Submit document version for OCR.""" serializer = self.get_serializer(data=request.DATA, files=request.FILES) if serializer.is_valid(): - document = get_object_or_404(Document, pk=serializer.data['document_id']) + document_version = get_object_or_404(DocumentVersion, pk=serializer.data['document_version_id']) try: Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) except PermissionDenied: - AccessEntry.objects.check_access(PERMISSION_OCR_DOCUMENT, request.user, document) + AccessEntry.objects.check_access(PERMISSION_OCR_DOCUMENT, request.user, document_version.document) - document.submit_for_ocr() + document_version.submit_for_ocr() headers = self.get_success_headers(serializer.data) return Response(serializer.data, status=status.HTTP_202_ACCEPTED, diff --git a/mayan/apps/ocr/backends/__init__.py b/mayan/apps/ocr/backends/__init__.py index f6e245ceb8..6558a75c85 100644 --- a/mayan/apps/ocr/backends/__init__.py +++ b/mayan/apps/ocr/backends/__init__.py @@ -1,3 +1,3 @@ class BackendBase(object): - def execute(self, input_filename, language=None): # NOQA + def execute(self, input_filename, language=None): raise NotImplementedError diff --git a/mayan/apps/ocr/backends/tesseract.py b/mayan/apps/ocr/backends/tesseract.py index f0c34477c0..e36b4c043a 100644 --- a/mayan/apps/ocr/backends/tesseract.py +++ b/mayan/apps/ocr/backends/tesseract.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import unicode_literals import codecs import errno @@ -20,11 +20,11 @@ class Tesseract(BackendBase): """ fd, filepath = tempfile.mkstemp() os.close(fd) - ocr_output = os.extsep.join([filepath, u'txt']) + ocr_output = os.extsep.join([filepath, 'txt']) command = [unicode(TESSERACT_PATH), unicode(input_filename), unicode(filepath)] if language is not None: - command.extend([u'-l', language]) + command.extend(['-l', language]) try: proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) diff --git a/mayan/apps/ocr/exceptions.py b/mayan/apps/ocr/exceptions.py index 5497c92ea5..123f52160f 100644 --- a/mayan/apps/ocr/exceptions.py +++ b/mayan/apps/ocr/exceptions.py @@ -1,3 +1,6 @@ +from __future__ import unicode_literals + + class OCRError(Exception): """ Raised by the OCR backend diff --git a/mayan/apps/ocr/lang/deu.py b/mayan/apps/ocr/lang/deu.py index a3ca0383e9..ccff3eba7d 100644 --- a/mayan/apps/ocr/lang/deu.py +++ b/mayan/apps/ocr/lang/deu.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import +from __future__ import unicode_literals import re diff --git a/mayan/apps/ocr/lang/eng.py b/mayan/apps/ocr/lang/eng.py index 29dc3384e8..5025db136d 100644 --- a/mayan/apps/ocr/lang/eng.py +++ b/mayan/apps/ocr/lang/eng.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import unicode_literals import re diff --git a/mayan/apps/ocr/lang/rus.py b/mayan/apps/ocr/lang/rus.py index 05ce0e1ab1..e7b7588358 100644 --- a/mayan/apps/ocr/lang/rus.py +++ b/mayan/apps/ocr/lang/rus.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import +from __future__ import unicode_literals import re diff --git a/mayan/apps/ocr/lang/spa.py b/mayan/apps/ocr/lang/spa.py index eb4d9ead45..c736a69b9a 100644 --- a/mayan/apps/ocr/lang/spa.py +++ b/mayan/apps/ocr/lang/spa.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import +from __future__ import unicode_literals import re diff --git a/mayan/apps/ocr/links.py b/mayan/apps/ocr/links.py index eb41e2b9d7..f41743f121 100644 --- a/mayan/apps/ocr/links.py +++ b/mayan/apps/ocr/links.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import unicode_literals from django.utils.translation import ugettext_lazy as _ @@ -6,14 +6,13 @@ from .permissions import (PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE) -submit_document = {'text': _('Submit to OCR queue'), 'view': 'ocr:submit_document', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]} -submit_document_multiple = {'text': _('Submit to OCR queue'), 'view': 'ocr:submit_document_multiple', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]} -re_queue_document = {'text': _('Re-queue'), 'view': 'ocr:re_queue_document', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]} -re_queue_multiple_document = {'text': _('Re-queue'), 'view': 'ocr:re_queue_multiple_document', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]} -queue_document_delete = {'text': _(u'Delete'), 'view': 'ocr:queue_document_delete', 'args': 'object.id', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]} -queue_document_multiple_delete = {'text': _(u'Delete'), 'view': 'ocr:queue_document_multiple_delete', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]} +link_document_submit = {'text': _('Submit to OCR queue'), 'view': 'ocr:document_submit', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]} +link_document_submit_multiple = {'text': _('Submit to OCR queue'), 'view': 'ocr:document_submit_multiple', 'famfam': 'hourglass_add'} +link_entry_re_queue = {'text': _('Re-queue'), 'view': 'ocr:entry_re_queue', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]} +link_entry_re_queue_multiple = {'text': _('Re-queue'), 'view': 'ocr:entry_re_queue_multiple', 'famfam': 'hourglass_add'} +link_entry_delete = {'text': _('Delete'), 'view': 'ocr:entry_delete', 'args': 'object.id', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]} +link_entry_delete_multiple = {'text': _('Delete'), 'view': 'ocr:entry_delete_multiple', 'famfam': 'hourglass_delete'} -all_document_ocr_cleanup = {'text': _(u'Clean up pages content'), 'view': 'ocr:all_document_ocr_cleanup', 'famfam': 'text_strikethrough', 'permissions': [PERMISSION_OCR_CLEAN_ALL_PAGES], 'description': _(u'Runs a language filter to remove common OCR mistakes from document pages content.')} +link_document_all_ocr_cleanup = {'text': _('Clean up pages content'), 'view': 'ocr:document_all_ocr_cleanup', 'famfam': 'text_strikethrough', 'permissions': [PERMISSION_OCR_CLEAN_ALL_PAGES], 'description': _('Runs a language filter to remove common OCR mistakes from document pages content.')} -queue_document_list = {'text': _(u'Queue document list'), 'view': 'ocr:queue_document_list', 'famfam': 'hourglass', 'permissions': [PERMISSION_OCR_DOCUMENT]} -ocr_tool_link = {'text': _(u'OCR'), 'view': 'ocr:queue_document_list', 'famfam': 'hourglass', 'icon': 'text.png', 'permissions': [PERMISSION_OCR_DOCUMENT]} +link_entry_list = {'text': _('OCR Errors'), 'view': 'ocr:entry_list', 'famfam': 'hourglass', 'icon': 'text.png', 'permissions': [PERMISSION_OCR_DOCUMENT]} diff --git a/mayan/apps/ocr/literals.py b/mayan/apps/ocr/literals.py index 8e80534272..3a7b1360dc 100644 --- a/mayan/apps/ocr/literals.py +++ b/mayan/apps/ocr/literals.py @@ -1,4 +1,6 @@ -DEFAULT_OCR_FILE_FORMAT = u'tiff' -DEFAULT_OCR_FILE_EXTENSION = u'tif' +from __future__ import unicode_literals + +DEFAULT_OCR_FILE_FORMAT = 'tiff' +DEFAULT_OCR_FILE_EXTENSION = 'tif' LOCK_EXPIRE = 60 * 10 # Adjust to worst case scenario -UNPAPER_FILE_FORMAT = u'ppm' +UNPAPER_FILE_FORMAT = 'ppm' diff --git a/mayan/apps/ocr/models.py b/mayan/apps/ocr/models.py index 8533dcea2a..e4c1713eb9 100644 --- a/mayan/apps/ocr/models.py +++ b/mayan/apps/ocr/models.py @@ -1,39 +1,22 @@ -from __future__ import absolute_import +from __future__ import unicode_literals from django.db import models -from django.core.exceptions import ObjectDoesNotExist -from django.utils.translation import ugettext +from django.utils.encoding import python_2_unicode_compatible from django.utils.translation import ugettext_lazy as _ -from documents.models import Document +from documents.models import DocumentVersion -class DocumentQueue(models.Model): - name = models.CharField(max_length=64, unique=True, verbose_name=_(u'Name')) - label = models.CharField(max_length=64, verbose_name=_(u'Label')) +@python_2_unicode_compatible +class DocumentVersionOCRError(models.Model): + document_version = models.ForeignKey(DocumentVersion, verbose_name=_('Document version')) + datetime_submitted = models.DateTimeField(verbose_name=_('Date time submitted'), auto_now=True, db_index=True) + result = models.TextField(blank=True, null=True, verbose_name=_('Result')) - class Meta: - verbose_name = _(u'Document queue') - verbose_name_plural = _(u'Document queues') - - def __unicode__(self): - return self.label - - -class QueueDocument(models.Model): - document_queue = models.ForeignKey(DocumentQueue, related_name='documents', verbose_name=_(u'Document queue')) - document = models.ForeignKey(Document, verbose_name=_(u'Document')) - datetime_submitted = models.DateTimeField(verbose_name=_(u'Date time submitted'), auto_now=True, db_index=True) - result = models.TextField(blank=True, null=True, verbose_name=_(u'Result')) - node_name = models.CharField(max_length=256, verbose_name=_(u'Node name'), blank=True, null=True) + def __str__(self): + return unicode(self.document_version) class Meta: ordering = ('datetime_submitted',) - verbose_name = _(u'Queue document') - verbose_name_plural = _(u'Queue documents') - - def __unicode__(self): - try: - return unicode(self.document) - except ObjectDoesNotExist: - return ugettext(u'Missing document.') + verbose_name = _('Document Version OCR Error') + verbose_name_plural = _('Document Version OCR Errors') diff --git a/mayan/apps/ocr/parsers/__init__.py b/mayan/apps/ocr/parsers/__init__.py index d599103b15..f505b3c531 100644 --- a/mayan/apps/ocr/parsers/__init__.py +++ b/mayan/apps/ocr/parsers/__init__.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import logging import os import slate @@ -90,7 +92,7 @@ class SlateParser(Parser): raise ParserError document_page.content = pdf_pages[document_page.page_number - 1] - document_page.page_label = _(u'Text extracted from PDF') + document_page.page_label = _('Text extracted from PDF') document_page.save() @@ -112,7 +114,7 @@ class OfficeParser(Parser): # Now that the office document has been converted to PDF # call the coresponding PDF parser in this new file - parse_document_page(document_page, descriptor=open(input_filepath), mimetype=u'application/pdf') + parse_document_page(document_page, descriptor=open(input_filepath), mimetype='application/pdf') else: raise ParserError @@ -126,7 +128,7 @@ class PopplerParser(Parser): PDF parser using the pdftotext execute from the poppler package """ def __init__(self): - self.pdftotext_path = PDFTOTEXT_PATH if PDFTOTEXT_PATH else u'/usr/bin/pdftotext' + self.pdftotext_path = PDFTOTEXT_PATH if PDFTOTEXT_PATH else '/usr/bin/pdftotext' if not os.path.exists(self.pdftotext_path): raise ParserError('cannot find pdftotext executable') logger.debug('self.pdftotext_path: %s', self.pdftotext_path) @@ -167,9 +169,9 @@ class PopplerParser(Parser): raise ParserError('No output') document_page.content = output - document_page.page_label = _(u'Text extracted from PDF') + document_page.page_label = _('Text extracted from PDF') document_page.save() -register_parser(mimetypes=[u'application/pdf'], parsers=[PopplerParser, SlateParser]) +register_parser(mimetypes=['application/pdf'], parsers=[PopplerParser, SlateParser]) register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser]) diff --git a/mayan/apps/ocr/permissions.py b/mayan/apps/ocr/permissions.py index e8dbc188e7..b6bf977a6c 100644 --- a/mayan/apps/ocr/permissions.py +++ b/mayan/apps/ocr/permissions.py @@ -1,10 +1,10 @@ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals from django.utils.translation import ugettext_lazy as _ from permissions.models import Permission, PermissionNamespace -ocr_namespace = PermissionNamespace('ocr', _(u'OCR')) -PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _(u'Submit documents for OCR')) -PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _(u'Delete documents from OCR queue')) -PERMISSION_OCR_CLEAN_ALL_PAGES = Permission.objects.register(ocr_namespace, 'ocr_clean_all_pages', _(u'Can execute the OCR clean up on all document pages')) +ocr_namespace = PermissionNamespace('ocr', _('OCR')) +PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _('Submit documents for OCR')) +PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _('Delete documents from OCR queue')) +PERMISSION_OCR_CLEAN_ALL_PAGES = Permission.objects.register(ocr_namespace, 'ocr_clean_all_pages', _('Can execute the OCR clean up on all document pages')) diff --git a/mayan/apps/ocr/runtime.py b/mayan/apps/ocr/runtime.py index eef63478c0..78aef88077 100644 --- a/mayan/apps/ocr/runtime.py +++ b/mayan/apps/ocr/runtime.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from common.utils import load_backend from .settings import BACKEND diff --git a/mayan/apps/ocr/serializers.py b/mayan/apps/ocr/serializers.py index c38fb42f7c..9640050fb8 100644 --- a/mayan/apps/ocr/serializers.py +++ b/mayan/apps/ocr/serializers.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import - from rest_framework import serializers -class DocumentOCRSerializer(serializers.Serializer): - document_id = serializers.IntegerField() +class DocumentVersionOCRSerializer(serializers.Serializer): + document_version_id = serializers.IntegerField() diff --git a/mayan/apps/ocr/settings.py b/mayan/apps/ocr/settings.py index ddfec29592..7bde2b13bb 100644 --- a/mayan/apps/ocr/settings.py +++ b/mayan/apps/ocr/settings.py @@ -1,16 +1,16 @@ -"""Configuration options for the ocr app""" +from __future__ import unicode_literals from django.utils.translation import ugettext_lazy as _ from smart_settings.api import register_settings register_settings( - namespace=u'ocr', - module=u'ocr.settings', + namespace='ocr', + module='ocr.settings', settings=[ - {'name': u'TESSERACT_PATH', 'global_name': u'OCR_TESSERACT_PATH', 'default': u'/usr/bin/tesseract', 'exists': True}, - {'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True}, - {'name': u'PDFTOTEXT_PATH', 'global_name': u'OCR_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'description': _(u'File path to poppler\'s pdftotext program used to extract text from PDF files.'), 'exists': True}, - {'name': u'BACKEND', 'global_name': u'OCR_BACKEND', 'default': u'ocr.backends.tesseract.Tesseract', 'description': _(u'Full path to the backend to be used to do OCR.')}, + {'name': 'TESSERACT_PATH', 'global_name': 'OCR_TESSERACT_PATH', 'default': '/usr/bin/tesseract', 'exists': True}, + {'name': 'UNPAPER_PATH', 'global_name': 'OCR_UNPAPER_PATH', 'default': '/usr/bin/unpaper', 'description': _('File path to unpaper program.'), 'exists': True}, + {'name': 'PDFTOTEXT_PATH', 'global_name': 'OCR_PDFTOTEXT_PATH', 'default': '/usr/bin/pdftotext', 'description': _('File path to poppler\'s pdftotext program used to extract text from PDF files.'), 'exists': True}, + {'name': 'BACKEND', 'global_name': 'OCR_BACKEND', 'default': 'ocr.backends.tesseract.Tesseract', 'description': _('Full path to the backend to be used to do OCR.')}, ] ) diff --git a/mayan/apps/ocr/south_migrations/0004_auto__del_documentqueue__del_queuedocument__add_documentversionocrerro.py b/mayan/apps/ocr/south_migrations/0004_auto__del_documentqueue__del_queuedocument__add_documentversionocrerro.py new file mode 100644 index 0000000000..ff4777e7ca --- /dev/null +++ b/mayan/apps/ocr/south_migrations/0004_auto__del_documentqueue__del_queuedocument__add_documentversionocrerro.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +from south.utils import datetime_utils as datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Deleting model 'DocumentQueue' + db.delete_table(u'ocr_documentqueue') + + # Deleting model 'QueueDocument' + db.delete_table(u'ocr_queuedocument') + + # Adding model 'DocumentVersionOCRError' + db.create_table(u'ocr_documentversionocrerror', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('document_version', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.DocumentVersion'])), + ('datetime_submitted', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, db_index=True, blank=True)), + ('result', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), + )) + db.send_create_signal(u'ocr', ['DocumentVersionOCRError']) + + + def backwards(self, orm): + # Adding model 'DocumentQueue' + db.create_table(u'ocr_documentqueue', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('name', self.gf('django.db.models.fields.CharField')(max_length=64, unique=True)), + ('label', self.gf('django.db.models.fields.CharField')(max_length=64)), + )) + db.send_create_signal(u'ocr', ['DocumentQueue']) + + # Adding model 'QueueDocument' + db.create_table(u'ocr_queuedocument', ( + ('node_name', self.gf('django.db.models.fields.CharField')(max_length=256, null=True, blank=True)), + ('result', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), + ('datetime_submitted', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, blank=True, db_index=True)), + ('document_queue', self.gf('django.db.models.fields.related.ForeignKey')(related_name='documents', to=orm['ocr.DocumentQueue'])), + ('document', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.Document'])), + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + )) + db.send_create_signal(u'ocr', ['QueueDocument']) + + # Deleting model 'DocumentVersionOCRError' + db.delete_table(u'ocr_documentversionocrerror') + + + models = { + u'documents.document': { + 'Meta': {'ordering': "['-date_added']", 'object_name': 'Document'}, + 'date_added': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'document_type': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'documents'", 'to': u"orm['documents.DocumentType']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'label': ('django.db.models.fields.CharField', [], {'default': "u'Uninitialized document'", 'max_length': '255', 'db_index': 'True'}), + 'language': ('django.db.models.fields.CharField', [], {'default': "u'eng'", 'max_length': '8'}), + 'uuid': ('django.db.models.fields.CharField', [], {'default': "u'b5b498b5-ffe5-4b70-b8a6-6c875ed11bf2'", 'max_length': '48'}) + }, + u'documents.documenttype': { + 'Meta': {'ordering': "['name']", 'object_name': 'DocumentType'}, + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), + 'ocr': ('django.db.models.fields.BooleanField', [], {'default': 'True'}) + }, + u'documents.documentversion': { + 'Meta': {'object_name': 'DocumentVersion'}, + 'checksum': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'comment': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'versions'", 'to': u"orm['documents.Document']"}), + 'encoding': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}), + 'file': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'mimetype': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}), + 'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}) + }, + u'ocr.documentversionocrerror': { + 'Meta': {'ordering': "('datetime_submitted',)", 'object_name': 'DocumentVersionOCRError'}, + 'datetime_submitted': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'}), + 'document_version': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentVersion']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'result': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}) + } + } + + complete_apps = ['ocr'] \ No newline at end of file diff --git a/mayan/apps/ocr/tasks.py b/mayan/apps/ocr/tasks.py index 0f506e103f..a7bebddc68 100644 --- a/mayan/apps/ocr/tasks.py +++ b/mayan/apps/ocr/tasks.py @@ -1,65 +1,61 @@ -from __future__ import absolute_import +from __future__ import unicode_literals import logging -import platform import sys import traceback from django.conf import settings -from documents.models import Document +from documents.models import DocumentVersion from lock_manager import Lock, LockError from mayan.celery import app from .api import do_document_ocr from .literals import LOCK_EXPIRE -from .models import DocumentQueue, QueueDocument +from .models import DocumentVersionOCRError logger = logging.getLogger(__name__) @app.task(ignore_result=True) -def task_do_ocr(document_pk): - lock_id = u'task_do_ocr_doc-%d' % document_pk +def task_do_ocr(document_version_pk): + lock_id = 'task_do_ocr_doc_version-%d' % document_version_pk try: logger.debug('trying to acquire lock: %s', lock_id) - # Acquire lock to avoid doing OCR on the same document more than once - # concurrently + # Acquire lock to avoid doing OCR on the same document version more than + # once concurrently lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) logger.debug('acquired lock: %s', lock_id) - document = None + document_version = None try: - logger.info('Starting document OCR for document: %d', document_pk) - document = Document.objects.get(pk=document_pk) - do_document_ocr(document) + logger.info('Starting document OCR for document version: %d', document_version_pk) + document_version = DocumentVersion.objects.get(pk=document_version_pk) + do_document_ocr(document_version) except Exception as exception: - logger.error('OCR error for document: %d; %s', document_pk, exception) - document_queue = DocumentQueue.objects.get(name='default') - if document: - queue_document, created = document_queue.documents.get_or_create(document=document) - queue_document.node_name = platform.node() + logger.error('OCR error for document version: %d; %s', document_version_pk, exception) + if document_version: + entry, created = DocumentVersionOCRError.objects.get_or_create(document_version=document_version) if settings.DEBUG: result = [] type, value, tb = sys.exc_info() result.append('%s: %s' % (type.__name__, value)) result.extend(traceback.format_tb(tb)) - queue_document.result = '\n'.join(result) + entry.result = '\n'.join(result) else: - queue_document.result = exception + entry.result = exception - queue_document.save() + entry.save() else: - logger.info('OCR for document: %d ended', document_pk) - document_queue = DocumentQueue.objects.get(name='default') + logger.info('OCR for document: %d ended', document_version_pk) try: - queue_document = document_queue.documents.get(document=document) - except QueueDocument.DoesNotExist: + entry = DocumentVersionOCRError.objects.get(document_version=document_version) + except DocumentVersionOCRError.DoesNotExist: pass else: - queue_document.delete() + entry.delete() finally: lock.release() except LockError: - logger.debug('unable to obtain lock') + logger.debug('unable to obtain lock: %s' % lock_id) pass diff --git a/mayan/apps/ocr/tests.py b/mayan/apps/ocr/tests.py index a6f48f4c4a..efa0438838 100644 --- a/mayan/apps/ocr/tests.py +++ b/mayan/apps/ocr/tests.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import unicode_literals from django.core.files.base import File from django.test import TransactionTestCase @@ -6,8 +6,6 @@ from django.test import TransactionTestCase from documents.models import Document, DocumentType from documents.tests import TEST_SMALL_DOCUMENT_PATH, TEST_DOCUMENT_TYPE -from .models import DocumentQueue, QueueDocument - class DocumentOCRTestCase(TransactionTestCase): def setUp(self): @@ -16,11 +14,6 @@ class DocumentOCRTestCase(TransactionTestCase): with open(TEST_SMALL_DOCUMENT_PATH) as file_object: self.document = Document.objects.new_document(file_object=File(file_object), document_type=self.document_type)[0].document - DocumentQueue.objects.get_or_create(name='default') - - # Clear OCR queue - QueueDocument.objects.all().delete() - def _test_ocr_language_issue_16(self, language, result): """ Reusable OCR test for a specific language diff --git a/mayan/apps/ocr/urls.py b/mayan/apps/ocr/urls.py index 4b4f5b8436..629f9e9888 100644 --- a/mayan/apps/ocr/urls.py +++ b/mayan/apps/ocr/urls.py @@ -1,19 +1,21 @@ +from __future__ import unicode_literals + from django.conf.urls import patterns, url -from .api_views import DocumentOCRView +from .api_views import DocumentVersionOCRView urlpatterns = patterns('ocr.views', - url(r'^document/(?P\d+)/submit/$', 'submit_document', (), 'submit_document'), - url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'), - url(r'^queue/document/list/$', 'queue_document_list', (), 'queue_document_list'), - url(r'^queue/document/(?P\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'), - url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'), - url(r'^queue/document/(?P\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'), - url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'), + url(r'^document/(?P\d+)/submit/$', 'document_submit', (), 'document_submit'), + url(r'^document/multiple/submit/$', 'document_submit_multiple', (), 'document_submit_multiple'), + url(r'^document/all/clean_up/$', 'document_all_ocr_cleanup', (), 'document_all_ocr_cleanup'), - url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'), + url(r'^all/$', 'entry_list', (), 'entry_list'), + url(r'^(?P\d+)/delete/$', 'entry_delete', (), 'entry_delete'), + url(r'^multiple/delete/$', 'entry_delete_multiple', (), 'entry_delete_multiple'), + url(r'^(?P\d+)/re-queue/$', 'entry_re_queue', (), 'entry_re_queue'), + url(r'^multiple/re-queue/$', 'entry_re_queue_multiple', (), 'entry_re_queue_multiple'), ) api_urls = patterns('', - url(r'^submit/$', DocumentOCRView.as_view(), name='document-ocr-submit-view'), + url(r'^submit/$', DocumentVersionOCRView.as_view(), name='document-version-ocr-submit-view'), ) diff --git a/mayan/apps/ocr/views.py b/mayan/apps/ocr/views.py index 416fd9801e..ae4b80c1a3 100644 --- a/mayan/apps/ocr/views.py +++ b/mayan/apps/ocr/views.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals from django.contrib import messages from django.core.exceptions import PermissionDenied @@ -6,173 +6,43 @@ from django.core.urlresolvers import reverse from django.http import HttpResponseRedirect from django.shortcuts import get_object_or_404, render_to_response from django.template import RequestContext -from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import ugettext_lazy as _, ungettext from acls.models import AccessEntry -from common.utils import encapsulate -from documents.models import Document -from documents.widgets import document_link, document_thumbnail +from documents.models import Document, DocumentVersion from permissions.models import Permission from .api import clean_pages -from .models import DocumentQueue, QueueDocument +from .models import DocumentVersionOCRError from .permissions import (PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE) -def queue_document_list(request, queue_name='default'): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) - - document_queue = get_object_or_404(DocumentQueue, name=queue_name) - - context = { - 'object_list': document_queue.documents.all(), - 'title': _(u'Documents in queue: %s') % document_queue, - 'hide_object': True, - 'queue': document_queue, - 'navigation_object_name': 'queue', - 'list_object_variable_name': 'queue_document', - 'extra_columns': [ - {'name': _('Document'), 'attribute': encapsulate(lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.'))}, - {'name': _(u'Thumbnail'), 'attribute': encapsulate(lambda x: document_thumbnail(x.document))}, - {'name': _('Added'), 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True}, - {'name': _('Node'), 'attribute': 'node_name'}, - {'name': _('Result'), 'attribute': 'result'}, - ], - } - - return render_to_response('main/generic_list.html', context, - context_instance=RequestContext(request)) - - -def queue_document_delete(request, queue_document_id=None, queue_document_id_list=None): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT_DELETE]) - - if queue_document_id: - queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id)] - elif queue_document_id_list: - queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id) for queue_document_id in queue_document_id_list.split(',')] - else: - messages.error(request, _(u'Must provide at least one queue document.')) - return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home'))) - - next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) - previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) - - if request.method == 'POST': - for queue_document in queue_documents: - try: - queue_document.delete() - messages.success(request, _(u'Queue document: %(document)s deleted successfully.') % { - 'document': queue_document.document}) - - except Exception as exception: - messages.error(request, _(u'Error deleting document: %(document)s; %(error)s') % { - 'document': queue_document, 'error': exception}) - return HttpResponseRedirect(next) - - context = { - 'next': next, - 'previous': previous, - 'delete_view': True, - } - - if len(queue_documents) == 1: - context['object'] = queue_documents[0] - context['title'] = _(u'Are you sure you wish to delete queue document: %s?') % ', '.join([unicode(d) for d in queue_documents]) - elif len(queue_documents) > 1: - context['title'] = _(u'Are you sure you wish to delete queue documents: %s?') % ', '.join([unicode(d) for d in queue_documents]) - - return render_to_response('main/generic_confirm.html', context, - context_instance=RequestContext(request)) - - -def queue_document_multiple_delete(request): - return queue_document_delete(request, queue_document_id_list=request.GET.get('id_list', '')) - - -def submit_document_multiple(request): - for item_id in request.GET.get('id_list', '').split(','): - submit_document(request, item_id) - - return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home'))) - - -def submit_document(request, document_id): - document = get_object_or_404(Document, pk=document_id) +def document_submit(request, pk): + document = get_object_or_404(Document, pk=pk) try: Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) except PermissionDenied: AccessEntry.objects.check_access(PERMISSION_OCR_DOCUMENT, request.user, document) - return submit_document_to_queue(request, document=document, - post_submit_redirect=request.META.get('HTTP_REFERER', reverse('main:home'))) - - -def submit_document_to_queue(request, document, post_submit_redirect=None): - """ - This view is meant to be reusable - """ - document.submit_for_ocr() - messages.success(request, _(u'Document: %(document)s was added to the OCR queue.') % { + messages.success(request, _('Document: %(document)s was added to the OCR queue.') % { 'document': document} ) - if post_submit_redirect: - return HttpResponseRedirect(post_submit_redirect) + return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home'))) -def re_queue_document(request, queue_document_id=None, queue_document_id_list=None): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) +def document_submit_multiple(request): + for item_id in request.GET.get('id_list', '').split(','): + document_submit(request, item_id) - if queue_document_id: - queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id)] - elif queue_document_id_list: - queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id) for queue_document_id in queue_document_id_list.split(',')] - else: - messages.error(request, _(u'Must provide at least one queue document.')) - return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home'))) - - next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) - previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) - - if request.method == 'POST': - for queue_document in queue_documents: - try: - queue_document.document.submit_for_ocr() - messages.success( - request, - _(u'Document: %(document)s was re-queued for OCR.') % { - 'document': queue_document.document - } - ) - except Document.DoesNotExist: - messages.error(request, _(u'Document id#: %d, no longer exists.') % queue_document.document_id) - return HttpResponseRedirect(next) - - context = { - 'next': next, - 'previous': previous, - } - - if len(queue_documents) == 1: - context['object'] = queue_documents[0] - context['title'] = _(u'Are you sure you wish to re-queue document: %s?') % ', '.join([unicode(d) for d in queue_documents]) - elif len(queue_documents) > 1: - context['title'] = _(u'Are you sure you wish to re-queue documents: %s?') % ', '.join([unicode(d) for d in queue_documents]) - - return render_to_response('main/generic_confirm.html', context, - context_instance=RequestContext(request)) + return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home'))) -def re_queue_multiple_document(request): - return re_queue_document(request, queue_document_id_list=request.GET.get('id_list', [])) - - -def all_document_ocr_cleanup(request): +def document_all_ocr_cleanup(request): Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CLEAN_ALL_PAGES]) previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) @@ -182,27 +52,133 @@ def all_document_ocr_cleanup(request): return render_to_response('main/generic_confirm.html', { 'previous': previous, 'next': next, - 'title': _(u'Are you sure you wish to clean up all the pages content?'), - 'message': _(u'On large databases this operation may take some time to execute.'), + 'title': _('Are you sure you wish to clean up all the pages content?'), + 'message': _('On large databases this operation may take some time to execute.'), }, context_instance=RequestContext(request)) else: try: + # TODO: turn this into a Celery task clean_pages() - messages.success(request, _(u'Document pages content clean up complete.')) + messages.success(request, _('Document pages content clean up complete.')) except Exception as exception: - messages.error(request, _(u'Document pages content clean up error: %s') % exception) + messages.error(request, _('Document pages content clean up error: %s') % exception) return HttpResponseRedirect(next) -def display_link(obj): - output = [] - if hasattr(obj, 'get_absolute_url'): - output.append(u'%(obj)s' % { - 'url': obj.get_absolute_url(), - 'obj': obj - }) - if output: - return u''.join(output) +def entry_list(request): + Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) + + context = { + 'object_list': DocumentVersionOCRError.objects.all(), + 'title': _('OCR errors'), + 'hide_object': True, + } + + return render_to_response('main/generic_list.html', context, + context_instance=RequestContext(request)) + + +def entry_delete(request, pk=None, pk_list=None): + Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT_DELETE]) + + if pk: + entries = [get_object_or_404(DocumentVersionOCRError, pk=pk)] + elif pk_list: + entries = [get_object_or_404(DocumentVersionOCRError, pk=pk) for pk in pk_list.split(',')] else: - return obj + messages.error(request, _('Make at least one selection.')) + return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home'))) + + next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) + + if request.method == 'POST': + for entry in entries: + try: + entry.delete() + messages.success(request, _('Entry: %(entry)s deleted successfully.') % { + 'entry': entry}) + + except Exception as exception: + messages.error(request, _('Error entry: %(entry)s; %(error)s') % { + 'entry': entry, 'error': exception}) + return HttpResponseRedirect(next) + + context = { + 'next': next, + 'previous': previous, + 'delete_view': True, + } + + if len(entries) == 1: + context['object'] = entries[0] + + context['title'] = ungettext( + 'Are you sure you wish to delete the entry: %(entry)s?', + 'Are you sure you wish to delete these %(count)d entries.', + len(entries) + ) % { + 'count': len(entries), + 'entry': entries[0], + } + + return render_to_response('main/generic_confirm.html', context, + context_instance=RequestContext(request)) + + +def entry_delete_multiple(request): + return entry_delete(request, pk_list=request.GET.get('id_list', '')) + + +def entry_re_queue(request, pk=None, pk_list=None): + Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) + + if pk: + entries = [get_object_or_404(DocumentVersionOCRError, pk=pk)] + elif pk_list: + entries = [get_object_or_404(DocumentVersionOCRError, pk=pk) for pk in pk_list.split(',')] + else: + messages.error(request, _('Make at least one selection.')) + return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home'))) + + next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) + + if request.method == 'POST': + for entry in entries: + try: + entry.document_version.submit_for_ocr() + messages.success( + request, + _('Entry: %(entry)s was re-queued for OCR.') % { + 'entry': entry + } + ) + except DocumentVersion.DoesNotExist: + messages.error(request, _('Document version id#: %d, no longer exists.') % entry.document_version_id) + return HttpResponseRedirect(next) + + context = { + 'next': next, + 'previous': previous, + } + + if len(entries) == 1: + context['object'] = entries[0] + + context['title'] = ungettext( + 'Are you sure you wish to re-queue the entry: %(entry)s?', + 'Are you sure you wish to re-queue these %(count)d entries.', + len(entries) + ) % { + 'count': len(entries), + 'entry': entries[0], + } + + return render_to_response('main/generic_confirm.html', context, + context_instance=RequestContext(request)) + + +def entry_re_queue_multiple(request): + return entry_re_queue(request, pk_list=request.GET.get('id_list', []))