diff --git a/mayan/apps/documents/apps.py b/mayan/apps/documents/apps.py index 9a14681f79..7eca3e814e 100644 --- a/mayan/apps/documents/apps.py +++ b/mayan/apps/documents/apps.py @@ -33,14 +33,13 @@ from documents import settings as document_settings from .handlers import create_default_document_type from .links import ( link_clear_image_cache, link_document_acl_list, - link_document_clear_transformations, link_document_content, - link_document_delete, link_document_document_type_edit, - link_document_events_view, link_document_multiple_document_type_edit, - link_document_download, link_document_edit, link_document_list, - link_document_list_recent, link_document_multiple_delete, + link_document_clear_transformations, link_document_delete, + link_document_document_type_edit, link_document_events_view, + link_document_multiple_document_type_edit, link_document_download, + link_document_edit, link_document_list, link_document_list_recent, + link_document_multiple_delete, link_document_multiple_clear_transformations, - link_document_multiple_download, - link_document_multiple_update_page_count, + link_document_multiple_download, link_document_multiple_update_page_count, link_document_page_navigation_first, link_document_page_navigation_last, link_document_page_navigation_next, link_document_page_navigation_previous, link_document_page_return, @@ -129,7 +128,6 @@ class DocumentsApp(apps.AppConfig): # Document facet links menu_facet.bind_links(links=[link_document_acl_list], sources=[Document]) menu_facet.bind_links(links=[link_document_preview], sources=[Document], position=0) - menu_facet.bind_links(links=[link_document_content], sources=[Document], position=1) menu_facet.bind_links(links=[link_document_properties], sources=[Document], position=2) menu_facet.bind_links(links=[link_document_events_view, link_document_version_list], sources=[Document], position=2) menu_facet.bind_links(links=[link_document_pages], sources=[Document]) diff --git 
a/mayan/apps/documents/forms.py b/mayan/apps/documents/forms.py index 6a99b7d73b..91d677f6bc 100644 --- a/mayan/apps/documents/forms.py +++ b/mayan/apps/documents/forms.py @@ -87,34 +87,6 @@ class DocumentPropertiesForm(DetailForm): model = Document -class DocumentContentForm(forms.Form): - """ - Form that concatenates all of a document pages' text content into a - single textarea widget - """ - def __init__(self, *args, **kwargs): - self.document = kwargs.pop('document', None) - super(DocumentContentForm, self).__init__(*args, **kwargs) - content = [] - self.fields['contents'].initial = '' - try: - document_pages = self.document.pages.all() - except AttributeError: - document_pages = [] - - for page in document_pages: - if page.content: - content.append(conditional_escape(force_unicode(page.content))) - content.append('\n\n\n
- %s -

\n\n\n' % (ugettext('Page %(page_number)d') % {'page_number': page.page_number})) - - self.fields['contents'].initial = mark_safe(''.join(content)) - - contents = forms.CharField( - label=_('Contents'), - widget=TextAreaDiv(attrs={'class': 'text_area_div full-height', 'data-height-difference': 360}) - ) - - class DocumentTypeSelectForm(forms.Form): """ Form to select the document type of a document to be created, used diff --git a/mayan/apps/documents/links.py b/mayan/apps/documents/links.py index 8b2e9030ca..8729039a47 100644 --- a/mayan/apps/documents/links.py +++ b/mayan/apps/documents/links.py @@ -42,7 +42,6 @@ def is_min_zoom(context): # Facet link_document_acl_list = Link(permissions=[ACLS_VIEW_ACL], text=_('ACLs'), view='documents:document_acl_list', args='object.pk') -link_document_content = Link(permissions=[PERMISSION_DOCUMENT_VIEW], text=_('Content'), view='documents:document_content', args='object.id') link_document_events_view = Link(permissions=[PERMISSION_EVENTS_VIEW], text=_('Events'), view='events:events_for_object', args=['"documents"', '"document"', 'object.id']) link_document_preview = Link(permissions=[PERMISSION_DOCUMENT_VIEW], text=_('Preview'), view='documents:document_preview', args='object.id') link_document_properties = Link(permissions=[PERMISSION_DOCUMENT_VIEW], text=_('Properties'), view='documents:document_properties', args='object.id') diff --git a/mayan/apps/documents/migrations/0005_auto_20150617_0358.py b/mayan/apps/documents/migrations/0005_auto_20150617_0358.py new file mode 100644 index 0000000000..93a635f480 --- /dev/null +++ b/mayan/apps/documents/migrations/0005_auto_20150617_0358.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0004_auto_20150616_1930'), + ] + + operations = [ + migrations.RenameField( + model_name='documentpage', + old_name='content', + 
new_name='content_old', + ), + ] diff --git a/mayan/apps/documents/migrations/0006_remove_documentpage_content_old.py b/mayan/apps/documents/migrations/0006_remove_documentpage_content_old.py new file mode 100644 index 0000000000..87df8ad339 --- /dev/null +++ b/mayan/apps/documents/migrations/0006_remove_documentpage_content_old.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0005_auto_20150617_0358'), + ] + + operations = [ + migrations.RemoveField( + model_name='documentpage', + name='content_old', + ), + ] diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py index 764a6a9300..aff64118e3 100644 --- a/mayan/apps/documents/models.py +++ b/mayan/apps/documents/models.py @@ -424,10 +424,9 @@ class DocumentTypeFilename(models.Model): @python_2_unicode_compatible class DocumentPage(models.Model): """ - Model that describes a document version page including it's content + Model that describes a document version page """ document_version = models.ForeignKey(DocumentVersion, verbose_name=_('Document version'), related_name='pages') - content = models.TextField(blank=True, null=True, verbose_name=_('Content')) page_label = models.CharField(max_length=40, blank=True, null=True, verbose_name=_('Page label')) page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_('Page number'), db_index=True) diff --git a/mayan/apps/documents/urls.py b/mayan/apps/documents/urls.py index 06a0006ca9..f91b787f92 100644 --- a/mayan/apps/documents/urls.py +++ b/mayan/apps/documents/urls.py @@ -20,7 +20,6 @@ urlpatterns = patterns( url(r'^list/recent/$', RecentDocumentListView.as_view(), name='document_list_recent'), url(r'^(?P\d+)/preview/$', 'document_preview', name='document_preview'), - url(r'^(?P\d+)/content/$', 'document_content', name='document_content'), 
url(r'^(?P\d+)/properties/$', 'document_properties', name='document_properties'), url(r'^(?P\d+)/type/$', 'document_document_type_edit', name='document_document_type_edit'), url(r'^multiple/type/$', 'document_multiple_document_type_edit', name='document_multiple_document_type_edit'), diff --git a/mayan/apps/documents/views.py b/mayan/apps/documents/views.py index d83935291e..afdf529ee8 100644 --- a/mayan/apps/documents/views.py +++ b/mayan/apps/documents/views.py @@ -33,10 +33,9 @@ from .events import ( event_document_properties_edit, event_document_type_change ) from .forms import ( - DocumentContentForm, DocumentDownloadForm, DocumentForm, DocumentPageForm, - DocumentPreviewForm, DocumentPropertiesForm, DocumentTypeForm, - DocumentTypeFilenameForm, DocumentTypeFilenameForm_create, - DocumentTypeSelectForm, PrintForm + DocumentDownloadForm, DocumentForm, DocumentPageForm, DocumentPreviewForm, + DocumentPropertiesForm, DocumentTypeForm, DocumentTypeFilenameForm, + DocumentTypeFilenameForm_create, DocumentTypeSelectForm, PrintForm ) from .literals import DOCUMENT_IMAGE_TASK_TIMEOUT from .models import ( @@ -188,28 +187,6 @@ def document_preview(request, document_id): }, context_instance=RequestContext(request)) -def document_content(request, document_id): - document = get_object_or_404(Document, pk=document_id) - - try: - Permission.objects.check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW]) - except PermissionDenied: - AccessEntry.objects.check_access(PERMISSION_DOCUMENT_VIEW, request.user, document) - - document.add_as_recent_document_for_user(request.user) - - content_form = DocumentContentForm(document=document) - - return render_to_response('appearance/generic_form.html', { - 'document': document, - 'form': content_form, - 'hide_labels': True, - 'object': document, - 'read_only': True, - 'title': _('Content of document: %s') % document, - }, context_instance=RequestContext(request)) - - def document_delete(request, document_id=None, 
document_id_list=None): post_action_redirect = None diff --git a/mayan/apps/ocr/apps.py b/mayan/apps/ocr/apps.py index 6b5d5daa69..0f1b90a3fc 100644 --- a/mayan/apps/ocr/apps.py +++ b/mayan/apps/ocr/apps.py @@ -8,7 +8,9 @@ from django import apps from django.utils.translation import ugettext_lazy as _ from acls.api import class_permissions -from common import menu_multi_item, menu_object, menu_secondary, menu_tools +from common import ( + menu_facet, menu_multi_item, menu_object, menu_secondary, menu_tools +) from common.api import register_maintenance_links from common.utils import encapsulate from documents.models import Document, DocumentVersion @@ -20,7 +22,8 @@ from rest_api.classes import APIEndPoint from .handlers import post_version_upload_ocr from .links import ( - link_document_submit, link_document_submit_multiple, link_entry_delete, + link_document_content, link_document_submit, + link_document_submit_multiple, link_entry_delete, link_entry_delete_multiple, link_entry_list, link_entry_re_queue, link_entry_re_queue_multiple ) @@ -52,6 +55,7 @@ class OCRApp(apps.AppConfig): class_permissions(Document, [PERMISSION_OCR_DOCUMENT]) + menu_facet.bind_links(links=[link_document_content], sources=[Document]) menu_multi_item.bind_links(links=[link_document_submit_multiple], sources=[Document]) menu_multi_item.bind_links(links=[link_entry_re_queue_multiple, link_entry_delete_multiple], sources=[DocumentVersionOCRError]) menu_object.bind_links(links=[link_document_submit], sources=[Document]) diff --git a/mayan/apps/ocr/classes.py b/mayan/apps/ocr/classes.py index 34abad8561..e8a9b0ec7d 100644 --- a/mayan/apps/ocr/classes.py +++ b/mayan/apps/ocr/classes.py @@ -4,8 +4,6 @@ import logging import os import tempfile -import sh - from django.utils.module_loading import import_string from django.utils.translation import ugettext_lazy as _ @@ -18,6 +16,7 @@ from .exceptions import UnpaperError from .literals import ( DEFAULT_OCR_FILE_EXTENSION, DEFAULT_OCR_FILE_FORMAT, 
UNPAPER_FILE_FORMAT
 )
+from .models import DocumentPageContent
 from .parsers import parse_document_page
 from .parsers.exceptions import ParserError, ParserUnknownFile
 from .settings import UNPAPER_PATH
@@ -34,11 +33,14 @@ class OCRBackendBase(object):
 
         for page in document_version.pages.all():
             image = page.get_image()
-            logger.info('Processing page: %d', page.page_number)
-            page.content = self.execute(file_object=image, language=language)
-            page.save()
+            logger.info('Processing page: %d of document version: %s', page.page_number, document_version)
+            # Run the OCR only once per page and store that single result
+            result = self.execute(file_object=image, language=language)
+            document_page_content, created = DocumentPageContent.objects.get_or_create(document_page=page)
+            document_page_content.content = result
+            document_page_content.save()
             image.close()
-            logger.info('Finished processing page: %d', page.page_number)
+            logger.info('Finished processing page: %d of document version: %s', page.page_number, document_version)
 
     def execute(self, file_object, language=None, transformations=None):
         if not transformations:
diff --git a/mayan/apps/ocr/forms.py b/mayan/apps/ocr/forms.py
new file mode 100644
index 0000000000..461c0a8579
--- /dev/null
+++ b/mayan/apps/ocr/forms.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+from django import forms
+from django.utils.encoding import force_unicode
+from django.utils.html import conditional_escape
+from django.utils.safestring import mark_safe
+from django.utils.translation import ugettext_lazy as _, ugettext
+
+from common.widgets import TextAreaDiv
+
+from .models import DocumentPageContent
+
+
+class DocumentContentForm(forms.Form):
+    """
+    Form that concatenates all of a document pages' text content into a
+    single textarea widget
+    """
+    def __init__(self, *args, **kwargs):
+        self.document = kwargs.pop('document', None)
+        super(DocumentContentForm, self).__init__(*args, **kwargs)
+        content = []
+        self.fields['contents'].initial = ''
+
try:
+            document_pages = self.document.pages.all()
+        except AttributeError:
+            document_pages = []
+
+        for page in document_pages:
+            try:
+                page_content = page.ocr_content.content
+            except DocumentPageContent.DoesNotExist:
+                pass
+            else:
+                content.append(conditional_escape(force_unicode(page_content)))
+                content.append('\n\n\n<hr/><div class="document-page-content-divider">- %s -</div><hr/>\n\n\n' % (ugettext('Page %(page_number)d') % {'page_number': page.page_number}))
+
+        self.fields['contents'].initial = mark_safe(''.join(content))
+
+    contents = forms.CharField(
+        label=_('Contents'),
+        widget=TextAreaDiv(attrs={'class': 'text_area_div full-height', 'data-height-difference': 360})
+    )
diff --git a/mayan/apps/ocr/links.py b/mayan/apps/ocr/links.py
index 80973dc417..e6196dedf2 100644
--- a/mayan/apps/ocr/links.py
+++ b/mayan/apps/ocr/links.py
@@ -5,9 +5,11 @@ from django.utils.translation import ugettext_lazy as _
 from navigation import Link
 
 from .permissions import (
-    PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE
+    PERMISSION_OCR_CONTENT_VIEW, PERMISSION_OCR_DOCUMENT,
+    PERMISSION_OCR_DOCUMENT_DELETE
 )
 
+link_document_content = Link(permissions=[PERMISSION_OCR_CONTENT_VIEW], text=_('Content'), view='ocr:document_content', args='resolved_object.id')
 link_document_submit = Link(permissions=[PERMISSION_OCR_DOCUMENT], text=_('Submit to OCR queue'), view='ocr:document_submit', args='object.id')
 link_document_submit_multiple = Link(text=_('Submit to OCR queue'), view='ocr:document_submit_multiple')
 link_entry_delete = Link(permissions=[PERMISSION_OCR_DOCUMENT_DELETE], text=_('Delete'), view='ocr:entry_delete', args='object.id')
diff --git a/mayan/apps/ocr/migrations/0002_documentpagecontent.py b/mayan/apps/ocr/migrations/0002_documentpagecontent.py
new file mode 100644
index 0000000000..85641cc29a
--- /dev/null
+++ b/mayan/apps/ocr/migrations/0002_documentpagecontent.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0005_auto_20150617_0358'),
+        ('ocr', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='DocumentPageContent',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('content',
models.TextField(verbose_name='Content', blank=True)),
+                ('document_page', models.OneToOneField(related_name='ocr_content', verbose_name='Document page', to='documents.DocumentPage')),
+            ],
+            options={
+                'verbose_name': 'Document page content',
+                'verbose_name_plural': 'Document pages contents',
+            },
+            bases=(models.Model,),
+        ),
+    ]
diff --git a/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py b/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py
new file mode 100644
index 0000000000..3a69ebd78e
--- /dev/null
+++ b/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+def move_content_from_documents_to_ocr_app(apps, schema_editor):
+    """
+    Copy the text held in the DocumentPage.content_old column into one
+    DocumentPageContent row per document page
+    """
+    DocumentPage = apps.get_model('documents', 'DocumentPage')
+    DocumentPageContent = apps.get_model('ocr', 'DocumentPageContent')
+
+    for document_page in DocumentPage.objects.all():
+        # content_old is nullable but DocumentPageContent.content is not,
+        # store an empty string for pages that have no text
+        DocumentPageContent.objects.create(
+            document_page=document_page,
+            content=document_page.content_old or ''
+        )
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('ocr', '0002_documentpagecontent'),
+    ]
+
+    # The documents app removes the content_old column in its migration
+    # 0006, the data must be copied before that migration is applied
+    run_before = [
+        ('documents', '0006_remove_documentpage_content_old'),
+    ]
+
+    operations = [
+        migrations.RunPython(move_content_from_documents_to_ocr_app),
+    ]
diff --git a/mayan/apps/ocr/models.py b/mayan/apps/ocr/models.py
index e4c1713eb9..cc2210b8b7 100644
--- a/mayan/apps/ocr/models.py
+++ b/mayan/apps/ocr/models.py
@@ -4,7 +4,7 @@ from django.db import models
 from django.utils.encoding import python_2_unicode_compatible
 from django.utils.translation import ugettext_lazy as _
 
-from documents.models import DocumentVersion
+from documents.models import DocumentVersion, DocumentPage
 
 
 @python_2_unicode_compatible
@@ -20,3 +20,19 @@ class DocumentVersionOCRError(models.Model):
         ordering = ('datetime_submitted',)
         verbose_name = _('Document Version OCR Error')
         verbose_name_plural = _('Document Version OCR Errors')
+
+
+@python_2_unicode_compatible
+class DocumentPageContent(models.Model):
+    """
+    Model that describes a document page content
+    """
+    document_page = models.OneToOneField(DocumentPage, related_name='ocr_content', verbose_name=_('Document page'))
+    content = models.TextField(blank=True, verbose_name=_('Content'))
+
+    def __str__(self):
+        return unicode(self.document_page)
+
+    class Meta:
+        verbose_name = _('Document page content')
+        verbose_name_plural = _('Document pages contents')
diff --git a/mayan/apps/ocr/permissions.py b/mayan/apps/ocr/permissions.py
index c0811fef54..d14cdafcec 100644
--- a/mayan/apps/ocr/permissions.py
+++ b/mayan/apps/ocr/permissions.py
@@ -7,3 +7,4 @@ from permissions.models import Permission, PermissionNamespace
 ocr_namespace = PermissionNamespace('ocr', _('OCR'))
 PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _('Submit documents for OCR'))
 PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _('Delete documents from OCR queue'))
+PERMISSION_OCR_CONTENT_VIEW = Permission.objects.register(ocr_namespace, 'ocr_content_view', _('Can view the transcribed text from document'))
diff --git a/mayan/apps/ocr/urls.py b/mayan/apps/ocr/urls.py
index 09da9ebe4e..9805a86ce5 100644
--- a/mayan/apps/ocr/urls.py
+++ b/mayan/apps/ocr/urls.py
@@ -6,6 +6,7 @@ from .api_views import DocumentVersionOCRView
 
 urlpatterns = patterns(
     'ocr.views',
+    url(r'^(?P<document_id>\d+)/content/$', 'document_content', name='document_content'),
     url(r'^document/(?P<pk>\d+)/submit/$', 'document_submit', name='document_submit'),
     url(r'^document/multiple/submit/$', 'document_submit_multiple', name='document_submit_multiple'),
 
diff --git a/mayan/apps/ocr/views.py b/mayan/apps/ocr/views.py
index d39a19985b..e9b9e45ba4 100644
--- a/mayan/apps/ocr/views.py
+++ b/mayan/apps/ocr/views.py
@@ -13,12 +13,42 @@ from acls.models import AccessEntry
 from documents.models import Document, DocumentVersion
 from permissions.models import Permission
 
+from .forms import DocumentContentForm
 from .models import DocumentVersionOCRError
 from .permissions import (
-    PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE
+    PERMISSION_OCR_CONTENT_VIEW, PERMISSION_OCR_DOCUMENT,
+    PERMISSION_OCR_DOCUMENT_DELETE
 )
 
 
+def document_content(request, document_id):
+    """
+    Render the content of all the pages of a document in a read only
+    form
+    """
+    document = get_object_or_404(Document, pk=document_id)
+
+    try:
+        Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CONTENT_VIEW])
+    except PermissionDenied:
+        # The plain permission check failed, check the access against
+        # this specific document instead
+        AccessEntry.objects.check_access(PERMISSION_OCR_CONTENT_VIEW, request.user, document)
+
+    document.add_as_recent_document_for_user(request.user)
+
+    content_form = DocumentContentForm(document=document)
+
+    return render_to_response('appearance/generic_form.html', {
+        'document': document,
+        'form': content_form,
+        'hide_labels': True,
+        'object': document,
+        'read_only': True,
+        'title': _('Content of document: %s') % document,
+    }, context_instance=RequestContext(request))
+
+
 def document_submit(request, pk):
     document = get_object_or_404(Document, pk=pk)
 