diff --git a/mayan/apps/documents/apps.py b/mayan/apps/documents/apps.py
index 9a14681f79..7eca3e814e 100644
--- a/mayan/apps/documents/apps.py
+++ b/mayan/apps/documents/apps.py
@@ -33,14 +33,13 @@ from documents import settings as document_settings
from .handlers import create_default_document_type
from .links import (
link_clear_image_cache, link_document_acl_list,
- link_document_clear_transformations, link_document_content,
- link_document_delete, link_document_document_type_edit,
- link_document_events_view, link_document_multiple_document_type_edit,
- link_document_download, link_document_edit, link_document_list,
- link_document_list_recent, link_document_multiple_delete,
+ link_document_clear_transformations, link_document_delete,
+ link_document_document_type_edit, link_document_events_view,
+ link_document_multiple_document_type_edit, link_document_download,
+ link_document_edit, link_document_list, link_document_list_recent,
+ link_document_multiple_delete,
link_document_multiple_clear_transformations,
- link_document_multiple_download,
- link_document_multiple_update_page_count,
+ link_document_multiple_download, link_document_multiple_update_page_count,
link_document_page_navigation_first, link_document_page_navigation_last,
link_document_page_navigation_next,
link_document_page_navigation_previous, link_document_page_return,
@@ -129,7 +128,6 @@ class DocumentsApp(apps.AppConfig):
# Document facet links
menu_facet.bind_links(links=[link_document_acl_list], sources=[Document])
menu_facet.bind_links(links=[link_document_preview], sources=[Document], position=0)
- menu_facet.bind_links(links=[link_document_content], sources=[Document], position=1)
menu_facet.bind_links(links=[link_document_properties], sources=[Document], position=2)
menu_facet.bind_links(links=[link_document_events_view, link_document_version_list], sources=[Document], position=2)
menu_facet.bind_links(links=[link_document_pages], sources=[Document])
diff --git a/mayan/apps/documents/forms.py b/mayan/apps/documents/forms.py
index 6a99b7d73b..91d677f6bc 100644
--- a/mayan/apps/documents/forms.py
+++ b/mayan/apps/documents/forms.py
@@ -87,34 +87,6 @@ class DocumentPropertiesForm(DetailForm):
model = Document
-class DocumentContentForm(forms.Form):
- """
- Form that concatenates all of a document pages' text content into a
- single textarea widget
- """
- def __init__(self, *args, **kwargs):
- self.document = kwargs.pop('document', None)
- super(DocumentContentForm, self).__init__(*args, **kwargs)
- content = []
- self.fields['contents'].initial = ''
- try:
- document_pages = self.document.pages.all()
- except AttributeError:
- document_pages = []
-
- for page in document_pages:
- if page.content:
- content.append(conditional_escape(force_unicode(page.content)))
- content.append('\n\n\n
- %s -
\n\n\n' % (ugettext('Page %(page_number)d') % {'page_number': page.page_number}))
-
- self.fields['contents'].initial = mark_safe(''.join(content))
-
- contents = forms.CharField(
- label=_('Contents'),
- widget=TextAreaDiv(attrs={'class': 'text_area_div full-height', 'data-height-difference': 360})
- )
-
-
class DocumentTypeSelectForm(forms.Form):
"""
Form to select the document type of a document to be created, used
diff --git a/mayan/apps/documents/links.py b/mayan/apps/documents/links.py
index 8b2e9030ca..8729039a47 100644
--- a/mayan/apps/documents/links.py
+++ b/mayan/apps/documents/links.py
@@ -42,7 +42,6 @@ def is_min_zoom(context):
# Facet
link_document_acl_list = Link(permissions=[ACLS_VIEW_ACL], text=_('ACLs'), view='documents:document_acl_list', args='object.pk')
-link_document_content = Link(permissions=[PERMISSION_DOCUMENT_VIEW], text=_('Content'), view='documents:document_content', args='object.id')
link_document_events_view = Link(permissions=[PERMISSION_EVENTS_VIEW], text=_('Events'), view='events:events_for_object', args=['"documents"', '"document"', 'object.id'])
link_document_preview = Link(permissions=[PERMISSION_DOCUMENT_VIEW], text=_('Preview'), view='documents:document_preview', args='object.id')
link_document_properties = Link(permissions=[PERMISSION_DOCUMENT_VIEW], text=_('Properties'), view='documents:document_properties', args='object.id')
diff --git a/mayan/apps/documents/migrations/0005_auto_20150617_0358.py b/mayan/apps/documents/migrations/0005_auto_20150617_0358.py
new file mode 100644
index 0000000000..93a635f480
--- /dev/null
+++ b/mayan/apps/documents/migrations/0005_auto_20150617_0358.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+class Migration(migrations.Migration):
+ """Rename the ``content`` field of ``DocumentPage`` to ``content_old``."""
+ dependencies = [
+ ('documents', '0004_auto_20150616_1930'),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name='documentpage',
+ old_name='content',
+ new_name='content_old',
+ ),
+ ]
diff --git a/mayan/apps/documents/migrations/0006_remove_documentpage_content_old.py b/mayan/apps/documents/migrations/0006_remove_documentpage_content_old.py
new file mode 100644
index 0000000000..87df8ad339
--- /dev/null
+++ b/mayan/apps/documents/migrations/0006_remove_documentpage_content_old.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+class Migration(migrations.Migration):
+ """Remove the ``content_old`` field from ``DocumentPage``."""
+ dependencies = [
+ ('documents', '0005_auto_20150617_0358'),
+ ]
+
+ operations = [
+ migrations.RemoveField(
+ model_name='documentpage',
+ name='content_old',
+ ),
+ ]
diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py
index 764a6a9300..aff64118e3 100644
--- a/mayan/apps/documents/models.py
+++ b/mayan/apps/documents/models.py
@@ -424,10 +424,9 @@ class DocumentTypeFilename(models.Model):
@python_2_unicode_compatible
class DocumentPage(models.Model):
"""
- Model that describes a document version page including it's content
+ Model that describes a document version page
"""
document_version = models.ForeignKey(DocumentVersion, verbose_name=_('Document version'), related_name='pages')
- content = models.TextField(blank=True, null=True, verbose_name=_('Content'))
page_label = models.CharField(max_length=40, blank=True, null=True, verbose_name=_('Page label'))
page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_('Page number'), db_index=True)
diff --git a/mayan/apps/documents/urls.py b/mayan/apps/documents/urls.py
index 06a0006ca9..f91b787f92 100644
--- a/mayan/apps/documents/urls.py
+++ b/mayan/apps/documents/urls.py
@@ -20,7 +20,6 @@ urlpatterns = patterns(
url(r'^list/recent/$', RecentDocumentListView.as_view(), name='document_list_recent'),
url(r'^(?P\d+)/preview/$', 'document_preview', name='document_preview'),
- url(r'^(?P\d+)/content/$', 'document_content', name='document_content'),
url(r'^(?P\d+)/properties/$', 'document_properties', name='document_properties'),
url(r'^(?P\d+)/type/$', 'document_document_type_edit', name='document_document_type_edit'),
url(r'^multiple/type/$', 'document_multiple_document_type_edit', name='document_multiple_document_type_edit'),
diff --git a/mayan/apps/documents/views.py b/mayan/apps/documents/views.py
index d83935291e..afdf529ee8 100644
--- a/mayan/apps/documents/views.py
+++ b/mayan/apps/documents/views.py
@@ -33,10 +33,9 @@ from .events import (
event_document_properties_edit, event_document_type_change
)
from .forms import (
- DocumentContentForm, DocumentDownloadForm, DocumentForm, DocumentPageForm,
- DocumentPreviewForm, DocumentPropertiesForm, DocumentTypeForm,
- DocumentTypeFilenameForm, DocumentTypeFilenameForm_create,
- DocumentTypeSelectForm, PrintForm
+ DocumentDownloadForm, DocumentForm, DocumentPageForm, DocumentPreviewForm,
+ DocumentPropertiesForm, DocumentTypeForm, DocumentTypeFilenameForm,
+ DocumentTypeFilenameForm_create, DocumentTypeSelectForm, PrintForm
)
from .literals import DOCUMENT_IMAGE_TASK_TIMEOUT
from .models import (
@@ -188,28 +187,6 @@ def document_preview(request, document_id):
}, context_instance=RequestContext(request))
-def document_content(request, document_id):
- document = get_object_or_404(Document, pk=document_id)
-
- try:
- Permission.objects.check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
- except PermissionDenied:
- AccessEntry.objects.check_access(PERMISSION_DOCUMENT_VIEW, request.user, document)
-
- document.add_as_recent_document_for_user(request.user)
-
- content_form = DocumentContentForm(document=document)
-
- return render_to_response('appearance/generic_form.html', {
- 'document': document,
- 'form': content_form,
- 'hide_labels': True,
- 'object': document,
- 'read_only': True,
- 'title': _('Content of document: %s') % document,
- }, context_instance=RequestContext(request))
-
-
def document_delete(request, document_id=None, document_id_list=None):
post_action_redirect = None
diff --git a/mayan/apps/ocr/apps.py b/mayan/apps/ocr/apps.py
index 6b5d5daa69..0f1b90a3fc 100644
--- a/mayan/apps/ocr/apps.py
+++ b/mayan/apps/ocr/apps.py
@@ -8,7 +8,9 @@ from django import apps
from django.utils.translation import ugettext_lazy as _
from acls.api import class_permissions
-from common import menu_multi_item, menu_object, menu_secondary, menu_tools
+from common import (
+ menu_facet, menu_multi_item, menu_object, menu_secondary, menu_tools
+)
from common.api import register_maintenance_links
from common.utils import encapsulate
from documents.models import Document, DocumentVersion
@@ -20,7 +22,8 @@ from rest_api.classes import APIEndPoint
from .handlers import post_version_upload_ocr
from .links import (
- link_document_submit, link_document_submit_multiple, link_entry_delete,
+ link_document_content, link_document_submit,
+ link_document_submit_multiple, link_entry_delete,
link_entry_delete_multiple, link_entry_list, link_entry_re_queue,
link_entry_re_queue_multiple
)
@@ -52,6 +55,7 @@ class OCRApp(apps.AppConfig):
class_permissions(Document, [PERMISSION_OCR_DOCUMENT])
+ menu_facet.bind_links(links=[link_document_content], sources=[Document])
menu_multi_item.bind_links(links=[link_document_submit_multiple], sources=[Document])
menu_multi_item.bind_links(links=[link_entry_re_queue_multiple, link_entry_delete_multiple], sources=[DocumentVersionOCRError])
menu_object.bind_links(links=[link_document_submit], sources=[Document])
diff --git a/mayan/apps/ocr/classes.py b/mayan/apps/ocr/classes.py
index 34abad8561..e8a9b0ec7d 100644
--- a/mayan/apps/ocr/classes.py
+++ b/mayan/apps/ocr/classes.py
@@ -4,8 +4,6 @@ import logging
import os
import tempfile
-import sh
-
from django.utils.module_loading import import_string
from django.utils.translation import ugettext_lazy as _
@@ -18,6 +16,7 @@ from .exceptions import UnpaperError
from .literals import (
DEFAULT_OCR_FILE_EXTENSION, DEFAULT_OCR_FILE_FORMAT, UNPAPER_FILE_FORMAT
)
+from .models import DocumentPageContent
from .parsers import parse_document_page
from .parsers.exceptions import ParserError, ParserUnknownFile
from .settings import UNPAPER_PATH
@@ -34,11 +33,13 @@ class OCRBackendBase(object):
for page in document_version.pages.all():
image = page.get_image()
- logger.info('Processing page: %d', page.page_number)
- page.content = self.execute(file_object=image, language=language)
- page.save()
+ logger.info('Processing page: %d of document version: %s', page.page_number, document_version)
+ document_page_content, created = DocumentPageContent.objects.get_or_create(document_page=page)
+            # Run the OCR backend only once per page and store that result
+            document_page_content.content = self.execute(file_object=image, language=language)
+ document_page_content.save()
image.close()
- logger.info('Finished processing page: %d', page.page_number)
+ logger.info('Finished processing page: %d of document version: %s', page.page_number, document_version)
def execute(self, file_object, language=None, transformations=None):
if not transformations:
diff --git a/mayan/apps/ocr/forms.py b/mayan/apps/ocr/forms.py
new file mode 100644
index 0000000000..461c0a8579
--- /dev/null
+++ b/mayan/apps/ocr/forms.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+from django import forms
+from django.utils.encoding import force_unicode
+from django.utils.html import conditional_escape
+from django.utils.safestring import mark_safe
+from django.utils.translation import ugettext_lazy as _, ugettext
+
+from common.widgets import TextAreaDiv
+
+from .models import DocumentPageContent
+
+
+class DocumentContentForm(forms.Form):
+    """
+    Form that concatenates the OCR text of all of a document's pages into
+    a single textarea widget. Pages without stored OCR content are skipped.
+    """
+    def __init__(self, *args, **kwargs):
+        self.document = kwargs.pop('document', None)
+        super(DocumentContentForm, self).__init__(*args, **kwargs)
+        content = []
+        try:
+            document_pages = self.document.pages.all()
+        except AttributeError:
+            # No document given (or it has no ``pages``): leave field empty
+            document_pages = []
+
+        for page in document_pages:
+            try:
+                page_content = page.ocr_content.content
+            except DocumentPageContent.DoesNotExist:
+                continue
+
+            # Escape the page text; only the divider below is literal HTML
+            content.append(conditional_escape(force_unicode(page_content)))
+            # NOTE(review): divider markup reconstructed; confirm tags and CSS class
+            content.append('\n\n\n<hr/><div class="document-page-content-divider">- %s -</div><hr/>\n\n\n' % (ugettext('Page %(page_number)d') % {'page_number': page.page_number}))
+
+        self.fields['contents'].initial = mark_safe(''.join(content))
+
+    contents = forms.CharField(
+        label=_('Contents'),
+        widget=TextAreaDiv(attrs={'class': 'text_area_div full-height', 'data-height-difference': 360})
+    )
diff --git a/mayan/apps/ocr/links.py b/mayan/apps/ocr/links.py
index 80973dc417..e6196dedf2 100644
--- a/mayan/apps/ocr/links.py
+++ b/mayan/apps/ocr/links.py
@@ -5,9 +5,11 @@ from django.utils.translation import ugettext_lazy as _
from navigation import Link
from .permissions import (
- PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE
+ PERMISSION_OCR_CONTENT_VIEW, PERMISSION_OCR_DOCUMENT,
+ PERMISSION_OCR_DOCUMENT_DELETE
)
+link_document_content = Link(permissions=[PERMISSION_OCR_CONTENT_VIEW], text=_('Content'), view='ocr:document_content', args='resolved_object.id')
link_document_submit = Link(permissions=[PERMISSION_OCR_DOCUMENT], text=_('Submit to OCR queue'), view='ocr:document_submit', args='object.id')
link_document_submit_multiple = Link(text=_('Submit to OCR queue'), view='ocr:document_submit_multiple')
link_entry_delete = Link(permissions=[PERMISSION_OCR_DOCUMENT_DELETE], text=_('Delete'), view='ocr:entry_delete', args='object.id')
diff --git a/mayan/apps/ocr/migrations/0002_documentpagecontent.py b/mayan/apps/ocr/migrations/0002_documentpagecontent.py
new file mode 100644
index 0000000000..85641cc29a
--- /dev/null
+++ b/mayan/apps/ocr/migrations/0002_documentpagecontent.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+class Migration(migrations.Migration):
+ """Create the ``DocumentPageContent`` model in the ``ocr`` app."""
+ dependencies = [
+ ('documents', '0005_auto_20150617_0358'),
+ ('ocr', '0001_initial'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='DocumentPageContent',
+ fields=[
+ ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+ ('content', models.TextField(verbose_name='Content', blank=True)),  # no null=True: column is NOT NULL
+ ('document_page', models.OneToOneField(related_name='ocr_content', verbose_name='Document page', to='documents.DocumentPage')),
+ ],
+ options={
+ 'verbose_name': 'Document page content',
+ 'verbose_name_plural': 'Document pages contents',
+ },
+ bases=(models.Model,),
+ ),
+ ]
diff --git a/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py b/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py
new file mode 100644
index 0000000000..3a69ebd78e
--- /dev/null
+++ b/mayan/apps/ocr/migrations/0003_auto_20150617_0401.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+def move_content_from_documents_to_ocr_app(apps, schema_editor):
+    """Copy every page's legacy ``content_old`` text into ``DocumentPageContent``."""
+    DocumentPage = apps.get_model('documents', 'DocumentPage')
+    DocumentPageContent = apps.get_model('ocr', 'DocumentPageContent')
+
+    for document_page in DocumentPage.objects.all():
+        # The legacy column allowed NULL but the new field does not, so
+        # normalize missing text to an empty string.
+        DocumentPageContent.objects.create(document_page=document_page, content=document_page.content_old or '')
+
+
+class Migration(migrations.Migration):
+    """Data migration that moves page text from ``documents`` to ``ocr``."""
+    dependencies = [
+        ('ocr', '0002_documentpagecontent'),
+    ]
+    # documents 0006 removes ``content_old``; the copy must run before it
+    run_before = [
+        ('documents', '0006_remove_documentpage_content_old'),
+    ]
+
+    operations = [
+        migrations.RunPython(move_content_from_documents_to_ocr_app),
+    ]
diff --git a/mayan/apps/ocr/models.py b/mayan/apps/ocr/models.py
index e4c1713eb9..cc2210b8b7 100644
--- a/mayan/apps/ocr/models.py
+++ b/mayan/apps/ocr/models.py
@@ -4,7 +4,7 @@ from django.db import models
from django.utils.encoding import python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
-from documents.models import DocumentVersion
+from documents.models import DocumentVersion, DocumentPage
@python_2_unicode_compatible
@@ -20,3 +20,19 @@ class DocumentVersionOCRError(models.Model):
ordering = ('datetime_submitted',)
verbose_name = _('Document Version OCR Error')
verbose_name_plural = _('Document Version OCR Errors')
+
+
+@python_2_unicode_compatible
+class DocumentPageContent(models.Model):
+ """
+ Text content of a single document page, reachable from the page as ``ocr_content``
+ """
+ document_page = models.OneToOneField(DocumentPage, related_name='ocr_content', verbose_name=_('Document page'))
+ content = models.TextField(blank=True, verbose_name=_('Content'))
+
+ def __str__(self):
+ return unicode(self.document_page)  # reuse the text representation of the related page
+
+ class Meta:
+ verbose_name = _('Document page content')
+ verbose_name_plural = _('Document pages contents')
diff --git a/mayan/apps/ocr/permissions.py b/mayan/apps/ocr/permissions.py
index c0811fef54..d14cdafcec 100644
--- a/mayan/apps/ocr/permissions.py
+++ b/mayan/apps/ocr/permissions.py
@@ -7,3 +7,4 @@ from permissions.models import Permission, PermissionNamespace
ocr_namespace = PermissionNamespace('ocr', _('OCR'))
PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _('Submit documents for OCR'))
PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _('Delete documents from OCR queue'))
+PERMISSION_OCR_CONTENT_VIEW = Permission.objects.register(ocr_namespace, 'ocr_content_view', _('Can view the transcribed text from document'))
diff --git a/mayan/apps/ocr/urls.py b/mayan/apps/ocr/urls.py
index 09da9ebe4e..9805a86ce5 100644
--- a/mayan/apps/ocr/urls.py
+++ b/mayan/apps/ocr/urls.py
@@ -6,6 +6,7 @@ from .api_views import DocumentVersionOCRView
urlpatterns = patterns(
'ocr.views',
+    url(r'^(?P<document_id>\d+)/content/$', 'document_content', name='document_content'),
url(r'^document/(?P\d+)/submit/$', 'document_submit', name='document_submit'),
url(r'^document/multiple/submit/$', 'document_submit_multiple', name='document_submit_multiple'),
diff --git a/mayan/apps/ocr/views.py b/mayan/apps/ocr/views.py
index d39a19985b..e9b9e45ba4 100644
--- a/mayan/apps/ocr/views.py
+++ b/mayan/apps/ocr/views.py
@@ -13,12 +13,36 @@ from acls.models import AccessEntry
from documents.models import Document, DocumentVersion
from permissions.models import Permission
+from .forms import DocumentContentForm
from .models import DocumentVersionOCRError
from .permissions import (
- PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE
+ PERMISSION_OCR_CONTENT_VIEW, PERMISSION_OCR_DOCUMENT,
+ PERMISSION_OCR_DOCUMENT_DELETE
)
+def document_content(request, document_id):
+    """Render the text content of a document in a read-only form."""
+    document = get_object_or_404(Document, pk=document_id)
+
+    # Try the user-wide permission first, then the access entry for this document
+    try:
+        Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CONTENT_VIEW])
+    except PermissionDenied:
+        AccessEntry.objects.check_access(PERMISSION_OCR_CONTENT_VIEW, request.user, document)
+
+    document.add_as_recent_document_for_user(request.user)
+    context = {
+        'document': document,
+        'form': DocumentContentForm(document=document),
+        'hide_labels': True,
+        'object': document,
+        'read_only': True,
+        'title': _('Content of document: %s') % document,
+    }
+    return render_to_response('appearance/generic_form.html', context, context_instance=RequestContext(request))
+
+
def document_submit(request, pk):
document = get_object_or_404(Document, pk=pk)