Add support for downloading a document's OCR text.
Closes GitLab issue #215. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -1,3 +1,8 @@
|
||||
2.5 (2017-07-XX)
|
||||
===============
|
||||
- Add view to download a document's OCR text. GitLab issue #215.
|
||||
|
||||
|
||||
2.4 (2017-06-23)
|
||||
================
|
||||
- Add Django-mathfilters.
|
||||
@@ -9,7 +14,7 @@
|
||||
- Make tags, metadata types and cabinets searchable via the dynamic search API. GitLab issue #344.
|
||||
- Add support for updating configuration options from environment variables.
|
||||
- Add purgelocks management command. GitLab issue #221.
|
||||
- Fix index rebuilding for multi value first levels. GitLab issue #391.
|
||||
- Fix index rebuilding for multi value first levels. GitLab issue #391.
|
||||
- Truncate views titles via the APPEARANCE_MAXIMUM_TITLE_LENGTH setting. GitLab issue #217.
|
||||
- Add background task manager app. GitLab issue #132.
|
||||
- Add link to show a document's OCR errors. GitLab issue #291.
|
||||
|
||||
84
docs/releases/2.5.rst
Normal file
84
docs/releases/2.5.rst
Normal file
@@ -0,0 +1,84 @@
|
||||
=============================
|
||||
Mayan EDMS v2.5 release notes
|
||||
=============================
|
||||
|
||||
Released: July XX, 2017
|
||||
|
||||
What's new
|
||||
==========
|
||||
|
||||
|
||||
Other Changes
|
||||
-------------
|
||||
- Add view to download a document's OCR text. GitLab issue #215.
|
||||
- Add missing OCR migration.
|
||||
- Improve error output of the performupgrade command to debug upgrade errors
|
||||
that could stop an upgrade (missing document files, etc).
|
||||
- Enable the django-mathfilters app added in version 2.4.
|
||||
- Do a complete pull and synchronization of the translations to fix missing
|
||||
translations for Polish. Thanks to Wojtek Warczakowski for the report.
|
||||
- Allow null for the SANE source resolution field. Even though the field was
|
||||
marked as allowing blank values it was failing because it is a number field
|
||||
and number fields need to allow explicit null values when left blank.
|
||||
- Rename the mayan_task_manager app to task_manager.
|
||||
- Make the task manager translatable.
|
||||
- Add Turkish to the list of processed languages.
|
||||
|
||||
|
||||
Removals
|
||||
--------
|
||||
* None
|
||||
|
||||
Upgrading from a previous version
|
||||
---------------------------------
|
||||
|
||||
Using PIP
|
||||
~~~~~~~~~
|
||||
|
||||
Type in the console::
|
||||
|
||||
$ pip install -U mayan-edms
|
||||
|
||||
The requirements will also be updated automatically.
|
||||
|
||||
Using Git
|
||||
~~~~~~~~~
|
||||
|
||||
If you installed Mayan EDMS by cloning the Git repository, issue the commands::
|
||||
|
||||
$ git reset --hard HEAD
|
||||
$ git pull
|
||||
|
||||
Otherwise, download the compressed archive and uncompress it, overriding the
|
||||
existing installation.
|
||||
|
||||
Next upgrade/add the new requirements::
|
||||
|
||||
$ pip install --upgrade -r requirements.txt
|
||||
|
||||
Common steps
|
||||
~~~~~~~~~~~~
|
||||
|
||||
Migrate existing database schema with::
|
||||
|
||||
$ mayan-edms.py performupgrade
|
||||
|
||||
Add new static media::
|
||||
|
||||
$ mayan-edms.py collectstatic --noinput
|
||||
|
||||
The upgrade procedure is now complete.
|
||||
|
||||
|
||||
Backward incompatible changes
|
||||
=============================
|
||||
|
||||
* None
|
||||
|
||||
Bugs fixed or issues closed
|
||||
===========================
|
||||
|
||||
* `GitLab issue #215 <https://gitlab.com/mayan-edms/mayan-edms/issues/215>`_ Download text contents
|
||||
|
||||
|
||||
.. _PyPI: https://pypi.python.org/pypi/mayan-edms/
|
||||
@@ -23,7 +23,8 @@ from rest_api.classes import APIEndPoint
|
||||
|
||||
from .handlers import initialize_new_ocr_settings, post_version_upload_ocr
|
||||
from .links import (
|
||||
link_document_content, link_document_ocr_erros_list, link_document_submit,
|
||||
link_document_content, link_document_ocr_download,
|
||||
link_document_ocr_erros_list, link_document_submit,
|
||||
link_document_submit_all, link_document_submit_multiple,
|
||||
link_document_type_ocr_settings, link_document_type_submit,
|
||||
link_entry_list
|
||||
@@ -130,9 +131,13 @@ class OCRApp(MayanAppConfig):
|
||||
links=(link_document_type_ocr_settings,), sources=(DocumentType,)
|
||||
)
|
||||
menu_secondary.bind_links(
|
||||
links=(link_document_content, link_document_ocr_erros_list,),
|
||||
links=(
|
||||
link_document_content, link_document_ocr_erros_list,
|
||||
link_document_ocr_download
|
||||
),
|
||||
sources=(
|
||||
'ocr:document_content', 'ocr:document_ocr_error_list'
|
||||
'ocr:document_content', 'ocr:document_ocr_error_list',
|
||||
'ocr:document_ocr_download',
|
||||
)
|
||||
)
|
||||
menu_secondary.bind_links(
|
||||
|
||||
@@ -41,3 +41,8 @@ link_document_ocr_erros_list = Link(
|
||||
permissions=(permission_ocr_content_view,), text=_('OCR errors'),
|
||||
view='ocr:document_ocr_error_list'
|
||||
)
|
||||
# Link to the 'ocr:document_ocr_download' view for the resolved object;
# it is declared with the OCR content view permission.
link_document_ocr_download = Link(
    view='ocr:document_ocr_download',
    text=_('Download OCR text'),
    permissions=(permission_ocr_content_view,),
    icon='fa fa-file-text-o',
    args='resolved_object.id',
)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.test import override_settings
|
||||
from django.utils.encoding import force_text
|
||||
|
||||
from documents.tests.test_views import GenericDocumentViewTestCase
|
||||
|
||||
from ..permissions import permission_ocr_content_view
|
||||
from ..utils import get_document_ocr_content
|
||||
|
||||
|
||||
@override_settings(OCR_AUTO_OCR=True)
|
||||
@@ -35,3 +37,26 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
|
||||
self.assertContains(
|
||||
response, 'Mayan EDMS Documentation', status_code=200
|
||||
)
|
||||
|
||||
def test_document_ocr_download_view_no_permission(self):
    """Request the OCR download view without granting any permission."""
    document_pk = self.document.pk
    denied_response = self.get(
        'ocr:document_ocr_download', args=(document_pk,)
    )

    # Without the permission the view is expected to refuse the request.
    self.assertEqual(denied_response.status_code, 403)
|
||||
|
||||
def test_document_download_view_with_permission(self):
    """Request the OCR download view after granting the view permission."""
    # NOTE(review): presumably read by assert_download_response when it
    # checks the response headers -- confirm against the base test case.
    self.expected_content_type = 'application/octet-stream; charset=utf-8'
    self.grant(permission=permission_ocr_content_view)

    document_pk = self.document.pk
    download_response = self.get(
        'ocr:document_ocr_download', args=(document_pk,)
    )
    self.assertEqual(download_response.status_code, 200)

    # The download must match the concatenation of every chunk yielded
    # for the document by get_document_ocr_content.
    expected_text = ''.join(
        get_document_ocr_content(document=self.document)
    )
    self.assert_download_response(download_response, content=expected_text)
|
||||
|
||||
@@ -6,9 +6,9 @@ from .api_views import (
|
||||
APIDocumentOCRView, APIDocumentPageContentView, APIDocumentVersionOCRView
|
||||
)
|
||||
from .views import (
|
||||
DocumentAllSubmitView, DocumentOCRContent, DocumentOCRErrorsListView,
|
||||
DocumentSubmitView, DocumentSubmitManyView, DocumentTypeSettingsEditView,
|
||||
DocumentTypeSubmitView, EntryListView
|
||||
DocumentAllSubmitView, DocumentOCRContent, DocumentOCRDownloadView,
|
||||
DocumentOCRErrorsListView, DocumentSubmitView, DocumentSubmitManyView,
|
||||
DocumentTypeSettingsEditView, DocumentTypeSubmitView, EntryListView
|
||||
)
|
||||
|
||||
urlpatterns = [
|
||||
@@ -41,6 +41,10 @@ urlpatterns = [
|
||||
r'^documents/(?P<pk>\d+)/ocr/errors/$',
|
||||
DocumentOCRErrorsListView.as_view(), name='document_ocr_error_list'
|
||||
),
|
||||
url(
|
||||
r'^documents/(?P<pk>\d+)/ocr/download/$',
|
||||
DocumentOCRDownloadView.as_view(), name='document_ocr_download'
|
||||
),
|
||||
url(r'^all/$', EntryListView.as_view(), name='entry_list'),
|
||||
]
|
||||
|
||||
|
||||
14
mayan/apps/ocr/utils.py
Normal file
14
mayan/apps/ocr/utils.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.utils.encoding import force_unicode
|
||||
from django.utils.html import conditional_escape
|
||||
|
||||
|
||||
def get_document_ocr_content(document):
    """
    Yield the OCR text of ``document`` one page at a time.

    Iterates over ``document.pages.all()`` and, for every page, reads
    ``page.ocr_content.content``. Pages for which that lookup raises
    ``DocumentPageContent.DoesNotExist`` are skipped. Every yielded value
    is the page content coerced with ``force_unicode`` and then passed
    through ``conditional_escape``.
    """
    # This module never imported DocumentPageContent, so the ``except``
    # clause below raised NameError as soon as a page had no OCR content.
    # NOTE(review): assumes the model lives in this app's ``models``
    # module -- confirm.
    from .models import DocumentPageContent

    for page in document.pages.all():
        try:
            page_content = page.ocr_content.content
        except DocumentPageContent.DoesNotExist:
            # Skip this page; the lookup found no OCR content for it.
            continue

        yield conditional_escape(force_unicode(page_content))
|
||||
@@ -8,8 +8,8 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from acls.models import AccessControlList
|
||||
from common.generics import (
|
||||
ConfirmView, FormView, SingleObjectDetailView, SingleObjectEditView,
|
||||
SingleObjectListView
|
||||
ConfirmView, FormView, SingleObjectDetailView, SingleObjectDownloadView,
|
||||
SingleObjectEditView, SingleObjectListView
|
||||
)
|
||||
from common.mixins import MultipleInstanceActionMixin
|
||||
from documents.models import Document, DocumentType
|
||||
@@ -20,6 +20,7 @@ from .permissions import (
|
||||
permission_ocr_content_view, permission_ocr_document,
|
||||
permission_document_type_ocr_setup
|
||||
)
|
||||
from .utils import get_document_ocr_content
|
||||
|
||||
|
||||
class DocumentAllSubmitView(ConfirmView):
|
||||
@@ -174,3 +175,16 @@ class DocumentOCRErrorsListView(SingleObjectListView):
|
||||
|
||||
def get_queryset(self):
|
||||
return self.get_document().latest_version.ocr_errors.all()
|
||||
|
||||
|
||||
class DocumentOCRDownloadView(SingleObjectDownloadView):
    """Download view that serves the OCR text of a single document."""
    model = Document
    object_permission = permission_ocr_content_view

    def get_file(self):
        """
        Return a ``VirtualFile`` wrapping the document's OCR text.

        The content comes from the iterator returned by
        ``get_document_ocr_content`` wrapped in a ``TextIteratorIO``. The
        file name is the document's string form followed by ``-OCR``.
        """
        # Resolve the document once and reuse it for both the content and
        # the file name; the original called get_object() twice.
        document = self.get_object()

        file_object = DocumentOCRDownloadView.TextIteratorIO(
            iterator=get_document_ocr_content(document=document)
        )
        return DocumentOCRDownloadView.VirtualFile(
            file=file_object, name='{}-OCR'.format(document)
        )
|
||||
|
||||
Reference in New Issue
Block a user