2017-07-01 01:07:23 -04:00
parent 610e10e85a
commit 916c3497c4
8 changed files with 165 additions and 9 deletions
							
							
							
						
@@ -1,3 +1,8 @@
2.5 (2017-07-XX)
===============
- Add view to download a document's OCR text. GitLab issue #215.
2.4 (2017-06-23)
================
- Add Django-mathfilters.
							
							
							
								
							
						
@@ -9,7 +14,7 @@
- Make tags, metadata types and cabinets searchable via the dynamic search API. GitLab issue #344.
- Add support for updating configuration options from environment variables.
- Add purgelocks management command. GitLab issue #221.
- Fix index rebuilding for multi value first levels. GitLab issue #391. 
- Fix index rebuilding for multi value first levels. GitLab issue #391.
- Truncate view titles via the APPEARANCE_MAXIMUM_TITLE_LENGTH setting. GitLab issue #217.
- Add background task manager app. GitLab issue #132.
- Add link to show a document's OCR errors. GitLab issue #291.
							
								
							
							
							
						
 
							
							
							
						
@@ -0,0 +1,84 @@
=============================
Mayan EDMS v2.5 release notes
=============================
Released: July XX, 2017
What's new
==========
Other Changes
-------------
- Add view to download a document's OCR text. GitLab issue #215.
- Add missing OCR migration.
- Improve error output of the performupgrade command to debug upgrade errors
  that could stop an upgrade (missing document files, etc).
- Enable the django-mathfilters app added in version 2.4.
- Do a complete pull and synchronization of the translations to fix missing
  translations for Polish. Thanks to Wojtek Warczakowski for the report.
- Allow null for the SANE source resolution field. Even though the field was
  marked as allowing blank values it was failing because it is a number field
  and number fields need to allow explicit null values when left blank.
- Rename the mayan_task_manager app to task_manager.
- Make the task manager translatable.
- Add Turkish to the list of processed languages.
Removals
--------
* None
Upgrading from a previous version
---------------------------------
Using PIP
~~~~~~~~~
Type in the console::
    $ pip install -U mayan-edms
The requirements will also be updated automatically.
Using Git
~~~~~~~~~
If you installed Mayan EDMS by cloning the Git repository, issue the commands::
    $ git reset --hard HEAD
    $ git pull
otherwise download the compressed archive and uncompress it, overwriting the
existing installation.
Next upgrade/add the new requirements::
    $ pip install --upgrade -r requirements.txt
Common steps
~~~~~~~~~~~~
Migrate existing database schema with::
    $ mayan-edms.py performupgrade
Add new static media::
    $ mayan-edms.py collectstatic --noinput
The upgrade procedure is now complete.
Backward incompatible changes
=============================
* None
Bugs fixed or issues closed
===========================
* `GitLab issue #215 <https://gitlab.com/mayan-edms/mayan-edms/issues/215>`_ Download text contents
.. _PyPI: https://pypi.python.org/pypi/mayan-edms/
							
							
								
							
							
						
@@ -23,7 +23,8 @@ from rest_api.classes import APIEndPoint
from .handlers import initialize_new_ocr_settings, post_version_upload_ocr
from .links import (
    link_document_content, link_document_ocr_erros_list, link_document_submit,
    link_document_content, link_document_ocr_download,
    link_document_ocr_erros_list, link_document_submit,
    link_document_submit_all, link_document_submit_multiple,
    link_document_type_ocr_settings, link_document_type_submit,
    link_entry_list
							
								
							
							
								
							
							
						
@@ -130,9 +131,13 @@ class OCRApp(MayanAppConfig):
            links=(link_document_type_ocr_settings,), sources=(DocumentType,)
        )
        menu_secondary.bind_links(
            links=(link_document_content, link_document_ocr_erros_list,),
            links=(
                link_document_content, link_document_ocr_erros_list,
                link_document_ocr_download
            ),
            sources=(
                'ocr:document_content', 'ocr:document_ocr_error_list'
                'ocr:document_content', 'ocr:document_ocr_error_list',
                'ocr:document_ocr_download',
            )
        )
        menu_secondary.bind_links(
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -41,3 +41,8 @@ link_document_ocr_erros_list = Link(
    permissions=(permission_ocr_content_view,), text=_('OCR errors'),
    view='ocr:document_ocr_error_list'
)
# Link pointing at the 'ocr:document_ocr_download' view for the resolved
# object; shown only to users holding the OCR content view permission.
link_document_ocr_download = Link(
    view='ocr:document_ocr_download',
    text=_('Download OCR text'),
    icon='fa fa-file-text-o',
    args='resolved_object.id',
    permissions=(permission_ocr_content_view,),
)
							
							
							
						
 
							
							
							
						
@@ -1,10 +1,12 @@
from __future__ import unicode_literals
from django.test import override_settings
from django.utils.encoding import force_text
from documents.tests.test_views import GenericDocumentViewTestCase
from ..permissions import permission_ocr_content_view
from ..utils import get_document_ocr_content
@override_settings(OCR_AUTO_OCR=True)
							
								
							
							
								
							
							
						
@@ -35,3 +37,26 @@ class OCRViewsTestCase(GenericDocumentViewTestCase):
        self.assertContains(
            response, 'Mayan EDMS Documentation', status_code=200
        )
    def test_document_ocr_download_view_no_permission(self):
        response = self.get(
            'ocr:document_ocr_download', args=(self.document.pk,)
        )
        self.assertEqual(response.status_code, 403)
    def test_document_download_view_with_permission(self):
        """
        With the OCR content view permission granted, the download view
        answers 200 and the response carries the document's joined OCR text.
        """
        self.expected_content_type = 'application/octet-stream; charset=utf-8'
        self.grant(permission=permission_ocr_content_view)

        download_args = (self.document.pk,)
        response = self.get('ocr:document_ocr_download', args=download_args)
        self.assertEqual(response.status_code, 200)

        # Build the expected payload the same way the view does: by
        # concatenating the per-page OCR content of the document.
        expected_text = ''.join(
            get_document_ocr_content(document=self.document)
        )
        self.assert_download_response(response, content=expected_text)
							
							
							
						
 
							
							
								
							
							
						
@@ -6,9 +6,9 @@ from .api_views import (
    APIDocumentOCRView, APIDocumentPageContentView, APIDocumentVersionOCRView
)
from .views import (
    DocumentAllSubmitView, DocumentOCRContent, DocumentOCRErrorsListView,
    DocumentSubmitView, DocumentSubmitManyView, DocumentTypeSettingsEditView,
    DocumentTypeSubmitView, EntryListView
    DocumentAllSubmitView, DocumentOCRContent, DocumentOCRDownloadView,
    DocumentOCRErrorsListView, DocumentSubmitView, DocumentSubmitManyView,
    DocumentTypeSettingsEditView, DocumentTypeSubmitView, EntryListView
)
urlpatterns = [
							
								
							
							
								
							
							
						
@@ -41,6 +41,10 @@ urlpatterns = [
        r'^documents/(?P<pk>\d+)/ocr/errors/$',
        DocumentOCRErrorsListView.as_view(), name='document_ocr_error_list'
    ),
    url(
        r'^documents/(?P<pk>\d+)/ocr/download/$',
        DocumentOCRDownloadView.as_view(), name='document_ocr_download'
    ),
    url(r'^all/$', EntryListView.as_view(), name='entry_list'),
]
							
								
							
							
							
						
 
							
							
							
						
@@ -0,0 +1,14 @@
from __future__ import unicode_literals
from django.utils.encoding import force_unicode
from django.utils.html import conditional_escape
def get_document_ocr_content(document):
    """
    Yield the OCR text of every page of ``document`` that has OCR content.

    Pages are read from ``document.pages.all()``. A page whose
    ``ocr_content`` lookup raises ``DocumentPageContent.DoesNotExist`` is
    skipped. Each yielded value is the page content converted with
    ``force_unicode`` and then passed through ``conditional_escape``.
    """
    # Imported at function scope because this module's top-level imports do
    # not bring DocumentPageContent into scope; without it the ``except``
    # clause below raises NameError the first time a page has no OCR content.
    # NOTE(review): assumes the model lives in this app's ``models`` module.
    from .models import DocumentPageContent

    for page in document.pages.all():
        try:
            page_content = page.ocr_content.content
        except DocumentPageContent.DoesNotExist:
            # Nothing stored for this page; move on to the next one.
            continue

        yield conditional_escape(force_unicode(page_content))
							
							
								
							
							
						
@@ -8,8 +8,8 @@ from django.utils.translation import ugettext_lazy as _
from acls.models import AccessControlList
from common.generics import (
    ConfirmView, FormView, SingleObjectDetailView, SingleObjectEditView,
    SingleObjectListView
    ConfirmView, FormView, SingleObjectDetailView, SingleObjectDownloadView,
    SingleObjectEditView, SingleObjectListView
)
from common.mixins import MultipleInstanceActionMixin
from documents.models import Document, DocumentType
							
							
							
								
							
						
@@ -20,6 +20,7 @@ from .permissions import (
    permission_ocr_content_view, permission_ocr_document,
    permission_document_type_ocr_setup
)
from .utils import get_document_ocr_content
class DocumentAllSubmitView(ConfirmView):
							
								
							
							
								
							
							
						
@@ -174,3 +175,16 @@ class DocumentOCRErrorsListView(SingleObjectListView):
    def get_queryset(self):
        """Return all OCR errors attached to the document's latest version."""
        latest_version = self.get_document().latest_version
        return latest_version.ocr_errors.all()
class DocumentOCRDownloadView(SingleObjectDownloadView):
    """
    Download view that serves the OCR text of a single ``Document``.

    ``object_permission`` is set to ``permission_ocr_content_view``, the same
    permission used by the OCR content links.
    """
    model = Document
    object_permission = permission_ocr_content_view

    def get_file(self):
        """
        Build the file object to be downloaded.

        Returns a ``VirtualFile`` wrapping a ``TextIteratorIO`` fed by
        ``get_document_ocr_content`` and named ``'<document>-OCR'``.
        """
        # Resolve the document once and reuse it; calling get_object() for
        # both the content iterator and the file name repeats the lookup.
        document = self.get_object()

        file_object = DocumentOCRDownloadView.TextIteratorIO(
            iterator=get_document_ocr_content(document=document)
        )
        return DocumentOCRDownloadView.VirtualFile(
            file=file_object, name='{}-OCR'.format(document)
        )