Fix tests

Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
This commit is contained in:
Roberto Rosario
2019-10-09 21:06:38 -04:00
parent a9077cb47a
commit cf697d3ea7
16 changed files with 215 additions and 88 deletions

View File

@@ -19,10 +19,24 @@ class Migration(migrations.Migration):
name='document_page',
field=models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
name='document_version_page',
related_name='content',
to='documents.DocumentVersionPage',
verbose_name='Document version page'
),
),
migrations.RenameField(
model_name='documentversionpagecontent',
old_name='document_page',
new_name='document_version_page',
),
migrations.AlterModelOptions(
name='documentversionpagecontent',
options={
'verbose_name': 'Document version page content',
'verbose_name_plural': 'Document version pages contents'
},
),
]

View File

@@ -64,8 +64,8 @@ document_page_search = SearchModel(
document_version_page_search = SearchModel(
app_label='documents', list_mode=LIST_MODE_CHOICE_ITEM,
model_name='DocumentVersionPage', permission=permission_document_view,
#queryset=get_queryset_document_version_page_search_queryset,
serializer_path='mayan.apps.documents.serializers.DocumentPageVersionSerializer'
queryset=get_queryset_document_version_page_search_queryset,
serializer_path='mayan.apps.documents.serializers.DocumentVersionPageSerializer'
)
document_page_search.add_model_field(

View File

@@ -10,7 +10,8 @@ from .models import (
@admin.register(DocumentVersionPageOCRContent)
class DocumentVersionPageOCRContentAdmin(admin.ModelAdmin):
list_display = ('document_version_page',)
pass
#list_display = ('document_page',)
@admin.register(DocumentTypeSettings)

View File

@@ -34,17 +34,19 @@ from .links import (
link_document_ocr_content_delete_multiple, link_document_ocr_download,
link_document_ocr_errors_list, link_document_submit,
link_document_submit_multiple, link_document_type_ocr_settings,
link_document_type_submit, link_entry_list
link_document_type_submit, link_document_version_page_ocr_content,
link_entry_list
)
from .methods import (
method_document_ocr_submit, method_document_version_ocr_submit
method_document_ocr_submit, method_document_page_get_ocr_content,
method_document_version_ocr_submit
)
from .permissions import (
permission_document_type_ocr_setup, permission_ocr_document,
permission_ocr_content_view
)
from .signals import post_document_version_ocr
from .utils import get_document_ocr_content
from .utils import get_document_ocr_content, get_document_version_ocr_content
logger = logging.getLogger(__name__)
@@ -75,6 +77,9 @@ class OCRApp(MayanAppConfig):
DocumentVersion = apps.get_model(
app_label='documents', model_name='DocumentVersion'
)
DocumentVersionPage = apps.get_model(
app_label='documents', model_name='DocumentVersionPage'
)
DocumentVersionOCRError = self.get_model(
model_name='DocumentVersionOCRError'
@@ -83,8 +88,11 @@ class OCRApp(MayanAppConfig):
Document.add_to_class(
name='submit_for_ocr', value=method_document_ocr_submit
)
DocumentPage.add_to_class(
name='get_ocr_content', value=method_document_page_get_ocr_content
)
DocumentVersion.add_to_class(
name='ocr_content', value=get_document_ocr_content
name='ocr_content', value=get_document_version_ocr_content
)
DocumentVersion.add_to_class(
name='submit_for_ocr', value=method_document_version_ocr_submit
@@ -98,9 +106,9 @@ class OCRApp(MayanAppConfig):
)
)
#ModelField(
# model=Document, name='versions__pages__ocr_content__content'
#)
ModelField(
model=Document, name='versions__pages__ocr_content__content'
)
ModelPermission.register(
model=Document, permissions=(
@@ -145,6 +153,10 @@ class OCRApp(MayanAppConfig):
menu_list_facet.bind_links(
links=(link_document_page_ocr_content,), sources=(DocumentPage,)
)
menu_list_facet.bind_links(
links=(link_document_version_page_ocr_content,),
sources=(DocumentVersionPage,)
)
menu_list_facet.bind_links(
links=(link_document_type_ocr_settings,), sources=(DocumentType,)
)

View File

@@ -28,15 +28,26 @@ class DocumentPageOCRContentForm(forms.Form):
content = ''
self.fields['contents'].initial = ''
try:
page_content = page.ocr_content.content
except DocumentVersionPageOCRContent.DoesNotExist:
pass
else:
content = conditional_escape(force_text(page_content))
content = conditional_escape(
force_text(self.get_instance_ocr_content(instance=page))
)
self.fields['contents'].initial = mark_safe(content)
def get_instance_ocr_content(self, instance):
    """
    Return the OCR text to display for a document page.

    The text is read from the OCR content attached to the object the page
    points to (``instance.content_object``). An empty string is returned
    when there is no OCR content record, or when the attribute chain
    cannot be followed (for example, the page has no content object).
    """
    try:
        return instance.content_object.ocr_content.content
    except (AttributeError, DocumentVersionPageOCRContent.DoesNotExist):
        # AttributeError covers a missing/None content object; treat it
        # the same as "no OCR content yet" so the form renders empty
        # instead of failing.
        return ''
class DocumentVersionPageOCRContentForm(DocumentPageOCRContentForm):
    """
    Variant of the page OCR content form whose instance carries the OCR
    content directly, instead of through a ``content_object``.
    """

    def get_instance_ocr_content(self, instance):
        """
        Return ``instance.ocr_content.content``, or an empty string when
        the OCR content is missing or the attributes cannot be resolved.
        """
        try:
            ocr_text = instance.ocr_content.content
        except (AttributeError, DocumentVersionPageOCRContent.DoesNotExist):
            ocr_text = ''

        return ocr_text
class DocumentOCRContentForm(forms.Form):
"""
@@ -54,19 +65,15 @@ class DocumentOCRContentForm(forms.Form):
)
def __init__(self, *args, **kwargs):
self.document = kwargs.pop('instance', None)
document = kwargs.pop('instance', None)
super(DocumentOCRContentForm, self).__init__(*args, **kwargs)
content = []
self.fields['contents'].initial = ''
try:
document_pages = self.document.pages.all()
except AttributeError:
document_pages = []
for page in document_pages:
for document_page in document.pages.all():
try:
page_content = page.ocr_content.content
except DocumentVersionPageOCRContent.DoesNotExist:
page_content = document_page.content_object.ocr_content.content
except (AttributeError, DocumentVersionPageOCRContent.DoesNotExist):
pass
else:
content.append(conditional_escape(force_text(page_content)))
@@ -74,7 +81,7 @@ class DocumentOCRContentForm(forms.Form):
'\n\n\n<hr/><div class="document-page-content-divider">- %s -</div><hr/>\n\n\n' % (
ugettext(
'Page %(page_number)d'
) % {'page_number': page.page_number}
) % {'page_number': document_page.page_number}
)
)

View File

@@ -19,7 +19,7 @@ icon_document_ocr_errors_list = Icon(
icon_document_type_ocr_settings = Icon(
driver_name='fontawesome', symbol='font'
)
icon_document_type_submit = Icon(driver_name='fontawesome', symbol='font')
icon_entry_list = Icon(driver_name='fontawesome', symbol='font')
icon_document_submit = icon_document_multiple_submit
icon_document_type_submit = Icon(driver_name='fontawesome', symbol='font')
icon_document_version_page_ocr_content = Icon(driver_name='fontawesome', symbol='font')
icon_entry_list = Icon(driver_name='fontawesome', symbol='font')

View File

@@ -15,7 +15,7 @@ def is_document_page_disabled(context):
link_document_page_ocr_content = Link(
args='resolved_object.id', conditional_disable=is_document_page_disabled,
args='resolved_object.id',# conditional_disable=is_document_page_disabled,
icon_class_path='mayan.apps.ocr.icons.icon_document_page_ocr_content',
permissions=(permission_ocr_content_view,), text=_('OCR'),
view='ocr:document_page_ocr_content',
@@ -58,10 +58,11 @@ link_document_type_submit = Link(
permissions=(permission_ocr_document,), text=_('OCR documents per type'),
view='ocr:document_type_submit'
)
link_entry_list = Link(
icon_class_path='mayan.apps.ocr.icons.icon_entry_list',
permissions=(permission_ocr_document,), text=_('OCR errors'),
view='ocr:entry_list'
link_document_version_page_ocr_content = Link(
args='resolved_object.id',
icon_class_path='mayan.apps.ocr.icons.icon_document_version_page_ocr_content',
permissions=(permission_ocr_content_view,), text=_('OCR'),
view='ocr:document_version_page_ocr_content',
)
link_document_ocr_errors_list = Link(
args='resolved_object.id',
@@ -75,3 +76,8 @@ link_document_ocr_download = Link(
permissions=(permission_ocr_content_view,), text=_('Download OCR text'),
view='ocr:document_ocr_download'
)
# Link to the `ocr:entry_list` view, labelled "OCR errors". It takes no
# object argument and is restricted by the OCR document permission.
link_entry_list = Link(
icon_class_path='mayan.apps.ocr.icons.icon_entry_list',
permissions=(permission_ocr_document,), text=_('OCR errors'),
view='ocr:entry_list'
)

View File

@@ -9,7 +9,9 @@ from django.conf import settings
from django.db import models, transaction
from mayan.apps.documents.literals import DOCUMENT_IMAGE_TASK_TIMEOUT
from mayan.apps.documents.tasks import task_generate_document_page_image
from mayan.apps.documents.tasks import (
task_generate_document_version_page_image
)
from .events import (
event_ocr_document_content_deleted, event_ocr_document_version_finish
@@ -24,25 +26,28 @@ class DocumentPageOCRContentManager(models.Manager):
def delete_content_for(self, document, user=None):
with transaction.atomic():
for document_page in document.pages.all():
self.filter(document_page=document_page).delete()
self.filter(
document_version_page=document_page.content_object
).delete()
event_ocr_document_content_deleted.commit(
actor=user, target=document
)
def process_document_page(self, document_page):
def process_document_version_page(self, document_version_page):
logger.info(
'Processing page: %d of document version: %s',
document_page.page_number, document_page.document_version
document_version_page.page_number,
document_version_page.document_version
)
DocumentPageOCRContent = apps.get_model(
app_label='ocr', model_name='DocumentPageOCRContent'
DocumentVersionPageOCRContent = apps.get_model(
app_label='ocr', model_name='DocumentVersionPageOCRContent'
)
task = task_generate_document_page_image.apply_async(
task = task_generate_document_version_page_image.apply_async(
kwargs=dict(
document_page_id=document_page.pk
document_version_page_id=document_version_page.pk
)
)
@@ -50,19 +55,20 @@ class DocumentPageOCRContentManager(models.Manager):
timeout=DOCUMENT_IMAGE_TASK_TIMEOUT, disable_sync_subtasks=False
)
with document_page.cache_partition.get_file(filename=cache_filename).open() as file_object:
document_page_content, created = DocumentPageOCRContent.objects.get_or_create(
document_page=document_page
with document_version_page.cache_partition.get_file(filename=cache_filename).open() as file_object:
document_version_page_content, created = DocumentVersionPageOCRContent.objects.get_or_create(
document_version_page=document_version_page
)
document_page_content.content = ocr_backend.execute(
document_version_page_content.content = ocr_backend.execute(
file_object=file_object,
language=document_page.document.language
language=document_version_page.document.language
)
document_page_content.save()
document_version_page_content.save()
logger.info(
'Finished processing page: %d of document version: %s',
document_page.page_number, document_page.document_version
document_version_page.page_number,
document_version_page.document_version
)
def process_document_version(self, document_version):
@@ -70,8 +76,10 @@ class DocumentPageOCRContentManager(models.Manager):
logger.debug('document version: %d', document_version.pk)
try:
for document_page in document_version.pages.all():
self.process_document_page(document_page=document_page)
for document_version_page in document_version.pages.all():
self.process_document_version_page(
document_version_page=document_version_page
)
except Exception as exception:
logger.error(
'OCR error for document version: %d; %s', document_version.pk,

View File

@@ -2,6 +2,7 @@ from __future__ import unicode_literals
from datetime import timedelta
from django.apps import apps
from django.utils.timezone import now
from mayan.apps.common.settings import settings_db_sync_task_delay
@@ -17,6 +18,17 @@ def method_document_ocr_submit(self):
latest_version.submit_for_ocr()
def method_document_page_get_ocr_content(self):
    """
    Return the OCR text of the object this page points to.

    The value is read from ``self.content_object.ocr_content.content``.
    ``None`` is returned when the OCR content record does not exist or
    when any attribute in that chain cannot be resolved.
    """
    ocr_content_model = apps.get_model(
        app_label='ocr', model_name='DocumentVersionPageOCRContent'
    )

    try:
        ocr_text = self.content_object.ocr_content.content
    except (AttributeError, ocr_content_model.DoesNotExist):
        ocr_text = None

    return ocr_text
def method_document_version_ocr_submit(self):
event_ocr_document_version_submit.commit(
action_object=self.document, target=self

View File

@@ -19,10 +19,24 @@ class Migration(migrations.Migration):
name='document_page',
field=models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
name='document_version_page',
#name='document_version_page',
related_name='ocr_content',
to='documents.DocumentVersionPage',
verbose_name='Document version page'
),
),
migrations.RenameField(
model_name='documentversionpageocrcontent',
old_name='document_page',
new_name='document_version_page',
),
migrations.AlterModelOptions(
name='documentversionpageocrcontent',
options={
'verbose_name': 'Document version page OCR content',
'verbose_name_plural': 'Document version pages OCR contents'
},
),
]

View File

@@ -12,13 +12,34 @@ from ..permissions import (
from .literals import TEST_DOCUMENT_CONTENT
class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
class OCRAPIViewTestMixin(object):
    """
    Request helpers for the OCR REST API view tests. Each helper issues
    one request against ``self.test_document`` and returns the response.
    """

    def _request_document_ocr_submit_view(self):
        """POST to the document OCR submit API view."""
        url_kwargs = {'pk': self.test_document.pk}

        return self.post(
            viewname='rest_api:document-ocr-submit-view', kwargs=url_kwargs
        )

    def _request_document_version_ocr_submit_view(self):
        """POST to the OCR submit API view of the latest document version."""
        document = self.test_document
        url_kwargs = {
            'document_pk': document.pk,
            'version_pk': document.latest_version.pk
        }

        return self.post(
            viewname='rest_api:document-version-ocr-submit-view',
            kwargs=url_kwargs
        )

    def _request_document_version_page_content_view(self):
        """GET the OCR content API view for the latest version's first page."""
        document = self.test_document
        url_kwargs = {
            'document_pk': document.pk,
            'version_pk': document.latest_version.pk,
            'page_pk': document.latest_version.pages.first().pk,
        }

        return self.get(
            viewname='rest_api:document-page-ocr-content-view',
            kwargs=url_kwargs
        )
class OCRAPIViewTestCase(
OCRAPIViewTestMixin, DocumentTestMixin, BaseAPITestCase
):
def test_submit_document_no_access(self):
response = self._request_document_ocr_submit_view()
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
@@ -35,15 +56,9 @@ class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
self.assertTrue(
hasattr(self.test_document.pages.first(), 'ocr_content')
)
def _request_document_version_ocr_submit_view(self):
return self.post(
viewname='rest_api:document-version-ocr-submit-view', kwargs={
'document_pk': self.test_document.pk,
'version_pk': self.test_document.latest_version.pk
}
hasattr(
self.test_document.pages.first().content_type, 'ocr_content'
)
)
def test_submit_document_version_no_access(self):
@@ -62,16 +77,7 @@ class OCRAPITestCase(DocumentTestMixin, BaseAPITestCase):
self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
self.assertTrue(
hasattr(self.test_document.pages.first(), 'ocr_content')
)
def _request_document_version_page_content_view(self):
return self.get(
viewname='rest_api:document-page-ocr-content-view', kwargs={
'document_pk': self.test_document.pk,
'version_pk': self.test_document.latest_version.pk,
'page_pk': self.test_document.latest_version.pages.first().pk,
}
hasattr(self.test_document_version.pages.first(), 'ocr_content')
)
def test_get_document_version_page_content_no_access(self):

View File

@@ -19,7 +19,7 @@ class DocumentOCRTestCase(DocumentTestMixin, BaseTestCase):
_skip_file_descriptor_test = True
def test_ocr_language_backends_end(self):
content = self.test_document.pages.first().ocr_content.content
content = self.test_document.pages.first().get_ocr_content()
self.assertTrue(TEST_DOCUMENT_CONTENT in content)

View File

@@ -30,7 +30,7 @@ class OCRViewTestMixin(object):
def _request_document_version_page_content_view(self):
return self.get(
viewname='ocr:document_version_page_ocr_content', kwargs={
'pk': self.test_document.pages.first().pk
'pk': self.test_document_version.pages.first().pk
}
)
@@ -87,7 +87,7 @@ class OCRViewsTestCase(OCRViewTestMixin, GenericDocumentViewTestCase):
self.assertTrue(
DocumentVersionPageOCRContent.objects.filter(
document_version_page=self.test_document.pages.first()
document_version_page=self.test_document.pages.first().content_object
).exists()
)
@@ -102,7 +102,7 @@ class OCRViewsTestCase(OCRViewTestMixin, GenericDocumentViewTestCase):
self.assertFalse(
DocumentVersionPageOCRContent.objects.filter(
document_version_page=self.test_document.pages.first()
document_version_page=self.test_document.pages.first().content_object
).exists()
)

View File

@@ -8,9 +8,10 @@ from .api_views import (
)
from .views import (
DocumentOCRContentDeleteView, DocumentOCRContentView,
DocumentOCRDownloadView,
DocumentOCRErrorsListView, DocumentPageOCRContentView, DocumentSubmitView,
DocumentTypeSettingsEditView, DocumentTypeSubmitView, EntryListView
DocumentOCRDownloadView, DocumentOCRErrorsListView,
DocumentPageOCRContentView, DocumentSubmitView,
DocumentTypeSettingsEditView, DocumentTypeSubmitView,
DocumentVersionPageOCRContentView, EntryListView
)
urlpatterns = [
@@ -50,6 +51,11 @@ urlpatterns = [
view=DocumentPageOCRContentView.as_view(),
name='document_page_ocr_content'
),
url(
regex=r'^documents/versions/pages/(?P<pk>\d+)/content/$',
view=DocumentVersionPageOCRContentView.as_view(),
name='document_version_page_ocr_content'
),
url(
regex=r'^document_types/submit/$',
view=DocumentTypeSubmitView.as_view(), name='document_type_submit'

View File

@@ -4,15 +4,29 @@ from django.apps import apps
from django.utils.encoding import force_text
def get_document_ocr_content(document):
DocumentPageOCRContent = apps.get_model(
app_label='ocr', model_name='DocumentPageOCRContent'
def get_document_version_ocr_content(document_version):
DocumentVersionPageOCRContent = apps.get_model(
app_label='ocr', model_name='DocumentVersionPageOCRContent'
)
for page in document.pages.all():
for document_version_page in document_version.pages.all():
try:
page_content = page.ocr_content.content
except DocumentPageOCRContent.DoesNotExist:
page_content = document_version_page.ocr_content.content
except DocumentVersionPageOCRContent.DoesNotExist:
pass
else:
yield force_text(page_content)
def get_document_ocr_content(document):
    """
    Yield the OCR text of each page of ``document`` as text.

    The text of a page is read from
    ``document_page.content_object.ocr_content.content``. Pages without an
    OCR content record, or whose attribute chain cannot be resolved, are
    skipped.
    """
    ocr_content_model = apps.get_model(
        app_label='ocr', model_name='DocumentVersionPageOCRContent'
    )

    for document_page in document.pages.all():
        try:
            ocr_text = document_page.content_object.ocr_content.content
        except (AttributeError, ocr_content_model.DoesNotExist):
            # Nothing to emit for this page.
            continue

        yield force_text(ocr_text)

View File

@@ -13,10 +13,13 @@ from mayan.apps.common.generics import (
from mayan.apps.common.mixins import ExternalObjectMixin
from mayan.apps.documents.forms import DocumentTypeFilteredSelectForm
from mayan.apps.documents.models import (
Document, DocumentType, DocumentVersionPage
Document, DocumentPage, DocumentType, DocumentVersionPage
)
from .forms import DocumentPageOCRContentForm, DocumentOCRContentForm
from .forms import (
DocumentPageOCRContentForm, DocumentOCRContentForm,
DocumentVersionPageOCRContentForm
)
from .models import DocumentVersionPageOCRContent, DocumentVersionOCRError
from .permissions import (
permission_ocr_content_view, permission_ocr_document,
@@ -76,7 +79,7 @@ class DocumentOCRContentView(SingleObjectDetailView):
class DocumentPageOCRContentView(SingleObjectDetailView):
form_class = DocumentPageOCRContentForm
model = DocumentVersionPage
model = DocumentPage
object_permission = permission_ocr_content_view
def dispatch(self, request, *args, **kwargs):
@@ -96,6 +99,30 @@ class DocumentPageOCRContentView(SingleObjectDetailView):
}
class DocumentVersionPageOCRContentView(SingleObjectDetailView):
    """
    Detail view that shows the OCR content form for a single document
    version page, subject to the OCR content view permission.
    """
    form_class = DocumentVersionPageOCRContentForm
    model = DocumentVersionPage
    object_permission = permission_ocr_content_view

    def dispatch(self, request, *args, **kwargs):
        """
        Run the normal dispatch, then record the page's document as
        recently accessed by the requesting user.
        """
        response = super(
            DocumentVersionPageOCRContentView, self
        ).dispatch(request, *args, **kwargs)

        document = self.get_object().document
        document.add_as_recent_document_for_user(user=request.user)

        return response

    def get_extra_context(self):
        """Return the extra template context: object, title, hidden labels."""
        context = {'hide_labels': True}
        context['object'] = self.get_object()
        context['title'] = _(
            'OCR result for document version page: %s'
        ) % self.get_object()

        return context
class DocumentSubmitView(MultipleObjectConfirmActionView):
model = Document
object_permission = permission_ocr_document