Update OCR app to use organizations.
This commit is contained in:
@@ -16,7 +16,7 @@ class APIDocumentOCRView(generics.GenericAPIView):
|
||||
'POST': (permission_ocr_document,)
|
||||
}
|
||||
permission_classes = (MayanPermission,)
|
||||
queryset = Document.objects.all()
|
||||
queryset = Document.on_organization.all()
|
||||
|
||||
def get_serializer_class(self):
|
||||
return None
|
||||
@@ -44,7 +44,7 @@ class APIDocumentVersionOCRView(generics.GenericAPIView):
|
||||
'POST': (permission_ocr_document,)
|
||||
}
|
||||
permission_classes = (MayanPermission,)
|
||||
queryset = DocumentVersion.objects.all()
|
||||
queryset = DocumentVersion.on_organization.all()
|
||||
|
||||
def get_serializer_class(self):
|
||||
return None
|
||||
@@ -83,7 +83,7 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
|
||||
}
|
||||
permission_classes = (MayanPermission,)
|
||||
serializer_class = DocumentPageContentSerializer
|
||||
queryset = DocumentPage.objects.all()
|
||||
queryset = DocumentPage.on_organization.all()
|
||||
|
||||
def retrieve(self, request, *args, **kwargs):
|
||||
instance = self.get_object()
|
||||
|
||||
@@ -60,7 +60,7 @@ class OCRBackendBase(object):
|
||||
image = document_page.get_image()
|
||||
|
||||
try:
|
||||
document_page_content, created = DocumentPageContent.objects.get_or_create(
|
||||
document_page_content, created = DocumentPageContent.on_organization.get_or_create(
|
||||
document_page=document_page
|
||||
)
|
||||
document_page_content.content = self.execute(
|
||||
|
||||
@@ -57,5 +57,5 @@ class DocumentContentForm(forms.Form):
|
||||
|
||||
class DocumentTypeSelectForm(forms.Form):
|
||||
document_type = forms.ModelChoiceField(
|
||||
queryset=DocumentType.objects.all(), label=('Document type')
|
||||
queryset=DocumentType.on_organization.all(), label=('Document type')
|
||||
)
|
||||
|
||||
@@ -20,6 +20,6 @@ def initialize_new_ocr_settings(sender, instance, **kwargs):
|
||||
DocumentTypeSettings = get_model('ocr', 'DocumentTypeSettings')
|
||||
|
||||
if kwargs['created']:
|
||||
DocumentTypeSettings.objects.create(
|
||||
DocumentTypeSettings.on_organization.create(
|
||||
document_type=instance, auto_ocr=setting_auto_ocr.value
|
||||
)
|
||||
|
||||
37
mayan/apps/ocr/managers.py
Normal file
37
mayan/apps/ocr/managers.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.apps import apps
|
||||
from django.db import models
|
||||
|
||||
|
||||
class OrganizationDocumentTypeSettingsManager(models.Manager):
    """
    Manager that narrows its queryset to the entries whose document type
    is returned by ``DocumentType.on_organization``.
    """

    def get_queryset(self):
        """
        Return the parent manager's queryset filtered by ``document_type``
        membership in ``DocumentType.on_organization.all()``.
        """
        # The model is looked up through the app registry at call time.
        document_type_model = apps.get_model('documents', 'DocumentType')

        base_queryset = super(
            OrganizationDocumentTypeSettingsManager, self
        ).get_queryset()

        return base_queryset.filter(
            document_type__in=document_type_model.on_organization.all()
        )
|
||||
|
||||
|
||||
class OrganizationDocumentVersionOCRErrorManager(models.Manager):
    """
    Manager that narrows its queryset to the entries whose document version
    is returned by ``DocumentVersion.on_organization``.
    """

    def get_queryset(self):
        """
        Return the parent manager's queryset filtered by ``document_version``
        membership in ``DocumentVersion.on_organization.all()``.
        """
        # The model is looked up through the app registry at call time.
        document_version_model = apps.get_model(
            'documents', 'DocumentVersion'
        )

        base_queryset = super(
            OrganizationDocumentVersionOCRErrorManager, self
        ).get_queryset()

        return base_queryset.filter(
            document_version__in=document_version_model.on_organization.all()
        )
|
||||
|
||||
|
||||
class OrganizationDocumentPageContentManager(models.Manager):
    """
    Manager that narrows its queryset to the entries whose document page
    is returned by ``DocumentPage.on_organization``.
    """

    def get_queryset(self):
        """
        Return the parent manager's queryset filtered by ``document_page``
        membership in ``DocumentPage.on_organization.all()``.
        """
        # The model is looked up through the app registry at call time.
        document_page_model = apps.get_model('documents', 'DocumentPage')

        base_queryset = super(
            OrganizationDocumentPageContentManager, self
        ).get_queryset()

        return base_queryset.filter(
            document_page__in=document_page_model.on_organization.all()
        )
|
||||
@@ -6,6 +6,12 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from documents.models import DocumentPage, DocumentType, DocumentVersion
|
||||
|
||||
from .managers import (
|
||||
OrganizationDocumentTypeSettingsManager,
|
||||
OrganizationDocumentVersionOCRErrorManager,
|
||||
OrganizationDocumentPageContentManager
|
||||
)
|
||||
|
||||
|
||||
class DocumentTypeSettings(models.Model):
|
||||
"""
|
||||
@@ -20,6 +26,9 @@ class DocumentTypeSettings(models.Model):
|
||||
verbose_name=_('Automatically queue newly created documents for OCR.')
|
||||
)
|
||||
|
||||
objects = models.Manager()
|
||||
on_organization = OrganizationDocumentTypeSettingsManager()
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Document type settings')
|
||||
verbose_name_plural = _('Document types settings')
|
||||
@@ -35,14 +44,17 @@ class DocumentVersionOCRError(models.Model):
|
||||
)
|
||||
result = models.TextField(blank=True, null=True, verbose_name=_('Result'))
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self.document_version)
|
||||
objects = models.Manager()
|
||||
on_organization = OrganizationDocumentVersionOCRErrorManager()
|
||||
|
||||
class Meta:
|
||||
ordering = ('datetime_submitted',)
|
||||
verbose_name = _('Document Version OCR Error')
|
||||
verbose_name_plural = _('Document Version OCR Errors')
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self.document_version)
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class DocumentPageContent(models.Model):
|
||||
@@ -55,9 +67,12 @@ class DocumentPageContent(models.Model):
|
||||
)
|
||||
content = models.TextField(blank=True, verbose_name=_('Content'))
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self.document_page)
|
||||
objects = models.Manager()
|
||||
on_organization = OrganizationDocumentPageContentManager()
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Document page content')
|
||||
verbose_name_plural = _('Document pages contents')
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self.document_page)
|
||||
|
||||
@@ -92,7 +92,7 @@ class Parser(object):
|
||||
file_object = document_page.document_version.get_intermidiate_file()
|
||||
|
||||
try:
|
||||
document_page_content, created = DocumentPageContent.objects.get_or_create(
|
||||
document_page_content, created = DocumentPageContent.on_organization.get_or_create(
|
||||
document_page=document_page
|
||||
)
|
||||
document_page_content.content = self.execute(
|
||||
|
||||
@@ -35,7 +35,9 @@ def task_do_ocr(self, document_version_pk):
|
||||
logger.debug('acquired lock: %s', lock_id)
|
||||
document_version = None
|
||||
try:
|
||||
document_version = DocumentVersion.objects.get(pk=document_version_pk)
|
||||
document_version = DocumentVersion.on_organization.get(
|
||||
pk=document_version_pk
|
||||
)
|
||||
logger.info(
|
||||
'Starting document OCR for document version: %s',
|
||||
document_version
|
||||
@@ -53,7 +55,7 @@ def task_do_ocr(self, document_version_pk):
|
||||
exception
|
||||
)
|
||||
if document_version:
|
||||
entry, created = DocumentVersionOCRError.objects.get_or_create(
|
||||
entry, created = DocumentVersionOCRError.on_organization.get_or_create(
|
||||
document_version=document_version
|
||||
)
|
||||
|
||||
@@ -72,7 +74,7 @@ def task_do_ocr(self, document_version_pk):
|
||||
'OCR complete for document version: %s', document_version
|
||||
)
|
||||
try:
|
||||
entry = DocumentVersionOCRError.objects.get(
|
||||
entry = DocumentVersionOCRError.on_organization.get(
|
||||
document_version=document_version
|
||||
)
|
||||
except DocumentVersionOCRError.DoesNotExist:
|
||||
|
||||
@@ -1,36 +1,23 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.core.urlresolvers import reverse
|
||||
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import DocumentType
|
||||
from documents.tests import TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH
|
||||
from user_management.tests import (
|
||||
TEST_ADMIN_EMAIL, TEST_ADMIN_PASSWORD, TEST_ADMIN_USERNAME
|
||||
)
|
||||
from rest_api.tests import GenericAPITestCase
|
||||
|
||||
|
||||
class OCRAPITestCase(APITestCase):
|
||||
class OCRAPITestCase(GenericAPITestCase):
|
||||
"""
|
||||
Test the OCR app API endpoints
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
self.admin_user = get_user_model().objects.create_superuser(
|
||||
username=TEST_ADMIN_USERNAME, email=TEST_ADMIN_EMAIL,
|
||||
password=TEST_ADMIN_PASSWORD
|
||||
)
|
||||
super(OCRAPITestCase, self).setUp()
|
||||
|
||||
self.client.login(
|
||||
username=TEST_ADMIN_USERNAME, password=TEST_ADMIN_PASSWORD
|
||||
)
|
||||
|
||||
self.document_type = DocumentType.objects.create(
|
||||
self.document_type = DocumentType.on_organization.create(
|
||||
label=TEST_DOCUMENT_TYPE
|
||||
)
|
||||
|
||||
@@ -41,6 +28,7 @@ class OCRAPITestCase(APITestCase):
|
||||
|
||||
def tearDown(self):
|
||||
self.document_type.delete()
|
||||
super(OCRAPITestCase, self).tearDown()
|
||||
|
||||
def test_submit_document(self):
|
||||
response = self.client.post(
|
||||
@@ -81,5 +69,5 @@ class OCRAPITestCase(APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
self.assertTrue(
|
||||
'Mayan EDMS Documentation' in json.loads(response.content)['content']
|
||||
'Mayan EDMS Documentation' in response.data['content']
|
||||
)
|
||||
|
||||
@@ -2,30 +2,31 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.core.files.base import File
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.models import DocumentType
|
||||
from documents.settings import setting_language_choices
|
||||
from documents.tests import (
|
||||
TEST_DEU_DOCUMENT_PATH, TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH
|
||||
)
|
||||
from organizations.tests import OrganizationTestCase
|
||||
|
||||
|
||||
class DocumentOCRTestCase(TestCase):
|
||||
class DocumentOCRTestCase(OrganizationTestCase):
|
||||
def setUp(self):
|
||||
self.document_type = DocumentType.objects.create(
|
||||
super(DocumentOCRTestCase, self).setUp()
|
||||
|
||||
self.document_type = DocumentType.on_organization.create(
|
||||
label=TEST_DOCUMENT_TYPE
|
||||
)
|
||||
|
||||
with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
|
||||
self.document = self.document_type.new_document(
|
||||
file_object=File(file_object),
|
||||
file_object=file_object,
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
self.document.delete()
|
||||
self.document_type.delete()
|
||||
super(DocumentOCRTestCase, self).tearDown()
|
||||
|
||||
def test_ocr_language_backends_end(self):
|
||||
content = self.document.pages.first().ocr_content.content
|
||||
@@ -33,9 +34,11 @@ class DocumentOCRTestCase(TestCase):
|
||||
self.assertTrue('Mayan EDMS Documentation' in content)
|
||||
|
||||
|
||||
class GermanOCRSupportTestCase(TestCase):
|
||||
class GermanOCRSupportTestCase(OrganizationTestCase):
|
||||
def setUp(self):
|
||||
self.document_type = DocumentType.objects.create(
|
||||
super(GermanOCRSupportTestCase, self).setUp()
|
||||
|
||||
self.document_type = DocumentType.on_organization.create(
|
||||
label=TEST_DOCUMENT_TYPE
|
||||
)
|
||||
|
||||
@@ -49,11 +52,12 @@ class GermanOCRSupportTestCase(TestCase):
|
||||
|
||||
with open(TEST_DEU_DOCUMENT_PATH) as file_object:
|
||||
self.document = self.document_type.new_document(
|
||||
file_object=File(file_object), language=language_code
|
||||
file_object=file_object, language=language_code
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
self.document_type.delete()
|
||||
super(GermanOCRSupportTestCase, self).tearDown()
|
||||
|
||||
def test_ocr_language_backends_end(self):
|
||||
content = self.document.pages.first().ocr_content.content
|
||||
|
||||
95
mayan/apps/ocr/tests/test_organization_views.py
Normal file
95
mayan/apps/ocr/tests/test_organization_views.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.models import DocumentType
|
||||
from documents.tests.literals import (
|
||||
TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH
|
||||
)
|
||||
from organizations.tests.test_organization_views import OrganizationViewTestCase
|
||||
|
||||
from ..models import DocumentPageContent
|
||||
|
||||
|
||||
@override_settings(OCR_AUTO_OCR=False)
|
||||
class OrganizationOCRViewTestCase(OrganizationViewTestCase):
|
||||
def create_document_type(self):
|
||||
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
|
||||
self.document_type = DocumentType.on_organization.create(
|
||||
label=TEST_DOCUMENT_TYPE
|
||||
)
|
||||
|
||||
def create_document(self):
|
||||
self.create_document_type()
|
||||
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
|
||||
|
||||
with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
|
||||
self.document = self.document_type.new_document(
|
||||
file_object=file_object
|
||||
)
|
||||
|
||||
def test_document_content_view(self):
|
||||
self.create_document()
|
||||
self.document.submit_for_ocr()
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
|
||||
response = self.get(
|
||||
'ocr:document_content', args=(self.document.pk,)
|
||||
)
|
||||
self.assertContains(response, text='Mayan', status_code=200)
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
|
||||
response = self.get(
|
||||
'ocr:document_content', args=(self.document.pk,)
|
||||
)
|
||||
self.assertEqual(response.status_code, 404)
|
||||
|
||||
def test_document_submit_view(self):
|
||||
self.create_document()
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
|
||||
response = self.post(
|
||||
'ocr:document_submit', args=(self.document.pk,), follow=True
|
||||
)
|
||||
self.assertContains(response, text='uccess', status_code=200)
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
|
||||
response = self.post(
|
||||
'ocr:document_submit', args=(self.document.pk,), follow=True
|
||||
)
|
||||
self.assertEqual(response.status_code, 404)
|
||||
|
||||
def test_document_submit_all_view(self):
|
||||
self.create_document()
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
|
||||
self.post('ocr:document_submit_all', follow=True)
|
||||
|
||||
with self.assertRaises(DocumentPageContent.DoesNotExist):
|
||||
# Use .objects manager to make sure we get all document pages
|
||||
# and that it indeed doesn't exist = no OCR happened.
|
||||
DocumentPageContent.objects.get(
|
||||
document_page=self.document.pages.first()
|
||||
)
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
|
||||
self.post('ocr:document_submit_all', follow=True)
|
||||
|
||||
self.assertIn(
|
||||
'Mayan', self.document.pages.first().ocr_content.content
|
||||
)
|
||||
|
||||
def test_document_type_ocr_settings_view(self):
|
||||
self.create_document_type()
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
|
||||
response = self.get(
|
||||
'ocr:document_type_ocr_settings', args=(self.document_type.pk,)
|
||||
)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
|
||||
response = self.get(
|
||||
'ocr:document_type_ocr_settings', args=(self.document_type.pk,)
|
||||
)
|
||||
self.assertEqual(response.status_code, 404)
|
||||
@@ -1,32 +1,34 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.core.files.base import File
|
||||
from django.test import TestCase, override_settings
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.models import DocumentType
|
||||
from documents.tests import (
|
||||
TEST_DOCUMENT_PATH, TEST_DOCUMENT_TYPE, TEST_HYBRID_DOCUMENT_PATH
|
||||
)
|
||||
from organizations.tests import OrganizationTestCase
|
||||
|
||||
from ..classes import TextExtractor
|
||||
from ..parsers import PDFMinerParser, PopplerParser
|
||||
|
||||
|
||||
@override_settings(OCR_AUTO_OCR=False)
|
||||
class ParserTestCase(TestCase):
|
||||
class ParserTestCase(OrganizationTestCase):
|
||||
def setUp(self):
|
||||
super(ParserTestCase, self).setUp()
|
||||
|
||||
self.document_type = DocumentType.objects.create(
|
||||
self.document_type = DocumentType.on_organization.create(
|
||||
label=TEST_DOCUMENT_TYPE
|
||||
)
|
||||
|
||||
with open(TEST_DOCUMENT_PATH) as file_object:
|
||||
self.document = self.document_type.new_document(
|
||||
file_object=File(file_object)
|
||||
file_object=file_object
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
self.document_type.delete()
|
||||
super(ParserTestCase, self).tearDown()
|
||||
|
||||
def test_pdfminer_parser(self):
|
||||
parser = PDFMinerParser()
|
||||
@@ -48,19 +50,22 @@ class ParserTestCase(TestCase):
|
||||
|
||||
|
||||
@override_settings(OCR_AUTO_OCR=False)
|
||||
class TextExtractorTestCase(TestCase):
|
||||
class TextExtractorTestCase(OrganizationTestCase):
|
||||
def setUp(self):
|
||||
self.document_type = DocumentType.objects.create(
|
||||
super(TextExtractorTestCase, self).setUp()
|
||||
|
||||
self.document_type = DocumentType.on_organization.create(
|
||||
label=TEST_DOCUMENT_TYPE
|
||||
)
|
||||
|
||||
with open(TEST_HYBRID_DOCUMENT_PATH) as file_object:
|
||||
self.document = self.document_type.new_document(
|
||||
file_object=File(file_object)
|
||||
file_object=file_object
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
self.document_type.delete()
|
||||
super(TextExtractorTestCase, self).tearDown()
|
||||
|
||||
def test_text_extractor(self):
|
||||
TextExtractor.process_document_version(
|
||||
|
||||
@@ -32,7 +32,7 @@ class DocumentAllSubmitView(ConfirmView):
|
||||
|
||||
def view_action(self):
|
||||
count = 0
|
||||
for document in Document.objects.all():
|
||||
for document in Document.on_organization.all():
|
||||
document.submit_for_ocr()
|
||||
count += 1
|
||||
|
||||
@@ -49,7 +49,7 @@ class DocumentSubmitView(ConfirmView):
|
||||
}
|
||||
|
||||
def get_object(self):
|
||||
return Document.objects.get(pk=self.kwargs['pk'])
|
||||
return get_object_or_404(Document.on_organization, pk=self.kwargs['pk'])
|
||||
|
||||
def object_action(self, instance):
|
||||
try:
|
||||
@@ -77,7 +77,6 @@ class DocumentSubmitView(ConfirmView):
|
||||
|
||||
|
||||
class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView):
|
||||
model = Document
|
||||
success_message = '%(count)d document submitted to the OCR queue.'
|
||||
success_message_plural = '%(count)d documents submitted to the OCR queue.'
|
||||
|
||||
@@ -87,6 +86,9 @@ class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView):
|
||||
'title': _('Submit the selected documents to the OCR queue?')
|
||||
}
|
||||
|
||||
def get_queryset(self):
|
||||
return Document.on_organization.all()
|
||||
|
||||
|
||||
class DocumentTypeSubmitView(FormView):
|
||||
form_class = DocumentTypeSelectForm
|
||||
@@ -122,7 +124,7 @@ class DocumentTypeSettingsEditView(SingleObjectEditView):
|
||||
|
||||
def get_object(self, queryset=None):
|
||||
return get_object_or_404(
|
||||
DocumentType, pk=self.kwargs['pk']
|
||||
DocumentType.on_organization, pk=self.kwargs['pk']
|
||||
).ocr_settings
|
||||
|
||||
def get_extra_context(self):
|
||||
@@ -135,7 +137,6 @@ class DocumentTypeSettingsEditView(SingleObjectEditView):
|
||||
|
||||
class DocumentOCRContent(SingleObjectDetailView):
|
||||
form_class = DocumentContentForm
|
||||
model = Document
|
||||
object_permission = permission_ocr_content_view
|
||||
|
||||
def dispatch(self, request, *args, **kwargs):
|
||||
@@ -153,6 +154,9 @@ class DocumentOCRContent(SingleObjectDetailView):
|
||||
'title': _('OCR result for document: %s') % self.get_object(),
|
||||
}
|
||||
|
||||
def get_queryset(self):
|
||||
return Document.on_organization.all()
|
||||
|
||||
|
||||
class EntryListView(SingleObjectListView):
|
||||
extra_context = {
|
||||
@@ -162,4 +166,4 @@ class EntryListView(SingleObjectListView):
|
||||
view_permission = permission_ocr_document
|
||||
|
||||
def get_queryset(self):
|
||||
return DocumentVersionOCRError.objects.all()
|
||||
return DocumentVersionOCRError.on_organization.all()
|
||||
|
||||
Reference in New Issue
Block a user