Update OCR app to use organizations.

This commit is contained in:
Roberto Rosario
2016-06-08 19:29:20 -04:00
parent a2f8e8b8d8
commit aa0f48b1a0
13 changed files with 205 additions and 55 deletions

View File

@@ -16,7 +16,7 @@ class APIDocumentOCRView(generics.GenericAPIView):
'POST': (permission_ocr_document,)
}
permission_classes = (MayanPermission,)
queryset = Document.objects.all()
queryset = Document.on_organization.all()
def get_serializer_class(self):
return None
@@ -44,7 +44,7 @@ class APIDocumentVersionOCRView(generics.GenericAPIView):
'POST': (permission_ocr_document,)
}
permission_classes = (MayanPermission,)
queryset = DocumentVersion.objects.all()
queryset = DocumentVersion.on_organization.all()
def get_serializer_class(self):
return None
@@ -83,7 +83,7 @@ class APIDocumentPageContentView(generics.RetrieveAPIView):
}
permission_classes = (MayanPermission,)
serializer_class = DocumentPageContentSerializer
queryset = DocumentPage.objects.all()
queryset = DocumentPage.on_organization.all()
def retrieve(self, request, *args, **kwargs):
instance = self.get_object()

View File

@@ -60,7 +60,7 @@ class OCRBackendBase(object):
image = document_page.get_image()
try:
document_page_content, created = DocumentPageContent.objects.get_or_create(
document_page_content, created = DocumentPageContent.on_organization.get_or_create(
document_page=document_page
)
document_page_content.content = self.execute(

View File

@@ -57,5 +57,5 @@ class DocumentContentForm(forms.Form):
class DocumentTypeSelectForm(forms.Form):
document_type = forms.ModelChoiceField(
queryset=DocumentType.objects.all(), label=('Document type')
queryset=DocumentType.on_organization.all(), label=('Document type')
)

View File

@@ -20,6 +20,6 @@ def initialize_new_ocr_settings(sender, instance, **kwargs):
DocumentTypeSettings = get_model('ocr', 'DocumentTypeSettings')
if kwargs['created']:
DocumentTypeSettings.objects.create(
DocumentTypeSettings.on_organization.create(
document_type=instance, auto_ocr=setting_auto_ocr.value
)

View File

@@ -0,0 +1,37 @@
from __future__ import unicode_literals
from django.apps import apps
from django.db import models
class OrganizationDocumentTypeSettingsManager(models.Manager):
    """
    Manager whose queryset only contains rows whose ``document_type`` is
    among the results of ``DocumentType.on_organization.all()``.
    """

    def get_queryset(self):
        """
        Return the parent manager's queryset filtered by document type.
        """
        # The model is resolved through the app registry at call time
        # instead of a module-level import (presumably to avoid a circular
        # import with the documents app -- confirm).
        document_type_model = apps.get_model('documents', 'DocumentType')

        unfiltered = super(
            OrganizationDocumentTypeSettingsManager, self
        ).get_queryset()

        return unfiltered.filter(
            document_type__in=document_type_model.on_organization.all()
        )
class OrganizationDocumentVersionOCRErrorManager(models.Manager):
    """
    Manager whose queryset only contains rows whose ``document_version`` is
    among the results of ``DocumentVersion.on_organization.all()``.
    """

    def get_queryset(self):
        """
        Return the parent manager's queryset filtered by document version.
        """
        # The model is resolved through the app registry at call time
        # instead of a module-level import (presumably to avoid a circular
        # import with the documents app -- confirm).
        document_version_model = apps.get_model('documents', 'DocumentVersion')

        unfiltered = super(
            OrganizationDocumentVersionOCRErrorManager, self
        ).get_queryset()

        return unfiltered.filter(
            document_version__in=document_version_model.on_organization.all()
        )
class OrganizationDocumentPageContentManager(models.Manager):
    """
    Manager whose queryset only contains rows whose ``document_page`` is
    among the results of ``DocumentPage.on_organization.all()``.
    """

    def get_queryset(self):
        """
        Return the parent manager's queryset filtered by document page.
        """
        # The model is resolved through the app registry at call time
        # instead of a module-level import (presumably to avoid a circular
        # import with the documents app -- confirm).
        document_page_model = apps.get_model('documents', 'DocumentPage')

        unfiltered = super(
            OrganizationDocumentPageContentManager, self
        ).get_queryset()

        return unfiltered.filter(
            document_page__in=document_page_model.on_organization.all()
        )

View File

@@ -6,6 +6,12 @@ from django.utils.translation import ugettext_lazy as _
from documents.models import DocumentPage, DocumentType, DocumentVersion
from .managers import (
OrganizationDocumentTypeSettingsManager,
OrganizationDocumentVersionOCRErrorManager,
OrganizationDocumentPageContentManager
)
class DocumentTypeSettings(models.Model):
"""
@@ -20,6 +26,9 @@ class DocumentTypeSettings(models.Model):
verbose_name=_('Automatically queue newly created documents for OCR.')
)
objects = models.Manager()
on_organization = OrganizationDocumentTypeSettingsManager()
class Meta:
verbose_name = _('Document type settings')
verbose_name_plural = _('Document types settings')
@@ -35,14 +44,17 @@ class DocumentVersionOCRError(models.Model):
)
result = models.TextField(blank=True, null=True, verbose_name=_('Result'))
def __str__(self):
return unicode(self.document_version)
objects = models.Manager()
on_organization = OrganizationDocumentVersionOCRErrorManager()
class Meta:
ordering = ('datetime_submitted',)
verbose_name = _('Document Version OCR Error')
verbose_name_plural = _('Document Version OCR Errors')
def __str__(self):
return unicode(self.document_version)
@python_2_unicode_compatible
class DocumentPageContent(models.Model):
@@ -55,9 +67,12 @@ class DocumentPageContent(models.Model):
)
content = models.TextField(blank=True, verbose_name=_('Content'))
def __str__(self):
return unicode(self.document_page)
objects = models.Manager()
on_organization = OrganizationDocumentPageContentManager()
class Meta:
verbose_name = _('Document page content')
verbose_name_plural = _('Document pages contents')
def __str__(self):
return unicode(self.document_page)

View File

@@ -92,7 +92,7 @@ class Parser(object):
file_object = document_page.document_version.get_intermidiate_file()
try:
document_page_content, created = DocumentPageContent.objects.get_or_create(
document_page_content, created = DocumentPageContent.on_organization.get_or_create(
document_page=document_page
)
document_page_content.content = self.execute(

View File

@@ -35,7 +35,9 @@ def task_do_ocr(self, document_version_pk):
logger.debug('acquired lock: %s', lock_id)
document_version = None
try:
document_version = DocumentVersion.objects.get(pk=document_version_pk)
document_version = DocumentVersion.on_organization.get(
pk=document_version_pk
)
logger.info(
'Starting document OCR for document version: %s',
document_version
@@ -53,7 +55,7 @@ def task_do_ocr(self, document_version_pk):
exception
)
if document_version:
entry, created = DocumentVersionOCRError.objects.get_or_create(
entry, created = DocumentVersionOCRError.on_organization.get_or_create(
document_version=document_version
)
@@ -72,7 +74,7 @@ def task_do_ocr(self, document_version_pk):
'OCR complete for document version: %s', document_version
)
try:
entry = DocumentVersionOCRError.objects.get(
entry = DocumentVersionOCRError.on_organization.get(
document_version=document_version
)
except DocumentVersionOCRError.DoesNotExist:

View File

@@ -1,36 +1,23 @@
from __future__ import unicode_literals
import json
from django.contrib.auth import get_user_model
from django.core.urlresolvers import reverse
from rest_framework import status
from rest_framework.test import APITestCase
from documents.models import DocumentType
from documents.tests import TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH
from user_management.tests import (
TEST_ADMIN_EMAIL, TEST_ADMIN_PASSWORD, TEST_ADMIN_USERNAME
)
from rest_api.tests import GenericAPITestCase
class OCRAPITestCase(APITestCase):
class OCRAPITestCase(GenericAPITestCase):
"""
Test the OCR app API endpoints
"""
def setUp(self):
self.admin_user = get_user_model().objects.create_superuser(
username=TEST_ADMIN_USERNAME, email=TEST_ADMIN_EMAIL,
password=TEST_ADMIN_PASSWORD
)
super(OCRAPITestCase, self).setUp()
self.client.login(
username=TEST_ADMIN_USERNAME, password=TEST_ADMIN_PASSWORD
)
self.document_type = DocumentType.objects.create(
self.document_type = DocumentType.on_organization.create(
label=TEST_DOCUMENT_TYPE
)
@@ -41,6 +28,7 @@ class OCRAPITestCase(APITestCase):
def tearDown(self):
self.document_type.delete()
super(OCRAPITestCase, self).tearDown()
def test_submit_document(self):
response = self.client.post(
@@ -81,5 +69,5 @@ class OCRAPITestCase(APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertTrue(
'Mayan EDMS Documentation' in json.loads(response.content)['content']
'Mayan EDMS Documentation' in response.data['content']
)

View File

@@ -2,30 +2,31 @@
from __future__ import unicode_literals
from django.core.files.base import File
from django.test import TestCase
from documents.models import DocumentType
from documents.settings import setting_language_choices
from documents.tests import (
TEST_DEU_DOCUMENT_PATH, TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH
)
from organizations.tests import OrganizationTestCase
class DocumentOCRTestCase(TestCase):
class DocumentOCRTestCase(OrganizationTestCase):
def setUp(self):
self.document_type = DocumentType.objects.create(
super(DocumentOCRTestCase, self).setUp()
self.document_type = DocumentType.on_organization.create(
label=TEST_DOCUMENT_TYPE
)
with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
self.document = self.document_type.new_document(
file_object=File(file_object),
file_object=file_object,
)
def tearDown(self):
self.document.delete()
self.document_type.delete()
super(DocumentOCRTestCase, self).tearDown()
def test_ocr_language_backends_end(self):
content = self.document.pages.first().ocr_content.content
@@ -33,9 +34,11 @@ class DocumentOCRTestCase(TestCase):
self.assertTrue('Mayan EDMS Documentation' in content)
class GermanOCRSupportTestCase(TestCase):
class GermanOCRSupportTestCase(OrganizationTestCase):
def setUp(self):
self.document_type = DocumentType.objects.create(
super(GermanOCRSupportTestCase, self).setUp()
self.document_type = DocumentType.on_organization.create(
label=TEST_DOCUMENT_TYPE
)
@@ -49,11 +52,12 @@ class GermanOCRSupportTestCase(TestCase):
with open(TEST_DEU_DOCUMENT_PATH) as file_object:
self.document = self.document_type.new_document(
file_object=File(file_object), language=language_code
file_object=file_object, language=language_code
)
def tearDown(self):
self.document_type.delete()
super(GermanOCRSupportTestCase, self).tearDown()
def test_ocr_language_backends_end(self):
content = self.document.pages.first().ocr_content.content

View File

@@ -0,0 +1,95 @@
from __future__ import unicode_literals
from django.test import override_settings
from documents.models import DocumentType
from documents.tests.literals import (
TEST_DOCUMENT_TYPE, TEST_SMALL_DOCUMENT_PATH
)
from organizations.tests.test_organization_views import OrganizationViewTestCase
from ..models import DocumentPageContent
# View tests for the OCR app under two organizations: objects are created
# with ORGANIZATION_ID set to organization_a, then each view is exercised
# with ORGANIZATION_ID set to organization_a and to organization_b.
# OCR_AUTO_OCR is overridden to False for every test in the class
# (presumably so OCR only runs when a test explicitly requests it -- confirm).
@override_settings(OCR_AUTO_OCR=False)
class OrganizationOCRViewTestCase(OrganizationViewTestCase):
# Helper: create a document type labelled TEST_DOCUMENT_TYPE while
# ORGANIZATION_ID is organization_a's pk; stored as self.document_type.
def create_document_type(self):
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
self.document_type = DocumentType.on_organization.create(
label=TEST_DOCUMENT_TYPE
)
# Helper: create the document type, then create a document of that type
# from the file at TEST_SMALL_DOCUMENT_PATH while ORGANIZATION_ID is
# organization_a's pk; stored as self.document.
def create_document(self):
self.create_document_type()
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
self.document = self.document_type.new_document(
file_object=file_object
)
# The content view must return 200 and contain 'Mayan' for organization_a,
# and 404 for organization_b.
def test_document_content_view(self):
self.create_document()
self.document.submit_for_ocr()
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
response = self.get(
'ocr:document_content', args=(self.document.pk,)
)
self.assertContains(response, text='Mayan', status_code=200)
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
response = self.get(
'ocr:document_content', args=(self.document.pk,)
)
self.assertEqual(response.status_code, 404)
# Submitting a single document must return 200 with text containing
# 'uccess' for organization_a (the leading letter is omitted, presumably so
# the match does not depend on capitalization), and 404 for organization_b.
def test_document_submit_view(self):
self.create_document()
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
response = self.post(
'ocr:document_submit', args=(self.document.pk,), follow=True
)
self.assertContains(response, text='uccess', status_code=200)
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
response = self.post(
'ocr:document_submit', args=(self.document.pk,), follow=True
)
self.assertEqual(response.status_code, 404)
# "Submit all" posted as organization_b must leave the document's first
# page without a DocumentPageContent row; posted as organization_a it must
# produce OCR content containing 'Mayan'.
def test_document_submit_all_view(self):
self.create_document()
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
self.post('ocr:document_submit_all', follow=True)
with self.assertRaises(DocumentPageContent.DoesNotExist):
# Use the .objects manager to make sure we look at all document pages
# and that the content indeed doesn't exist, i.e. no OCR happened.
DocumentPageContent.objects.get(
document_page=self.document.pages.first()
)
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
self.post('ocr:document_submit_all', follow=True)
self.assertIn(
'Mayan', self.document.pages.first().ocr_content.content
)
# The document type OCR settings view must return 200 for organization_a
# and 404 for organization_b.
def test_document_type_ocr_settings_view(self):
self.create_document_type()
with self.settings(ORGANIZATION_ID=self.organization_a.pk):
response = self.get(
'ocr:document_type_ocr_settings', args=(self.document_type.pk,)
)
self.assertEqual(response.status_code, 200)
with self.settings(ORGANIZATION_ID=self.organization_b.pk):
response = self.get(
'ocr:document_type_ocr_settings', args=(self.document_type.pk,)
)
self.assertEqual(response.status_code, 404)

View File

@@ -1,32 +1,34 @@
from __future__ import unicode_literals
from django.core.files.base import File
from django.test import TestCase, override_settings
from django.test import override_settings
from documents.models import DocumentType
from documents.tests import (
TEST_DOCUMENT_PATH, TEST_DOCUMENT_TYPE, TEST_HYBRID_DOCUMENT_PATH
)
from organizations.tests import OrganizationTestCase
from ..classes import TextExtractor
from ..parsers import PDFMinerParser, PopplerParser
@override_settings(OCR_AUTO_OCR=False)
class ParserTestCase(TestCase):
class ParserTestCase(OrganizationTestCase):
def setUp(self):
super(ParserTestCase, self).setUp()
self.document_type = DocumentType.objects.create(
self.document_type = DocumentType.on_organization.create(
label=TEST_DOCUMENT_TYPE
)
with open(TEST_DOCUMENT_PATH) as file_object:
self.document = self.document_type.new_document(
file_object=File(file_object)
file_object=file_object
)
def tearDown(self):
self.document_type.delete()
super(ParserTestCase, self).tearDown()
def test_pdfminer_parser(self):
parser = PDFMinerParser()
@@ -48,19 +50,22 @@ class ParserTestCase(TestCase):
@override_settings(OCR_AUTO_OCR=False)
class TextExtractorTestCase(TestCase):
class TextExtractorTestCase(OrganizationTestCase):
def setUp(self):
self.document_type = DocumentType.objects.create(
super(TextExtractorTestCase, self).setUp()
self.document_type = DocumentType.on_organization.create(
label=TEST_DOCUMENT_TYPE
)
with open(TEST_HYBRID_DOCUMENT_PATH) as file_object:
self.document = self.document_type.new_document(
file_object=File(file_object)
file_object=file_object
)
def tearDown(self):
self.document_type.delete()
super(TextExtractorTestCase, self).tearDown()
def test_text_extractor(self):
TextExtractor.process_document_version(

View File

@@ -32,7 +32,7 @@ class DocumentAllSubmitView(ConfirmView):
def view_action(self):
count = 0
for document in Document.objects.all():
for document in Document.on_organization.all():
document.submit_for_ocr()
count += 1
@@ -49,7 +49,7 @@ class DocumentSubmitView(ConfirmView):
}
def get_object(self):
return Document.objects.get(pk=self.kwargs['pk'])
return get_object_or_404(Document.on_organization, pk=self.kwargs['pk'])
def object_action(self, instance):
try:
@@ -77,7 +77,6 @@ class DocumentSubmitView(ConfirmView):
class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView):
model = Document
success_message = '%(count)d document submitted to the OCR queue.'
success_message_plural = '%(count)d documents submitted to the OCR queue.'
@@ -87,6 +86,9 @@ class DocumentSubmitManyView(MultipleInstanceActionMixin, DocumentSubmitView):
'title': _('Submit the selected documents to the OCR queue?')
}
def get_queryset(self):
return Document.on_organization.all()
class DocumentTypeSubmitView(FormView):
form_class = DocumentTypeSelectForm
@@ -122,7 +124,7 @@ class DocumentTypeSettingsEditView(SingleObjectEditView):
def get_object(self, queryset=None):
return get_object_or_404(
DocumentType, pk=self.kwargs['pk']
DocumentType.on_organization, pk=self.kwargs['pk']
).ocr_settings
def get_extra_context(self):
@@ -135,7 +137,6 @@ class DocumentTypeSettingsEditView(SingleObjectEditView):
class DocumentOCRContent(SingleObjectDetailView):
form_class = DocumentContentForm
model = Document
object_permission = permission_ocr_content_view
def dispatch(self, request, *args, **kwargs):
@@ -153,6 +154,9 @@ class DocumentOCRContent(SingleObjectDetailView):
'title': _('OCR result for document: %s') % self.get_object(),
}
def get_queryset(self):
return Document.on_organization.all()
class EntryListView(SingleObjectListView):
extra_context = {
@@ -162,4 +166,4 @@ class EntryListView(SingleObjectListView):
view_permission = permission_ocr_document
def get_queryset(self):
return DocumentVersionOCRError.objects.all()
return DocumentVersionOCRError.on_organization.all()