Refactor OCR app. Remove document parsing. Move OCR processing to the
model manager. Add submit and finish events.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2017-08-23 02:04:57 -04:00
parent 2052caada4
commit 317d07a355
20 changed files with 309 additions and 497 deletions

View File

@@ -9,17 +9,17 @@ from django.utils.translation import ugettext_lazy as _, ugettext
from common.widgets import TextAreaDiv
from documents.models import DocumentType
from .models import DocumentPageContent
from .models import DocumentPageOCRContent
class DocumentContentForm(forms.Form):
class DocumentOCRContentForm(forms.Form):
"""
Form that concatenates all of a document pages' text content into a
single textarea widget
"""
def __init__(self, *args, **kwargs):
self.document = kwargs.pop('instance', None)
super(DocumentContentForm, self).__init__(*args, **kwargs)
super(DocumentOCRContentForm, self).__init__(*args, **kwargs)
content = []
self.fields['contents'].initial = ''
try:
@@ -30,7 +30,7 @@ class DocumentContentForm(forms.Form):
for page in document_pages:
try:
page_content = page.ocr_content.content
except DocumentPageContent.DoesNotExist:
except DocumentPageOCRContent.DoesNotExist:
pass
else:
content.append(conditional_escape(force_text(page_content)))