Initial commit of the document parsing app.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2017-08-23 02:23:14 -04:00
parent 317d07a355
commit e9591c92f9
25 changed files with 1350 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
from __future__ import unicode_literals
from django.utils.encoding import force_text
from django.utils.html import conditional_escape
from .models import DocumentPageContent
def get_document_ocr_content(document):
    """
    Generate the escaped text content of each page of a document.

    Iterates over ``document.pages.all()`` and, for every page, reads
    ``page.ocr_content.content``. The value is coerced with ``force_text``
    and then passed through ``conditional_escape`` before being yielded.

    Pages for which the ``ocr_content`` lookup raises
    ``DocumentPageContent.DoesNotExist`` are silently skipped, so the
    generator may yield fewer items than the document has pages.
    """
    for document_page in document.pages.all():
        try:
            # Keep only the lookup inside the ``try`` so that errors from
            # the escaping step below are never swallowed by the handler.
            raw_content = document_page.ocr_content.content
        except DocumentPageContent.DoesNotExist:
            # No content record for this page; move on to the next one.
            continue

        yield conditional_escape(force_text(raw_content))