Files
mayan-edms/mayan/apps/ocr/test_parsers.py
2015-08-07 03:45:46 -04:00

49 lines
1.4 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.core.files.base import File
from django.test import TestCase
from documents.models import DocumentType
from documents.test_models import TEST_DOCUMENT_PATH, TEST_DOCUMENT_TYPE
from .parsers import PDFMinerParser, PopplerParser
class ParserTestCase(TestCase):
    """Exercise the PDFMiner and Poppler parsers against the test document.

    Each test runs one parser over the latest version of a freshly
    uploaded document and checks that the expected phrase appears in the
    content stored for the document's first page.
    """

    # Phrase that must be present in the first page's extracted content.
    expected_text = 'Mayan EDMS Documentation'

    def setUp(self):
        """Create a document type with ``auto_ocr`` off and upload the test file."""
        self.document_type = DocumentType.objects.create(
            label=TEST_DOCUMENT_TYPE
        )

        # NOTE(review): auto_ocr is switched off presumably so that the page
        # content checked below comes only from the parser under test --
        # confirm against the OCR app's upload handling.
        ocr_settings = self.document_type.ocr_settings
        ocr_settings.auto_ocr = False
        ocr_settings.save()

        # Open in binary mode: the file is handed to PDF parsers, and text
        # mode would decode it (Python 3) or translate newline bytes
        # (Windows), corrupting the uploaded data.
        with open(TEST_DOCUMENT_PATH, 'rb') as file_object:
            self.document = self.document_type.new_document(
                file_object=File(file_object)
            )

    def tearDown(self):
        """Delete the document and then the document type created in setUp."""
        self.document.delete()
        self.document_type.delete()

    def _assert_parser_extracts_text(self, parser_class):
        """Run ``parser_class`` on the latest version and check page content.

        ``parser_class`` is instantiated with no arguments and asked to
        process ``self.document.latest_version``; the first page's
        ``ocr_content.content`` must then contain ``expected_text``.
        """
        parser = parser_class()
        parser.process_document_version(self.document.latest_version)

        # assertIn reports both operands on failure, unlike assertTrue.
        self.assertIn(
            self.expected_text,
            self.document.pages.first().ocr_content.content
        )

    def test_pdfminer_parser(self):
        """PDFMinerParser must extract the expected phrase."""
        self._assert_parser_extracts_text(PDFMinerParser)

    def test_poppler_parser(self):
        """PopplerParser must extract the expected phrase."""
        self._assert_parser_extracts_text(PopplerParser)