Disable OCR code until fixed and updated for new converter code

This commit is contained in:
Roberto Rosario
2015-06-08 22:30:34 -04:00
parent 719476a8e4
commit 0f0edddcd9
2 changed files with 7 additions and 9 deletions

View File

@@ -11,7 +11,7 @@ from django.utils.translation import ugettext_lazy as _
from common.settings import TEMPORARY_DIRECTORY
from common.utils import fs_cleanup
from converter.api import convert
from converter import converter_class
from documents.models import DocumentPage
from .exceptions import UnpaperError
@@ -46,10 +46,8 @@ def do_document_ocr(document_version):
except (ParserError, ParserUnknownFile):
# Fall back to doing visual OCR
document_filepath = document_page.document.get_image_cache_name(page=document_page.page_number, version=document_page.document_version.pk)
logger.debug('document_filepath: %s', document_filepath)
# TODO: visual OCR fallback disabled until it is fixed and updated for the new converter code
"""
unpaper_input = convert(document_filepath, file_format=UNPAPER_FILE_FORMAT)
logger.debug('unpaper_input: %s', unpaper_input)
@@ -80,6 +78,7 @@ def do_document_ocr(document_version):
fs_cleanup(unpaper_input)
fs_cleanup(document_filepath)
fs_cleanup(unpaper_output)
"""
def ocr_cleanup(language, text):

View File

@@ -10,10 +10,9 @@ from django.utils.translation import ugettext_lazy as _
from common.settings import TEMPORARY_DIRECTORY
from common.utils import copyfile
from converter import office_converter
from converter.exceptions import OfficeConversionError
from converter.office_converter import (
CONVERTER_OFFICE_FILE_MIMETYPES, OfficeConverter
from converter.classes import (
CONVERTER_OFFICE_FILE_MIMETYPES
)
from ..settings import PDFTOTEXT_PATH
@@ -176,4 +175,4 @@ class PopplerParser(Parser):
register_parser(mimetypes=['application/pdf'], parsers=[PopplerParser, SlateParser])
register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser])
# register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser]) # TODO: fix and re-enable the OfficeParser registration