Disable OCR code until fixed and updated for new converter code
This commit is contained in:
@@ -11,7 +11,7 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from common.settings import TEMPORARY_DIRECTORY
|
||||
from common.utils import fs_cleanup
|
||||
from converter.api import convert
|
||||
from converter import converter_class
|
||||
from documents.models import DocumentPage
|
||||
|
||||
from .exceptions import UnpaperError
|
||||
@@ -46,10 +46,8 @@ def do_document_ocr(document_version):
|
||||
except (ParserError, ParserUnknownFile):
|
||||
# Fall back to doing visual OCR
|
||||
|
||||
document_filepath = document_page.document.get_image_cache_name(page=document_page.page_number, version=document_page.document_version.pk)
|
||||
|
||||
logger.debug('document_filepath: %s', document_filepath)
|
||||
|
||||
# TODO: visual OCR fallback disabled until it is fixed and updated for the new converter code
|
||||
"""
|
||||
unpaper_input = convert(document_filepath, file_format=UNPAPER_FILE_FORMAT)
|
||||
|
||||
logger.debug('unpaper_input: %s', unpaper_input)
|
||||
@@ -80,6 +78,7 @@ def do_document_ocr(document_version):
|
||||
fs_cleanup(unpaper_input)
|
||||
fs_cleanup(document_filepath)
|
||||
fs_cleanup(unpaper_output)
|
||||
"""
|
||||
|
||||
|
||||
def ocr_cleanup(language, text):
|
||||
|
||||
@@ -10,10 +10,9 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from common.settings import TEMPORARY_DIRECTORY
|
||||
from common.utils import copyfile
|
||||
from converter import office_converter
|
||||
from converter.exceptions import OfficeConversionError
|
||||
from converter.office_converter import (
|
||||
CONVERTER_OFFICE_FILE_MIMETYPES, OfficeConverter
|
||||
from converter.classes import (
|
||||
CONVERTER_OFFICE_FILE_MIMETYPES
|
||||
)
|
||||
|
||||
from ..settings import PDFTOTEXT_PATH
|
||||
@@ -176,4 +175,4 @@ class PopplerParser(Parser):
|
||||
|
||||
|
||||
register_parser(mimetypes=['application/pdf'], parsers=[PopplerParser, SlateParser])
|
||||
register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser])
|
||||
# register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser]) # TODO: FIX
|
||||
|
||||
Reference in New Issue
Block a user