Files
mayan-edms/mayan/apps/ocr/api.py
2015-07-22 18:21:37 -04:00

57 lines
1.4 KiB
Python

from __future__ import unicode_literals
import logging
import os
import tempfile
import sh
from common.settings import setting_temporary_directory
from .exceptions import UnpaperError
from .parsers import parse_document_page
from .parsers.exceptions import ParserError, ParserUnknownFile
from .settings import UNPAPER_PATH
logger = logging.getLogger(__name__)
try:
UNPAPER = sh.Command(UNPAPER_PATH).bake(
overwrite=True, no_multi_pages=True
)
except sh.CommandNotFound:
logger.debug('unpaper not found')
UNPAPER = None
"""
for document_page in document_version.pages.all():
try:
# Try to extract text by means of a parser
parse_document_page(document_page)
except (ParserError, ParserUnknownFile):
# Fall back to doing visual OCR
"""
def execute_unpaper(input_filepath, output_filepath=None):
"""
Executes the program unpaper using subprocess's Popen
"""
if UNPAPER:
if not output_filepath:
fd, output_filepath = tempfile.mkstemp(
dir=setting_temporary_directory.value
)
try:
UNPAPER(input_filepath, output_filepath)
except sh.ErrorReturnCode as exception:
logger.error(exception)
raise UnpaperError(exception.stderr)
else:
return output_filepath
finally:
os.close(fd)
else:
return input_filepath