Files
mayan-edms/mayan/apps/ocr/api.py
Roberto Rosario 48df3dcafa PEP8 cleanups
2015-06-24 17:11:24 -04:00

53 lines
1.4 KiB
Python

from __future__ import unicode_literals
import logging
import os
import tempfile
import sh
from common.settings import setting_temporary_directory
from .exceptions import UnpaperError
from .parsers import parse_document_page
from .parsers.exceptions import ParserError, ParserUnknownFile
from .settings import UNPAPER_PATH
logger = logging.getLogger(__name__)
try:
UNPAPER = sh.Command(UNPAPER_PATH).bake(overwrite=True, no_multi_pages=True)
except sh.CommandNotFound:
logger.debug('unpaper not found')
UNPAPER = None
"""
for document_page in document_version.pages.all():
try:
# Try to extract text by means of a parser
parse_document_page(document_page)
except (ParserError, ParserUnknownFile):
# Fall back to doing visual OCR
"""
def execute_unpaper(input_filepath, output_filepath=None):
"""
Executes the program unpaper using subprocess's Popen
"""
if UNPAPER:
if not output_filepath:
fd, output_filepath = tempfile.mkstemp(dir=setting_temporary_directory.value)
try:
UNPAPER(input_filepath, output_filepath)
except sh.ErrorReturnCode as exception:
logger.error(exception)
raise UnpaperError(exception.stderr)
else:
return output_filepath
finally:
os.close(fd)
else:
return input_filepath