diff --git a/apps/converter/api.py b/apps/converter/api.py index b60640f973..de7e4bb38d 100755 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -2,11 +2,13 @@ import os import shlex import subprocess import tempfile +import shutil from documents.utils import from_descriptor_to_tempfile from converter.conf.settings import CONVERT_PATH from converter.conf.settings import OCR_OPTIONS +from converter.conf.settings import UNOCONV_PATH from converter import TEMPORARY_DIRECTORY @@ -16,6 +18,12 @@ class ConvertError(Exception): self.status = status self.message = message +def cleanup(filename): + ''' tries to remove the given filename. Ignores non-existent files ''' + try: + os.remove(filename) + except OSError: + pass def get_errors(error_string): ''' @@ -28,8 +36,8 @@ def get_errors(error_string): #return '\n'.join(error_lines) +#TODO: Timeout & kill child def execute_convert(input_filepath, arguments, output_filepath): - #TODO: Timeout & kill child command = [CONVERT_PATH, input_filepath] command.extend(shlex.split(str(arguments))) command.append(output_filepath) @@ -37,9 +45,19 @@ def execute_convert(input_filepath, arguments, output_filepath): proc = subprocess.Popen(command, stderr=subprocess.PIPE) return (proc.wait(), proc.stderr.read()) +def execute_unoconv(input_filepath, output_filepath, arguments=''): + command = [UNOCONV_PATH] + command.extend(['--stdout']) + command.extend(shlex.split(str(arguments))) + command.append(input_filepath) + proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + with open(output_filepath, 'w') as output: + shutil.copyfileobj(proc.stdout, output) + return (proc.wait(), proc.stderr.read()) + def cache_cleanup(input_filepath, size, page=0, format='jpg'): - filepath = create_image_cache_filename(input_filepath, size, page=0, format='jpg') + filepath = create_image_cache_filename(input_filepath, size, page, format) try: os.remove(filepath) except OSError: @@ -50,7 +68,7 @@ def create_image_cache_filename(input_filepath, size, page=0, format='jpg'): if input_filepath: temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename) - return '%s_%s%s%s' % (temp_path, size, os.extsep, format) + return '%s_%s_%s%s%s' % (temp_path, page, size, os.extsep, format) else: return None @@ -63,11 +81,20 @@ def in_image_cache(input_filepath, size, page=0, format='jpg'): return None -def convert(input_filepath, size, cache=True, page=0, format='jpg'): +def convert(input_filepath, size, cache=True, page=0, format='jpg', mimetype=None, extension=None): + unoconv_output = None output_filepath = create_image_cache_filename(input_filepath, size, page, format) if os.path.exists(output_filepath): return output_filepath - + if extension: + if extension.lower() == 'ods': + unoconv_output = '%s_pdf' % output_filepath + status, error_string = execute_unoconv(input_filepath, unoconv_output, arguments='-f pdf') + if status: + errors = get_errors(error_string) + raise ConvertError(status, errors) + cleanup(input_filepath) + input_filepath = unoconv_output #TODO: Check mimetype and use corresponding utility try: input_arg = '%s[%s]' % (input_filepath, page) @@ -76,6 +103,9 @@ def convert(input_filepath, size, cache=True, page=0, format='jpg'): errors = get_errors(error_string) raise ConvertError(status, errors) finally: + cleanup(input_filepath) + if unoconv_output: + cleanup(unoconv_output) return output_filepath diff --git a/docs/TODO b/docs/TODO index 6f78fd2659..ba91246acd 100755 --- a/docs/TODO +++ b/docs/TODO @@ -58,3 +58,4 @@ * Group documents by metadata * Support spreadsheets, wordprocessing docs using openoffice in server mode * WebDAV support +* Handle ziped or rar archives