Files
mayan-edms/apps/converter/office_converter.py

121 lines
4.1 KiB
Python

from __future__ import absolute_import
import os
import subprocess
import logging
from mimetype.api import get_mimetype
from common.conf.settings import TEMPORARY_DIRECTORY
from common.utils import id_generator
from .conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE
from .exceptions import (OfficeConversionError,
OfficeBackendError, UnknownFileFormat)
CACHED_FILE_SUFFIX = u'_office_converter'
CONVERTER_OFFICE_FILE_MIMETYPES = [
u'application/msword',
u'application/mswrite',
u'application/mspowerpoint',
u'application/msexcel',
u'application/vnd.ms-excel',
u'application/vnd.ms-powerpoint',
u'application/vnd.oasis.opendocument.presentation',
u'application/vnd.oasis.opendocument.text',
u'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
u'application/vnd.oasis.opendocument.spreadsheet',
u'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
u'application/vnd.oasis.opendocument.graphics',
u'application/vnd.ms-office',
u'text/plain',
u'text/rtf',
]
logger = logging.getLogger(__name__)
class OfficeConverter(object):
def __init__(self):
self.backend_class = OfficeConverterBackendUnoconv
self.backend = self.backend_class()
self.exists = False
self.mimetype = None
self.encoding = None
def mimetypes(self):
return CONVERTER_OFFICE_FILE_MIMETYPES
def convert(self, input_filepath, mimetype=None):
self.exists = False
self.mimetype = None
self.encoding = None
self.input_filepath = input_filepath
# Make sure file is of a known office format
if mimetype:
self.mimetype = mimetype
else:
self.mimetype, self.encoding = get_mimetype(open(self.input_filepath), self.input_filepath, mimetype_only=True)
if self.mimetype in CONVERTER_OFFICE_FILE_MIMETYPES:
# Cache results of conversion
self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([self.input_filepath, CACHED_FILE_SUFFIX]))
self.exists = os.path.exists(self.output_filepath)
if not self.exists:
try:
self.backend.convert(self.input_filepath, self.output_filepath)
self.exists = True
except OfficeBackendError, msg:
# convert exception so that at least the mime type icon is displayed
raise UnknownFileFormat(msg)
def __unicode__(self):
return getattr(self, 'output_filepath', None)
def __str__(self):
return str(self.__unicode__())
class OfficeConverterBackendUnoconv(object):
def __init__(self):
self.unoconv_path = UNOCONV_PATH if UNOCONV_PATH else u'/usr/bin/unoconv'
if not os.path.exists(self.unoconv_path):
raise OfficeBackendError('cannot find unoconv executable')
def convert(self, input_filepath, output_filepath):
'''
Executes the program unoconv using subprocess's Popen
'''
self.input_filepath = input_filepath
self.output_filepath = output_filepath
command = []
command.append(self.unoconv_path)
if UNOCONV_USE_PIPE:
command.append(u'--pipe')
command.append(u'mayan-%s' % id_generator())
command.append(u'--format')
command.append(u'pdf')
command.append(u'--output')
command.append(self.output_filepath)
command.append(self.input_filepath)
try:
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
logger.debug('return_code: %s' % return_code)
readline = proc.stderr.readline()
logger.debug('stderr: %s' % readline)
if return_code != 0:
raise OfficeBackendError(readline)
except OSError, msg:
raise OfficeBackendError(msg)
except Exception, msg:
logger.error('Unhandled exception', exc_info=msg)