Improve office document conversion with a new office converter backend

This commit is contained in:
Roberto Rosario
2012-05-29 04:23:18 -04:00
parent 4331c00b79
commit d235ca406f
3 changed files with 68 additions and 6 deletions

View File

@@ -15,6 +15,7 @@ register_settings(
{'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')},
{'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True, 'description': _(u'Path to the unoconv program.')},
{'name': u'UNOCONV_USE_PIPE', 'global_name': u'CONVERTER_UNOCONV_USE_PIPE', 'default': True, 'description': _(u'Use alternate method of connection to LibreOffice using a pipe, it is slower but less prone to segmentation faults.')},
{'name': u'LIBREOFFICE_PATH', 'global_name': u'CONVERTER_LIBREOFFICE_PATH', 'default': u'/usr/bin/libreoffice', 'exists': True, 'description': _(u'Path to the libreoffice program.')},
#{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'},
#{'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'},

View File

@@ -8,7 +8,7 @@ from mimetype.api import get_mimetype
from common.conf.settings import TEMPORARY_DIRECTORY
from common.utils import id_generator
from .conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE
from .conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE, LIBREOFFICE_PATH
from .exceptions import (OfficeConversionError,
OfficeBackendError, UnknownFileFormat)
@@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
class OfficeConverter(object):
def __init__(self):
self.backend_class = OfficeConverterBackendUnoconv
self.backend_class = OfficeConverterBackendDirect
self.backend = self.backend_class()
self.exists = False
self.mimetype = None
@@ -86,9 +86,9 @@ class OfficeConverterBackendUnoconv(object):
raise OfficeBackendError('cannot find unoconv executable')
def convert(self, input_filepath, output_filepath):
'''
"""
Executes the program unoconv using subprocess's Popen
'''
"""
self.input_filepath = input_filepath
self.output_filepath = output_filepath
@@ -118,3 +118,53 @@ class OfficeConverterBackendUnoconv(object):
raise OfficeBackendError(msg)
except Exception, msg:
logger.error('Unhandled exception', exc_info=msg)
class OfficeConverterBackendDirect(object):
    """
    Office conversion backend that runs the LibreOffice executable directly.

    The executable path is taken from the LIBREOFFICE_PATH setting, falling
    back to /usr/bin/libreoffice when the setting is empty.
    """

    def __init__(self):
        """
        Resolve and validate the path of the LibreOffice executable.

        Raises OfficeBackendError when the resolved path does not exist.
        """
        self.libreoffice_path = LIBREOFFICE_PATH if LIBREOFFICE_PATH else u'/usr/bin/libreoffice'
        if not os.path.exists(self.libreoffice_path):
            raise OfficeBackendError('cannot find LibreOffice executable')
        logger.debug('self.libreoffice_path: %s' % self.libreoffice_path)

    def convert(self, input_filepath, output_filepath):
        """
        Executes libreoffice using subprocess's Popen

        The input file is converted to PDF with TEMPORARY_DIRECTORY as the
        output directory, then the result is renamed to output_filepath.

        Raises OfficeBackendError when LibreOffice exits with a non-zero
        return code or when an OSError occurs while launching the process
        or renaming the converted file.  Any other exception is logged and
        not propagated.
        """
        self.input_filepath = input_filepath
        self.output_filepath = output_filepath

        command = [
            self.libreoffice_path,
            u'--headless',
            u'--convert-to',
            u'pdf',
            self.input_filepath,
            u'--outdir',
            TEMPORARY_DIRECTORY,
        ]

        logger.debug('command: %s' % command)

        try:
            proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
            # communicate() drains both pipes while waiting for the child;
            # wait() alone can block forever once the child fills a pipe
            # buffer that nobody is reading.
            error_output = proc.communicate()[1]
            return_code = proc.returncode
            logger.debug('return_code: %s' % return_code)
            logger.debug('stderr: %s' % error_output)

            if return_code != 0:
                raise OfficeBackendError(error_output)

            filename, extension = os.path.splitext(os.path.basename(self.input_filepath))
            logger.debug('filename: %s' % filename)
            logger.debug('extension: %s' % extension)

            # The converted file is expected in the output directory under
            # the input's base name with a 'pdf' extension.
            converted_output = os.path.join(TEMPORARY_DIRECTORY, os.path.extsep.join([filename, 'pdf']))
            logger.debug('converted_output: %s' % converted_output)

            os.rename(converted_output, self.output_filepath)
        except OfficeBackendError:
            # Let the conversion failure raised above reach the caller
            # instead of being swallowed by the generic handler below.
            raise
        except OSError as exception:
            raise OfficeBackendError(exception)
        except Exception:
            logger.error('Unhandled exception', exc_info=True)

View File

@@ -200,7 +200,7 @@ fine tune it's functionality as explained in the `GraphicsMagick documentation`_
Default: ``/usr/bin/unoconv``
Path to the unoconv program used to call LibreOffice for office document convertion.
Path to the unoconv program used to call LibreOffice for office document conversion.
.. setting:: CONVERTER_UNOCONV_USE_PIPE
@@ -211,8 +211,19 @@ Path to the unoconv program used to call LibreOffice for office document convert
Default: ``True``
Use an alternate method of connecting to LibreOffice using a pipe; it is slower but less prone to segmentation faults.
.. setting:: CONVERTER_LIBREOFFICE_PATH
**CONVERTER_LIBREOFFICE_PATH**
Default: ``/usr/bin/libreoffice``
Path to the libreoffice binary used to call LibreOffice for office document conversion.
Linking
=======