From d235ca406f83464df48293ef8dca184068dbe269 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 29 May 2012 04:23:18 -0400 Subject: [PATCH] Add improve office document conversion with new office converter backend --- apps/converter/conf/settings.py | 1 + apps/converter/office_converter.py | 58 +++++++++++++++++++++++++++--- docs/topics/settings.rst | 15 ++++++-- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/apps/converter/conf/settings.py b/apps/converter/conf/settings.py index 1dbfe4d6bc..f13cc2c76f 100644 --- a/apps/converter/conf/settings.py +++ b/apps/converter/conf/settings.py @@ -15,6 +15,7 @@ register_settings( {'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')}, {'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True, 'description': _(u'Path to the unoconv program.')}, {'name': u'UNOCONV_USE_PIPE', 'global_name': u'CONVERTER_UNOCONV_USE_PIPE', 'default': True, 'description': _(u'Use alternate method of connection to LibreOffice using a pipe, it is slower but less prone to segmentation faults.')}, + {'name': u'LIBREOFFICE_PATH', 'global_name': u'CONVERTER_LIBREOFFICE_PATH', 'default': u'/usr/bin/libreoffice', 'exists': True, 'description': _(u'Path to the libreoffice program.')}, #{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'}, #{'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'}, diff --git a/apps/converter/office_converter.py b/apps/converter/office_converter.py index 07c7a28f62..40e8ad1dc3 100644 --- a/apps/converter/office_converter.py +++ b/apps/converter/office_converter.py @@ -8,7 +8,7 @@ from mimetype.api import get_mimetype from common.conf.settings import TEMPORARY_DIRECTORY from common.utils import id_generator -from .conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE +from .conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE, LIBREOFFICE_PATH from .exceptions import (OfficeConversionError, OfficeBackendError, UnknownFileFormat) @@ -38,7 +38,7 @@ logger = logging.getLogger(__name__) class OfficeConverter(object): def __init__(self): - self.backend_class = OfficeConverterBackendUnoconv + self.backend_class = OfficeConverterBackendDirect self.backend = self.backend_class() self.exists = False self.mimetype = None @@ -86,9 +86,9 @@ class OfficeConverterBackendUnoconv(object): raise OfficeBackendError('cannot find unoconv executable') def convert(self, input_filepath, output_filepath): - ''' + """ Executes the program unoconv using subprocess's Popen - ''' + """ self.input_filepath = input_filepath self.output_filepath = output_filepath @@ -118,3 +118,53 @@ class OfficeConverterBackendUnoconv(object): raise OfficeBackendError(msg) except Exception, msg: logger.error('Unhandled exception', exc_info=msg) + + +class OfficeConverterBackendDirect(object): + def __init__(self): + self.libreoffice_path = LIBREOFFICE_PATH if LIBREOFFICE_PATH else u'/usr/bin/libreoffice' + if not os.path.exists(self.libreoffice_path): + raise OfficeBackendError('cannot find LibreOffice executable') + logger.debug('self.libreoffice_path: %s' % self.libreoffice_path) + + def convert(self, input_filepath, output_filepath): + """ + Executes libreoffice using subprocess's Popen + """ + self.input_filepath = input_filepath + self.output_filepath = output_filepath + + command = [] + command.append(self.libreoffice_path) + + command.append(u'--headless') + command.append(u'--convert-to') + command.append(u'pdf') + command.append(self.input_filepath) + command.append(u'--outdir') + command.append(TEMPORARY_DIRECTORY) + + logger.debug('command: %s' % command) + + try: + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + logger.debug('return_code: %s' % return_code) + + readline = proc.stderr.readline() + logger.debug('stderr: %s' % readline) + if return_code != 0: + raise OfficeBackendError(readline) + filename, extension = os.path.splitext(os.path.basename(self.input_filepath)) + logger.debug('filename: %s' % filename) + logger.debug('extension: %s' % extension) + + converted_output = os.path.join(TEMPORARY_DIRECTORY, os.path.extsep.join([filename, 'pdf'])) + logger.debug('converted_output: %s' % converted_output) + + os.rename(converted_output, self.output_filepath) + except OSError, msg: + raise OfficeBackendError(msg) + except Exception, msg: + logger.error('Unhandled exception', exc_info=msg) + diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst index ae395a435e..f40ae0ee69 100644 --- a/docs/topics/settings.rst +++ b/docs/topics/settings.rst @@ -200,7 +200,7 @@ fine tune it's functionality as explained in the `GraphicsMagick documentation`_ Default: ``/usr/bin/unoconv`` -Path to the unoconv program used to call LibreOffice for office document convertion. +Path to the unoconv program used to call LibreOffice for office document conversion. .. setting:: CONVERTER_UNOCONV_USE_PIPE @@ -211,8 +211,19 @@ Path to the unoconv program used to call LibreOffice for office document convert Default: ``True`` Use alternate method of connection to LibreOffice using a pipe, it is slower but less prone to segmentation faults. + + +.. setting:: CONVERTER_LIBREOFFICE_PATH + + +**CONVERTER_LIBREOFFICE_PATH** + +Default: ``/usr/bin/libreoffice`` + +Path to the libreoffice binary used to call LibreOffice for office document conversion. - + + Linking =======