Improve office document conversion with a new office converter backend
This commit is contained in:
@@ -15,6 +15,7 @@ register_settings(
|
||||
{'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')},
|
||||
{'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True, 'description': _(u'Path to the unoconv program.')},
|
||||
{'name': u'UNOCONV_USE_PIPE', 'global_name': u'CONVERTER_UNOCONV_USE_PIPE', 'default': True, 'description': _(u'Use alternate method of connection to LibreOffice using a pipe, it is slower but less prone to segmentation faults.')},
|
||||
{'name': u'LIBREOFFICE_PATH', 'global_name': u'CONVERTER_LIBREOFFICE_PATH', 'default': u'/usr/bin/libreoffice', 'exists': True, 'description': _(u'Path to the libreoffice program.')},
|
||||
|
||||
#{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'},
|
||||
#{'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'},
|
||||
|
||||
@@ -8,7 +8,7 @@ from mimetype.api import get_mimetype
|
||||
from common.conf.settings import TEMPORARY_DIRECTORY
|
||||
from common.utils import id_generator
|
||||
|
||||
from .conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE
|
||||
from .conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE, LIBREOFFICE_PATH
|
||||
from .exceptions import (OfficeConversionError,
|
||||
OfficeBackendError, UnknownFileFormat)
|
||||
|
||||
@@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class OfficeConverter(object):
|
||||
def __init__(self):
|
||||
self.backend_class = OfficeConverterBackendUnoconv
|
||||
self.backend_class = OfficeConverterBackendDirect
|
||||
self.backend = self.backend_class()
|
||||
self.exists = False
|
||||
self.mimetype = None
|
||||
@@ -86,9 +86,9 @@ class OfficeConverterBackendUnoconv(object):
|
||||
raise OfficeBackendError('cannot find unoconv executable')
|
||||
|
||||
def convert(self, input_filepath, output_filepath):
|
||||
'''
|
||||
"""
|
||||
Executes the program unoconv using subprocess's Popen
|
||||
'''
|
||||
"""
|
||||
self.input_filepath = input_filepath
|
||||
self.output_filepath = output_filepath
|
||||
|
||||
@@ -118,3 +118,53 @@ class OfficeConverterBackendUnoconv(object):
|
||||
raise OfficeBackendError(msg)
|
||||
except Exception, msg:
|
||||
logger.error('Unhandled exception', exc_info=msg)
|
||||
|
||||
|
||||
class OfficeConverterBackendDirect(object):
    """
    Office conversion backend that runs the LibreOffice executable
    directly in headless mode to convert a document to PDF.
    """

    def __init__(self):
        """
        Resolve the path of the LibreOffice executable.

        Uses the LIBREOFFICE_PATH setting, falling back to
        /usr/bin/libreoffice when the setting is empty.

        Raises OfficeBackendError if the resolved path does not exist.
        """
        self.libreoffice_path = LIBREOFFICE_PATH if LIBREOFFICE_PATH else u'/usr/bin/libreoffice'
        if not os.path.exists(self.libreoffice_path):
            raise OfficeBackendError('cannot find LibreOffice executable')
        logger.debug('self.libreoffice_path: %s' % self.libreoffice_path)

    def convert(self, input_filepath, output_filepath):
        """
        Executes libreoffice using subprocess's Popen

        input_filepath: path of the document to convert.
        output_filepath: path the resulting PDF is moved to.

        The PDF is written into TEMPORARY_DIRECTORY (via --outdir) and is
        expected there under the input file's base name with a ``pdf``
        extension; it is then renamed to output_filepath.

        Raises OfficeBackendError when the process exits with a non zero
        return code or when an OSError occurs (launching the process or
        renaming the converted file).  Any other exception is logged and
        not propagated.
        """
        self.input_filepath = input_filepath
        self.output_filepath = output_filepath

        command = [
            self.libreoffice_path,
            u'--headless',
            u'--convert-to',
            u'pdf',
            self.input_filepath,
            u'--outdir',
            TEMPORARY_DIRECTORY,
        ]
        logger.debug('command: %s' % command)

        try:
            proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
            # communicate() drains stdout and stderr while waiting for the
            # process; wait() with both streams piped can deadlock once the
            # child fills a pipe buffer.
            stdout_data, stderr_data = proc.communicate()
            return_code = proc.returncode
            logger.debug('return_code: %s' % return_code)
            logger.debug('stderr: %s' % stderr_data)

            if return_code != 0:
                raise OfficeBackendError(stderr_data)

            filename, extension = os.path.splitext(os.path.basename(self.input_filepath))
            logger.debug('filename: %s' % filename)
            logger.debug('extension: %s' % extension)

            converted_output = os.path.join(TEMPORARY_DIRECTORY, os.path.extsep.join([filename, 'pdf']))
            logger.debug('converted_output: %s' % converted_output)

            os.rename(converted_output, self.output_filepath)
        except OfficeBackendError:
            # Must be listed before the generic handler, otherwise the
            # conversion failure raised above is only logged and the caller
            # never learns that the conversion failed.
            raise
        except OSError as exception:
            raise OfficeBackendError(exception)
        except Exception as exception:
            logger.error('Unhandled exception', exc_info=exception)
|
||||
|
||||
|
||||
@@ -200,7 +200,7 @@ fine tune it's functionality as explained in the `GraphicsMagick documentation`_
|
||||
|
||||
Default: ``/usr/bin/unoconv``
|
||||
|
||||
Path to the unoconv program used to call LibreOffice for office document convertion.
|
||||
Path to the unoconv program used to call LibreOffice for office document conversion.
|
||||
|
||||
|
||||
.. setting:: CONVERTER_UNOCONV_USE_PIPE
|
||||
@@ -211,8 +211,19 @@ Path to the unoconv program used to call LibreOffice for office document convert
|
||||
Default: ``True``
|
||||
|
||||
Use an alternate method of connecting to LibreOffice using a pipe; it is slower but less prone to segmentation faults.
|
||||
|
||||
|
||||
.. setting:: CONVERTER_LIBREOFFICE_PATH
|
||||
|
||||
|
||||
**CONVERTER_LIBREOFFICE_PATH**
|
||||
|
||||
Default: ``/usr/bin/libreoffice``
|
||||
|
||||
Path to the LibreOffice binary used for office document conversion.
|
||||
|
||||
|
||||
|
||||
|
||||
Linking
|
||||
=======
|
||||
|
||||
|
||||
Reference in New Issue
Block a user