From e590cb041cba77b2b0229b22f1d79900ecaf7bd2 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 21 Nov 2011 02:47:52 -0400 Subject: [PATCH] Finished office converter using MIME type detection --- apps/converter/api.py | 5 --- apps/converter/backends/python/base.py | 2 +- apps/converter/office_converter.py | 51 ++++++++++---------------- 3 files changed, 21 insertions(+), 37 deletions(-) diff --git a/apps/converter/api.py b/apps/converter/api.py index 82aefe63bb..e118cce3bb 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -47,20 +47,15 @@ def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, ** if output_filepath is None: output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs) - print 'cache image', output_filepath if os.path.exists(output_filepath): return output_filepath - print 'cleanup_files', cleanup_files - office_converter = OfficeConverter(input_filepath) if office_converter: try: - #cleanup_files =False. input_filepath = office_converter.output_filepath except OfficeConverter: - print 'office converter exception' raise UnknownFileFormat('office converter exception') if size: diff --git a/apps/converter/backends/python/base.py b/apps/converter/backends/python/base.py index a57e1f0d2a..a7e404bd46 100644 --- a/apps/converter/backends/python/base.py +++ b/apps/converter/backends/python/base.py @@ -64,7 +64,7 @@ class ConverterClass(ConverterBase): 'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH', '-dNOPAUSE', '-dNOPROMPT', first_page_tmpl, last_page_tmpl, - '-sDEVICE=jpeg', '-dJPEGQ=75', + '-sDEVICE=jpeg', '-dJPEGQ=95', '-r150', output_file_tmpl, input_file_tmpl, '-c "60000000 setvmthreshold"', # use 30MB diff --git a/apps/converter/office_converter.py b/apps/converter/office_converter.py index c592d4b765..093a234fd0 100644 --- a/apps/converter/office_converter.py +++ b/apps/converter/office_converter.py @@ -1,16 +1,13 @@ import os import subprocess -import hashlib from mimetype.api import get_mimetype from common.conf.settings import TEMPORARY_DIRECTORY -from converter.conf.settings import UNOCONV_PATH +from converter.conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE from converter.exceptions import (OfficeConversionError, OfficeBackendError, UnknownFileFormat) -HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() - CACHED_FILE_SUFFIX = u'_office_converter' CONVERTER_OFFICE_FILE_MIMETYPES = [ @@ -22,18 +19,17 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [ 'application/vnd.ms-powerpoint', 'text/plain', 'application/vnd.oasis.opendocument.presentation', + 'application/vnd.oasis.opendocument.text', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.oasis.opendocument.spreadsheet', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.oasis.opendocument.graphics', ] -# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png', -# 'application/vnd.oasis.opendocument.spreadsheet': 'ODF_spreadsheet_32x32.png', -# 'application/vnd.oasis.opendocument.presentation': 'ODF_presentation_32x32.png', -# 'application/vnd.oasis.opendocument.graphics': 'ODF_drawing_32x32.png', -# 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'file_extension_xls.png', -# 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'file_extension_doc.png', -# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png', + class OfficeConverter(object): def __init__(self, input_filepath): - self.backend = OfficeConverterBackendUnoconv(unoconv_path=UNOCONV_PATH) + self.backend_class = OfficeConverterBackendUnoconv self.input_filepath = input_filepath self.exists = False @@ -42,27 +38,17 @@ class OfficeConverter(object): mimetype, encoding = get_mimetype(descriptor, self.input_filepath) if mimetype in CONVERTER_OFFICE_FILE_MIMETYPES: - # Hash file to cache results of conversion - #descriptor = open(self.input_filepath) - #file_hash = HASH_FUNCTION(descriptor.read()) - #descriptor.close() - - #self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([file_hash, CACHED_FILE_SUFFIX])) + # Cache results of conversion self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([self.input_filepath, CACHED_FILE_SUFFIX])) self.exists = os.path.exists(self.output_filepath) - print 'self.input_filepath',self.input_filepath - print 'self.output_filepath',self.output_filepath - print 'self.exists', self.exists if not self.exists: try: + self.backend = self.backend_class() self.backend.convert(self.input_filepath, self.output_filepath) except OfficeBackendError, msg: - print 'OFFICE EXCEPTION' - # convert exception so that atleas the mime type icon is displayed + # convert exception so that at least the mime type icon is displayed raise UnknownFileFormat(msg) - - def __unicode__(self): return getattr(self, 'output_filepath', None) @@ -76,8 +62,8 @@ class OfficeConverter(object): class OfficeConverterBackendUnoconv(object): - def __init__(self, unoconv_path=None): - self.unoconv_path = unoconv_path if unoconv_path else u'/usr/bin/unoconv' + def __init__(self): + self.unoconv_path = UNOCONV_PATH if UNOCONV_PATH else u'/usr/bin/unoconv' if not os.path.exists(self.unoconv_path): raise OfficeBackendError('cannot find unoconv executable') @@ -90,12 +76,15 @@ class OfficeConverterBackendUnoconv(object): command = [] command.append(self.unoconv_path) - #command.append(u'-v') - command.append(u'--pipe') - command.append(u'--format="pdf"') + + if UNOCONV_USE_PIPE: + command.append(u'--pipe') + command.append(u'mayan') + + command.append(u'--format=pdf') command.append(u'--output=%s' % self.output_filepath) command.append(self.input_filepath) - print 'convert' + try: proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) return_code = proc.wait()