Finished office converter using MIME type detection

This commit is contained in:
Roberto Rosario
2011-11-21 02:47:52 -04:00
parent 67b3e19031
commit e590cb041c
3 changed files with 21 additions and 37 deletions

View File

@@ -47,20 +47,15 @@ def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, **
if output_filepath is None:
output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
print 'cache image', output_filepath
if os.path.exists(output_filepath):
return output_filepath
print 'cleanup_files', cleanup_files
office_converter = OfficeConverter(input_filepath)
if office_converter:
try:
#cleanup_files =False.
input_filepath = office_converter.output_filepath
except OfficeConverter:
print 'office converter exception'
raise UnknownFileFormat('office converter exception')
if size:

View File

@@ -64,7 +64,7 @@ class ConverterClass(ConverterBase):
'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH',
'-dNOPAUSE', '-dNOPROMPT',
first_page_tmpl, last_page_tmpl,
'-sDEVICE=jpeg', '-dJPEGQ=75',
'-sDEVICE=jpeg', '-dJPEGQ=95',
'-r150', output_file_tmpl,
input_file_tmpl,
'-c "60000000 setvmthreshold"', # use 30MB

View File

@@ -1,16 +1,13 @@
import os
import subprocess
import hashlib
from mimetype.api import get_mimetype
from common.conf.settings import TEMPORARY_DIRECTORY
from converter.conf.settings import UNOCONV_PATH
from converter.conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE
from converter.exceptions import (OfficeConversionError,
OfficeBackendError, UnknownFileFormat)
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
CACHED_FILE_SUFFIX = u'_office_converter'
CONVERTER_OFFICE_FILE_MIMETYPES = [
@@ -22,18 +19,17 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [
'application/vnd.ms-powerpoint',
'text/plain',
'application/vnd.oasis.opendocument.presentation',
'application/vnd.oasis.opendocument.text',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.oasis.opendocument.spreadsheet',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.oasis.opendocument.graphics',
]
# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png',
# 'application/vnd.oasis.opendocument.spreadsheet': 'ODF_spreadsheet_32x32.png',
# 'application/vnd.oasis.opendocument.presentation': 'ODF_presentation_32x32.png',
# 'application/vnd.oasis.opendocument.graphics': 'ODF_drawing_32x32.png',
# 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'file_extension_xls.png',
# 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'file_extension_doc.png',
# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png',
class OfficeConverter(object):
def __init__(self, input_filepath):
self.backend = OfficeConverterBackendUnoconv(unoconv_path=UNOCONV_PATH)
self.backend_class = OfficeConverterBackendUnoconv
self.input_filepath = input_filepath
self.exists = False
@@ -42,27 +38,17 @@ class OfficeConverter(object):
mimetype, encoding = get_mimetype(descriptor, self.input_filepath)
if mimetype in CONVERTER_OFFICE_FILE_MIMETYPES:
# Hash file to cache results of conversion
#descriptor = open(self.input_filepath)
#file_hash = HASH_FUNCTION(descriptor.read())
#descriptor.close()
#self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([file_hash, CACHED_FILE_SUFFIX]))
# Cache results of conversion
self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([self.input_filepath, CACHED_FILE_SUFFIX]))
self.exists = os.path.exists(self.output_filepath)
print 'self.input_filepath',self.input_filepath
print 'self.output_filepath',self.output_filepath
print 'self.exists', self.exists
if not self.exists:
try:
self.backend = self.backend_class()
self.backend.convert(self.input_filepath, self.output_filepath)
except OfficeBackendError, msg:
print 'OFFICE EXCEPTION'
# convert exception so that atleas the mime type icon is displayed
# convert exception so that at least the mime type icon is displayed
raise UnknownFileFormat(msg)
def __unicode__(self):
return getattr(self, 'output_filepath', None)
@@ -76,8 +62,8 @@ class OfficeConverter(object):
class OfficeConverterBackendUnoconv(object):
def __init__(self, unoconv_path=None):
self.unoconv_path = unoconv_path if unoconv_path else u'/usr/bin/unoconv'
def __init__(self):
self.unoconv_path = UNOCONV_PATH if UNOCONV_PATH else u'/usr/bin/unoconv'
if not os.path.exists(self.unoconv_path):
raise OfficeBackendError('cannot find unoconv executable')
@@ -90,12 +76,15 @@ class OfficeConverterBackendUnoconv(object):
command = []
command.append(self.unoconv_path)
#command.append(u'-v')
command.append(u'--pipe')
command.append(u'--format="pdf"')
if UNOCONV_USE_PIPE:
command.append(u'--pipe')
command.append(u'mayan')
command.append(u'--format=pdf')
command.append(u'--output=%s' % self.output_filepath)
command.append(self.input_filepath)
print 'convert'
try:
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()