Finished office converter using MIME type detection
This commit is contained in:
@@ -47,20 +47,15 @@ def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, **
|
||||
|
||||
if output_filepath is None:
|
||||
output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
|
||||
print 'cache image', output_filepath
|
||||
|
||||
if os.path.exists(output_filepath):
|
||||
return output_filepath
|
||||
|
||||
print 'cleanup_files', cleanup_files
|
||||
|
||||
office_converter = OfficeConverter(input_filepath)
|
||||
if office_converter:
|
||||
try:
|
||||
#cleanup_files =False.
|
||||
input_filepath = office_converter.output_filepath
|
||||
except OfficeConverter:
|
||||
print 'office converter exception'
|
||||
raise UnknownFileFormat('office converter exception')
|
||||
|
||||
if size:
|
||||
|
||||
@@ -64,7 +64,7 @@ class ConverterClass(ConverterBase):
|
||||
'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH',
|
||||
'-dNOPAUSE', '-dNOPROMPT',
|
||||
first_page_tmpl, last_page_tmpl,
|
||||
'-sDEVICE=jpeg', '-dJPEGQ=75',
|
||||
'-sDEVICE=jpeg', '-dJPEGQ=95',
|
||||
'-r150', output_file_tmpl,
|
||||
input_file_tmpl,
|
||||
'-c "60000000 setvmthreshold"', # use ~60MB (60000000 bytes) as the VM threshold
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
import os
|
||||
import subprocess
|
||||
import hashlib
|
||||
|
||||
from mimetype.api import get_mimetype
|
||||
from common.conf.settings import TEMPORARY_DIRECTORY
|
||||
|
||||
from converter.conf.settings import UNOCONV_PATH
|
||||
from converter.conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE
|
||||
from converter.exceptions import (OfficeConversionError,
|
||||
OfficeBackendError, UnknownFileFormat)
|
||||
|
||||
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
|
||||
|
||||
CACHED_FILE_SUFFIX = u'_office_converter'
|
||||
|
||||
CONVERTER_OFFICE_FILE_MIMETYPES = [
|
||||
@@ -22,18 +19,17 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [
|
||||
'application/vnd.ms-powerpoint',
|
||||
'text/plain',
|
||||
'application/vnd.oasis.opendocument.presentation',
|
||||
'application/vnd.oasis.opendocument.text',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.oasis.opendocument.spreadsheet',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'application/vnd.oasis.opendocument.graphics',
|
||||
]
|
||||
# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png',
|
||||
# 'application/vnd.oasis.opendocument.spreadsheet': 'ODF_spreadsheet_32x32.png',
|
||||
# 'application/vnd.oasis.opendocument.presentation': 'ODF_presentation_32x32.png',
|
||||
# 'application/vnd.oasis.opendocument.graphics': 'ODF_drawing_32x32.png',
|
||||
# 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'file_extension_xls.png',
|
||||
# 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'file_extension_doc.png',
|
||||
# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png',
|
||||
|
||||
|
||||
class OfficeConverter(object):
|
||||
def __init__(self, input_filepath):
|
||||
self.backend = OfficeConverterBackendUnoconv(unoconv_path=UNOCONV_PATH)
|
||||
self.backend_class = OfficeConverterBackendUnoconv
|
||||
self.input_filepath = input_filepath
|
||||
self.exists = False
|
||||
|
||||
@@ -42,27 +38,17 @@ class OfficeConverter(object):
|
||||
mimetype, encoding = get_mimetype(descriptor, self.input_filepath)
|
||||
|
||||
if mimetype in CONVERTER_OFFICE_FILE_MIMETYPES:
|
||||
# Hash file to cache results of conversion
|
||||
#descriptor = open(self.input_filepath)
|
||||
#file_hash = HASH_FUNCTION(descriptor.read())
|
||||
#descriptor.close()
|
||||
|
||||
#self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([file_hash, CACHED_FILE_SUFFIX]))
|
||||
# Cache results of conversion
|
||||
self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([self.input_filepath, CACHED_FILE_SUFFIX]))
|
||||
self.exists = os.path.exists(self.output_filepath)
|
||||
print 'self.input_filepath',self.input_filepath
|
||||
print 'self.output_filepath',self.output_filepath
|
||||
print 'self.exists', self.exists
|
||||
if not self.exists:
|
||||
try:
|
||||
self.backend = self.backend_class()
|
||||
self.backend.convert(self.input_filepath, self.output_filepath)
|
||||
except OfficeBackendError, msg:
|
||||
print 'OFFICE EXCEPTION'
|
||||
# convert exception so that atleas the mime type icon is displayed
|
||||
# convert exception so that at least the mime type icon is displayed
|
||||
raise UnknownFileFormat(msg)
|
||||
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
return getattr(self, 'output_filepath', None)
|
||||
|
||||
@@ -76,8 +62,8 @@ class OfficeConverter(object):
|
||||
|
||||
|
||||
class OfficeConverterBackendUnoconv(object):
|
||||
def __init__(self, unoconv_path=None):
|
||||
self.unoconv_path = unoconv_path if unoconv_path else u'/usr/bin/unoconv'
|
||||
def __init__(self):
|
||||
self.unoconv_path = UNOCONV_PATH if UNOCONV_PATH else u'/usr/bin/unoconv'
|
||||
if not os.path.exists(self.unoconv_path):
|
||||
raise OfficeBackendError('cannot find unoconv executable')
|
||||
|
||||
@@ -90,12 +76,15 @@ class OfficeConverterBackendUnoconv(object):
|
||||
|
||||
command = []
|
||||
command.append(self.unoconv_path)
|
||||
#command.append(u'-v')
|
||||
command.append(u'--pipe')
|
||||
command.append(u'--format="pdf"')
|
||||
|
||||
if UNOCONV_USE_PIPE:
|
||||
command.append(u'--pipe')
|
||||
command.append(u'mayan')
|
||||
|
||||
command.append(u'--format=pdf')
|
||||
command.append(u'--output=%s' % self.output_filepath)
|
||||
command.append(self.input_filepath)
|
||||
print 'convert'
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
return_code = proc.wait()
|
||||
|
||||
Reference in New Issue
Block a user