From 67b3e1903180842cab774216d62c5634549c6349 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Sun, 20 Nov 2011 02:48:34 -0400 Subject: [PATCH 01/18] Initial commit of the new office converter class --- apps/converter/api.py | 52 ++++---------- apps/converter/exceptions.py | 4 ++ apps/converter/office_converter.py | 106 +++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 38 deletions(-) create mode 100644 apps/converter/office_converter.py diff --git a/apps/converter/api.py b/apps/converter/api.py index bdcfe40f77..82aefe63bb 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -4,8 +4,6 @@ import hashlib from common.conf.settings import TEMPORARY_DIRECTORY -from converter.conf.settings import UNOCONV_PATH -from converter.exceptions import OfficeConversionError from converter.literals import DEFAULT_PAGE_NUMBER, \ DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, DEFAULT_FILE_FORMAT @@ -16,28 +14,12 @@ from converter.literals import TRANSFORMATION_RESIZE, \ from converter.literals import DIMENSION_SEPARATOR from converter.literals import FILE_FORMATS from converter.utils import cleanup +from converter.office_converter import OfficeConverter + HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() - -CONVERTER_OFFICE_FILE_EXTENSIONS = [ - u'ods', u'docx', u'doc' -] - - -def execute_unoconv(input_filepath, arguments=''): - """ - Executes the program unoconv using subprocess's Popen - """ - command = [] - command.append(UNOCONV_PATH) - command.extend(unicode(arguments).split()) - command.append(input_filepath) - proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE) - return_code = proc.wait() - if return_code != 0: - raise OfficeConversionError(proc.stderr.readline()) - + def cache_cleanup(input_filepath, *args, **kwargs): try: os.remove(create_image_cache_filename(input_filepath, *args, **kwargs)) @@ -53,13 +35,6 @@ def create_image_cache_filename(input_filepath, *args, **kwargs): return None -def 
convert_office_document(input_filepath): - if os.path.exists(UNOCONV_PATH): - execute_unoconv(input_filepath, arguments='-f pdf') - return input_filepath + u'.pdf' - return None - - def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, **kwargs): size = kwargs.get('size') file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT) @@ -70,20 +45,23 @@ def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, ** if transformations is None: transformations = [] - unoconv_output = None - if output_filepath is None: output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs) + print 'cache image', output_filepath if os.path.exists(output_filepath): return output_filepath + + print 'cleanup_files', cleanup_files - path, extension = os.path.splitext(input_filepath) - if extension[1:].lower() in CONVERTER_OFFICE_FILE_EXTENSIONS: - result = convert_office_document(input_filepath) - if result: - unoconv_output = result - input_filepath = result + office_converter = OfficeConverter(input_filepath) + if office_converter: + try: + #cleanup_files =False. 
+ input_filepath = office_converter.output_filepath + except OfficeConverter: + print 'office converter exception' + raise UnknownFileFormat('office converter exception') if size: transformations.append( @@ -114,8 +92,6 @@ def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, ** finally: if cleanup_files: cleanup(input_filepath) - if unoconv_output: - cleanup(unoconv_output) return output_filepath diff --git a/apps/converter/exceptions.py b/apps/converter/exceptions.py index e90fd4bb34..1423d38002 100644 --- a/apps/converter/exceptions.py +++ b/apps/converter/exceptions.py @@ -29,3 +29,7 @@ class UnkownConvertError(ConvertError): class OfficeConversionError(ConvertError): pass + + +class OfficeBackendError(OfficeConversionError): + pass diff --git a/apps/converter/office_converter.py b/apps/converter/office_converter.py new file mode 100644 index 0000000000..c592d4b765 --- /dev/null +++ b/apps/converter/office_converter.py @@ -0,0 +1,106 @@ +import os +import subprocess +import hashlib + +from mimetype.api import get_mimetype +from common.conf.settings import TEMPORARY_DIRECTORY + +from converter.conf.settings import UNOCONV_PATH +from converter.exceptions import (OfficeConversionError, + OfficeBackendError, UnknownFileFormat) + +HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() + +CACHED_FILE_SUFFIX = u'_office_converter' + +CONVERTER_OFFICE_FILE_MIMETYPES = [ + 'application/msword', + 'application/mswrite', + 'application/mspowerpoint', + 'application/msexcel', + 'application/vnd.ms-excel', + 'application/vnd.ms-powerpoint', + 'text/plain', + 'application/vnd.oasis.opendocument.presentation', +] +# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png', +# 'application/vnd.oasis.opendocument.spreadsheet': 'ODF_spreadsheet_32x32.png', +# 'application/vnd.oasis.opendocument.presentation': 'ODF_presentation_32x32.png', +# 'application/vnd.oasis.opendocument.graphics': 'ODF_drawing_32x32.png', +# 
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'file_extension_xls.png', +# 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'file_extension_doc.png', +# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png', + +class OfficeConverter(object): + def __init__(self, input_filepath): + self.backend = OfficeConverterBackendUnoconv(unoconv_path=UNOCONV_PATH) + self.input_filepath = input_filepath + self.exists = False + + # Make sure file is of a known office format + descriptor = open(self.input_filepath) + mimetype, encoding = get_mimetype(descriptor, self.input_filepath) + + if mimetype in CONVERTER_OFFICE_FILE_MIMETYPES: + # Hash file to cache results of conversion + #descriptor = open(self.input_filepath) + #file_hash = HASH_FUNCTION(descriptor.read()) + #descriptor.close() + + #self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([file_hash, CACHED_FILE_SUFFIX])) + self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([self.input_filepath, CACHED_FILE_SUFFIX])) + self.exists = os.path.exists(self.output_filepath) + print 'self.input_filepath',self.input_filepath + print 'self.output_filepath',self.output_filepath + print 'self.exists', self.exists + if not self.exists: + try: + self.backend.convert(self.input_filepath, self.output_filepath) + except OfficeBackendError, msg: + print 'OFFICE EXCEPTION' + # convert exception so that atleas the mime type icon is displayed + raise UnknownFileFormat(msg) + + + + def __unicode__(self): + return getattr(self, 'output_filepath', None) + + def __str__(self): + return str(self.__unicode__()) + + def __nonzero__(self): + return self.exists + + __bool__ = __nonzero__ + + +class OfficeConverterBackendUnoconv(object): + def __init__(self, unoconv_path=None): + self.unoconv_path = unoconv_path if unoconv_path else u'/usr/bin/unoconv' + if not os.path.exists(self.unoconv_path): + raise OfficeBackendError('cannot find unoconv executable') + + 
def convert(self, input_filepath, output_filepath): + """ + Executes the program unoconv using subprocess's Popen + """ + self.input_filepath = input_filepath + self.output_filepath = output_filepath + + command = [] + command.append(self.unoconv_path) + #command.append(u'-v') + command.append(u'--pipe') + command.append(u'--format="pdf"') + command.append(u'--output=%s' % self.output_filepath) + command.append(self.input_filepath) + print 'convert' + try: + proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + return_code = proc.wait() + readline = proc.stderr.readline() + if return_code != 0: + raise OfficeBackendError(proc.stderr.readline()) + except OSError, msg: + raise OfficeBackendError(msg) From e590cb041cba77b2b0229b22f1d79900ecaf7bd2 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 21 Nov 2011 02:47:52 -0400 Subject: [PATCH 02/18] Finished office converter using MIME type detection --- apps/converter/api.py | 5 --- apps/converter/backends/python/base.py | 2 +- apps/converter/office_converter.py | 51 ++++++++++---------------- 3 files changed, 21 insertions(+), 37 deletions(-) diff --git a/apps/converter/api.py b/apps/converter/api.py index 82aefe63bb..e118cce3bb 100644 --- a/apps/converter/api.py +++ b/apps/converter/api.py @@ -47,20 +47,15 @@ def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, ** if output_filepath is None: output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs) - print 'cache image', output_filepath if os.path.exists(output_filepath): return output_filepath - print 'cleanup_files', cleanup_files - office_converter = OfficeConverter(input_filepath) if office_converter: try: - #cleanup_files =False. 
input_filepath = office_converter.output_filepath except OfficeConverter: - print 'office converter exception' raise UnknownFileFormat('office converter exception') if size: diff --git a/apps/converter/backends/python/base.py b/apps/converter/backends/python/base.py index a57e1f0d2a..a7e404bd46 100644 --- a/apps/converter/backends/python/base.py +++ b/apps/converter/backends/python/base.py @@ -64,7 +64,7 @@ class ConverterClass(ConverterBase): 'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH', '-dNOPAUSE', '-dNOPROMPT', first_page_tmpl, last_page_tmpl, - '-sDEVICE=jpeg', '-dJPEGQ=75', + '-sDEVICE=jpeg', '-dJPEGQ=95', '-r150', output_file_tmpl, input_file_tmpl, '-c "60000000 setvmthreshold"', # use 30MB diff --git a/apps/converter/office_converter.py b/apps/converter/office_converter.py index c592d4b765..093a234fd0 100644 --- a/apps/converter/office_converter.py +++ b/apps/converter/office_converter.py @@ -1,16 +1,13 @@ import os import subprocess -import hashlib from mimetype.api import get_mimetype from common.conf.settings import TEMPORARY_DIRECTORY -from converter.conf.settings import UNOCONV_PATH +from converter.conf.settings import UNOCONV_PATH, UNOCONV_USE_PIPE from converter.exceptions import (OfficeConversionError, OfficeBackendError, UnknownFileFormat) -HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() - CACHED_FILE_SUFFIX = u'_office_converter' CONVERTER_OFFICE_FILE_MIMETYPES = [ @@ -22,18 +19,17 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [ 'application/vnd.ms-powerpoint', 'text/plain', 'application/vnd.oasis.opendocument.presentation', + 'application/vnd.oasis.opendocument.text', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.oasis.opendocument.spreadsheet', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.oasis.opendocument.graphics', ] -# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png', -# 'application/vnd.oasis.opendocument.spreadsheet': 
'ODF_spreadsheet_32x32.png', -# 'application/vnd.oasis.opendocument.presentation': 'ODF_presentation_32x32.png', -# 'application/vnd.oasis.opendocument.graphics': 'ODF_drawing_32x32.png', -# 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'file_extension_xls.png', -# 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'file_extension_doc.png', -# 'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png', + class OfficeConverter(object): def __init__(self, input_filepath): - self.backend = OfficeConverterBackendUnoconv(unoconv_path=UNOCONV_PATH) + self.backend_class = OfficeConverterBackendUnoconv self.input_filepath = input_filepath self.exists = False @@ -42,27 +38,17 @@ class OfficeConverter(object): mimetype, encoding = get_mimetype(descriptor, self.input_filepath) if mimetype in CONVERTER_OFFICE_FILE_MIMETYPES: - # Hash file to cache results of conversion - #descriptor = open(self.input_filepath) - #file_hash = HASH_FUNCTION(descriptor.read()) - #descriptor.close() - - #self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([file_hash, CACHED_FILE_SUFFIX])) + # Cache results of conversion self.output_filepath = os.path.join(TEMPORARY_DIRECTORY, u''.join([self.input_filepath, CACHED_FILE_SUFFIX])) self.exists = os.path.exists(self.output_filepath) - print 'self.input_filepath',self.input_filepath - print 'self.output_filepath',self.output_filepath - print 'self.exists', self.exists if not self.exists: try: + self.backend = self.backend_class() self.backend.convert(self.input_filepath, self.output_filepath) except OfficeBackendError, msg: - print 'OFFICE EXCEPTION' - # convert exception so that atleas the mime type icon is displayed + # convert exception so that at least the mime type icon is displayed raise UnknownFileFormat(msg) - - def __unicode__(self): return getattr(self, 'output_filepath', None) @@ -76,8 +62,8 @@ class OfficeConverter(object): class OfficeConverterBackendUnoconv(object): 
- def __init__(self, unoconv_path=None): - self.unoconv_path = unoconv_path if unoconv_path else u'/usr/bin/unoconv' + def __init__(self): + self.unoconv_path = UNOCONV_PATH if UNOCONV_PATH else u'/usr/bin/unoconv' if not os.path.exists(self.unoconv_path): raise OfficeBackendError('cannot find unoconv executable') @@ -76,12 +76,15 @@ class OfficeConverterBackendUnoconv(object): command = [] command.append(self.unoconv_path) - #command.append(u'-v') - command.append(u'--pipe') - command.append(u'--format="pdf"') + + if UNOCONV_USE_PIPE: + command.append(u'--pipe') + command.append(u'mayan') + + command.append(u'--format=pdf') command.append(u'--output=%s' % self.output_filepath) command.append(self.input_filepath) - print 'convert' + try: proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE) return_code = proc.wait() From e4b9b135dac3aa9e2dc0d55864acc8c0c6fb800d Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 21 Nov 2011 02:48:29 -0400 Subject: [PATCH 03/18] Added new configuration option CONVERTER_UNOCONV_USE_PIPE to let unoconv use pipes instead of ports to call LibreOffice --- apps/converter/conf/settings.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/converter/conf/settings.py b/apps/converter/conf/settings.py index 08377880b4..1dbfe4d6bc 100644 --- a/apps/converter/conf/settings.py +++ b/apps/converter/conf/settings.py @@ -1,4 +1,5 @@ -"""Configuration options for the converter app""" +'''Configuration options for the converter app''' + from django.utils.translation import ugettext_lazy as _ from smart_settings.api import register_settings @@ -12,7 +13,9 @@ register_settings( {'name': u'GM_PATH', 'global_name': u'CONVERTER_GM_PATH', 'default': u'/usr/bin/gm', 'description': _(u'File path to graphicsmagick\'s program.'), 'exists': True}, {'name': u'GM_SETTINGS', 'global_name': u'CONVERTER_GM_SETTINGS', 'default': u''}, {'name': u'GRAPHICS_BACKEND', 'global_name': 
u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')}, - {'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True}, + {'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True, 'description': _(u'Path to the unoconv program.')}, + {'name': u'UNOCONV_USE_PIPE', 'global_name': u'CONVERTER_UNOCONV_USE_PIPE', 'default': True, 'description': _(u'Use alternate method of connection to LibreOffice using a pipe, it is slower but less prone to segmentation faults.')}, + #{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'}, #{'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'}, #{'name': u'PRINT_QUALITY_OPTIONS', 'global_name': u'CONVERTER_PRINT_QUALITY_OPTIONS', 'default': u'-density 500'}, From 9b95201b4d656a79ee82a9d4cf11578de68ec289 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 21 Nov 2011 05:38:05 -0400 Subject: [PATCH 04/18] Moved project wide schedule instance to the runtime.py file --- apps/scheduler/__init__.py | 3 --- apps/scheduler/api.py | 2 +- apps/scheduler/runtime.py | 4 ++++ 3 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 apps/scheduler/runtime.py diff --git a/apps/scheduler/__init__.py b/apps/scheduler/__init__.py index a9440e946b..8b13789179 100644 --- a/apps/scheduler/__init__.py +++ b/apps/scheduler/__init__.py @@ -1,4 +1 @@ -from apscheduler.scheduler import Scheduler -scheduler = Scheduler() -scheduler.start() diff --git a/apps/scheduler/api.py b/apps/scheduler/api.py index 59162e10d0..5f79d2433b 100644 --- a/apps/scheduler/api.py +++ b/apps/scheduler/api.py @@ -1,4 +1,4 @@ -from scheduler import 
scheduler +from scheduler.runtime import scheduler from scheduler.exceptions import AlreadyScheduled registered_jobs = {} diff --git a/apps/scheduler/runtime.py b/apps/scheduler/runtime.py new file mode 100644 index 0000000000..a9440e946b --- /dev/null +++ b/apps/scheduler/runtime.py @@ -0,0 +1,4 @@ +from apscheduler.scheduler import Scheduler + +scheduler = Scheduler() +scheduler.start() From e8f62874dd74223f748f64a8924a2d8790168f64 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 21 Nov 2011 05:38:35 -0400 Subject: [PATCH 05/18] Updated generic_confirm template to store the views' previous url --- apps/common/templates/generic_confirm.html | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/common/templates/generic_confirm.html b/apps/common/templates/generic_confirm.html index 10a6e927d6..b020583e2e 100644 --- a/apps/common/templates/generic_confirm.html +++ b/apps/common/templates/generic_confirm.html @@ -22,7 +22,11 @@