"""Base converter class and image transformation classes."""
from __future__ import unicode_literals

import logging
import os
import subprocess
import tempfile

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

from PIL import Image

from django.utils.translation import ugettext_lazy as _

from common.settings import setting_temporary_directory
from common.utils import fs_cleanup
from mimetype.api import get_mimetype

from .exceptions import InvalidOfficeFormat, OfficeConversionError
from .literals import DEFAULT_PAGE_NUMBER, DEFAULT_FILE_FORMAT
from .settings import setting_libreoffice_path

# Size, in bytes, of the chunks used when copying and streaming files.
CHUNK_SIZE = 1024

logger = logging.getLogger(__name__)

# Mimetypes that `ConverterBase.to_pdf` hands over to LibreOffice.
CONVERTER_OFFICE_FILE_MIMETYPES = (
    'application/msword',
    'application/mswrite',
    'application/mspowerpoint',
    'application/msexcel',
    'application/pgp-keys',
    'application/vnd.ms-excel',
    'application/vnd.ms-excel.addin.macroEnabled.12',
    'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
    'application/vnd.ms-powerpoint',
    'application/vnd.oasis.opendocument.chart',
    'application/vnd.oasis.opendocument.chart-template',
    'application/vnd.oasis.opendocument.formula',
    'application/vnd.oasis.opendocument.formula-template',
    'application/vnd.oasis.opendocument.graphics',
    'application/vnd.oasis.opendocument.graphics-template',
    'application/vnd.oasis.opendocument.image',
    'application/vnd.oasis.opendocument.image-template',
    'application/vnd.oasis.opendocument.presentation',
    'application/vnd.oasis.opendocument.presentation-template',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
    'application/vnd.openxmlformats-officedocument.presentationml.template',
    'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    'application/vnd.openxmlformats-officedocument.presentationml.slide',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
    'application/vnd.oasis.opendocument.spreadsheet',
    'application/vnd.oasis.opendocument.spreadsheet-template',
    'application/vnd.oasis.opendocument.text',
    'application/vnd.oasis.opendocument.text-master',
    'application/vnd.oasis.opendocument.text-template',
    'application/vnd.oasis.opendocument.text-web',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/vnd.ms-office',
    'application/xml',
    'text/x-c',
    'text/x-c++',
    'text/x-pascal',
    'text/x-msdos-batch',
    'text/x-python',
    'text/x-shellscript',
    'text/plain',
    'text/rtf',
)


class ConverterBase(object):
    """
    Base converter: wraps a file object, loads it as a PIL image when
    possible and applies transformations to that image.
    """

    @staticmethod
    def soffice(file_object):
        """
        Convert `file_object` to PDF with LibreOffice and yield the result
        in chunks of at most CHUNK_SIZE bytes.

        This is a generator: nothing runs, and no exception is raised,
        until the first chunk is requested. `file_object` is rewound to
        position 0 after being copied.

        Raises OfficeConversionError if the LibreOffice binary is not found
        at the configured path or if it exits with a non-zero return code.
        """
        libreoffice_path = setting_libreoffice_path.value
        if not os.path.exists(libreoffice_path):
            raise OfficeConversionError(
                _('LibreOffice not installed or not found at path: %s') % libreoffice_path
            )

        temporary_directory = setting_temporary_directory.value

        descriptor, input_filepath = tempfile.mkstemp()
        try:
            # Copy the source to a real file so it can be passed to
            # LibreOffice by path. Copying in chunks avoids holding the
            # whole document in memory and avoids short os.write() calls.
            file_object.seek(0)
            with os.fdopen(descriptor, 'wb') as temporary_file:
                while True:
                    data = file_object.read(CHUNK_SIZE)
                    if not data:
                        break
                    temporary_file.write(data)
            file_object.seek(0)

            command = [
                libreoffice_path,
                '--headless',
                '--convert-to',
                'pdf',
                input_filepath,
                '--outdir',
                temporary_directory,
            ]
            logger.debug('command: %s', command)

            # Point HOME at the temporary directory for the child process
            # only, instead of altering the environment of this process.
            environment = os.environ.copy()
            environment['HOME'] = temporary_directory

            proc = subprocess.Popen(
                command, close_fds=True, env=environment,
                stderr=subprocess.PIPE, stdout=subprocess.PIPE
            )
            # communicate() drains both pipes while waiting; wait() alone
            # can deadlock once the child fills a pipe buffer.
            output, error_output = proc.communicate()
            return_code = proc.returncode
        finally:
            fs_cleanup(input_filepath)

        logger.debug('return_code: %s', return_code)
        logger.debug('stderr: %s', error_output)

        if return_code != 0:
            raise OfficeConversionError(error_output)

        filename, extension = os.path.splitext(os.path.basename(input_filepath))
        logger.debug('filename: %s', filename)
        logger.debug('extension: %s', extension)

        converted_output = os.path.join(
            temporary_directory, os.path.extsep.join([filename, 'pdf'])
        )
        logger.debug('converted_output: %s', converted_output)

        try:
            # The converted document is binary data.
            with open(converted_output, 'rb') as converted_file_object:
                while True:
                    data = converted_file_object.read(CHUNK_SIZE)
                    if not data:
                        break
                    yield data
        finally:
            # Remove the generated PDF even when the consumer stops
            # iterating early or reading fails.
            fs_cleanup(converted_output)

    def __init__(self, file_object, mime_type=None):
        """
        Store `file_object`; when `mime_type` is not given, detect it with
        `get_mimetype`.
        """
        self.file_object = file_object
        self.image = None
        self.mime_type = mime_type or get_mimetype(file_object=file_object, mimetype_only=False)[0]
        self.soffice_file = None

    def to_pdf(self):
        """
        Return the `soffice` generator for the wrapped file.

        Raises InvalidOfficeFormat when the mimetype is not one of
        CONVERTER_OFFICE_FILE_MIMETYPES.
        """
        if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
            return ConverterBase.soffice(self.file_object)
        else:
            raise InvalidOfficeFormat(_('Not an office file format.'))

    def seek(self, page_number):
        """
        Load page `page_number` (starting at 0) into `self.image`.

        When PIL cannot open the file, `self.image` is set to the result of
        `convert()` instead.
        """
        self.file_object.seek(0)

        try:
            self.image = Image.open(self.file_object)
        except IOError:
            # Cannot identify image file
            self.image = self.convert(page_number=page_number)
        else:
            self.image.seek(page_number)
            self.image.load()

    def get_page(self, output_format=DEFAULT_FILE_FORMAT):
        """
        Return a rewound StringIO buffer holding the current image saved as
        `output_format`. Loads page 0 first if no image is loaded.
        """
        if not self.image:
            self.seek(0)

        image_buffer = StringIO()
        self.image.save(image_buffer, format=output_format)
        image_buffer.seek(0)

        return image_buffer

    def convert(self, page_number=DEFAULT_PAGE_NUMBER):
        """Record the requested page number; returns None."""
        self.page_number = page_number

    def transform(self, transformation):
        """Apply a single transformation to the current image."""
        if not self.image:
            self.seek(0)

        self.image = transformation.execute_on(self.image)

    def transform_many(self, transformations):
        """Apply each transformation, in order, to the current image."""
        if not self.image:
            self.seek(0)

        for transformation in transformations:
            self.image = transformation.execute_on(self.image)

    def get_page_count(self):
        """
        Store the PDF conversion generator in `self.soffice_file` when the
        file is an office document; returns None.
        """
        try:
            self.soffice_file = self.to_pdf()
        except InvalidOfficeFormat:
            # Not an office document: leave `soffice_file` untouched.
            pass


class BaseTransformation(object):
    """
    Base class for image transformations; also keeps the registry that maps
    transformation names to their classes.
    """
    name = 'base_transformation'
    arguments = ()

    # Shared by all subclasses: transformation name -> transformation class.
    _registry = {}

    @classmethod
    def register(cls, transformation):
        """Add `transformation` to the registry under its `name`."""
        cls._registry[transformation.name] = transformation

    @classmethod
    def get_transformation_choices(cls):
        """Return a list of (name, label) pairs of the registered classes."""
        return [(name, klass.label) for name, klass in cls._registry.items()]

    @classmethod
    def get(cls, name):
        """Return the class registered as `name`; KeyError if unknown."""
        return cls._registry[name]

    def __init__(self, **kwargs):
        """
        Set one attribute per entry in `arguments` from the keyword
        arguments; arguments that are not supplied are set to None.
        """
        for argument_name in self.arguments:
            setattr(self, argument_name, kwargs.get(argument_name))

    def execute_on(self, image):
        """Store `image` and its width / height ratio for subclasses."""
        self.image = image
        self.aspect = 1.0 * image.size[0] / image.size[1]


class TransformationResize(BaseTransformation):
    """Shrink an image to fit inside a width x height box."""
    name = 'resize'
    arguments = ('width', 'height')
    label = _('Resize ')

    def execute_on(self, *args, **kwargs):
        """
        Shrink the image so it fits within `width` x `height` and return it.

        When `height` is not given it is derived from `width` and the aspect
        ratio of the image. The image is resized with `Image.thumbnail`,
        which operates on the image object itself.
        """
        super(TransformationResize, self).execute_on(*args, **kwargs)

        # Cropping to the exact box is currently disabled.
        fit = False

        width = int(self.width)
        # `aspect` is width / height, so the proportional height for a given
        # width is width / aspect (multiplying produced a box that was too
        # short for portrait images).
        height = int(self.height or 1.0 * width / self.aspect)

        # Cheap pre-shrink by powers of two while the image is still much
        # larger than the target, before the expensive antialiased resize.
        factor = 1
        while self.image.size[0] // factor > 2 * width and self.image.size[1] * 2 // factor > 2 * height:
            factor *= 2

        if factor > 1:
            self.image.thumbnail(
                (self.image.size[0] // factor, self.image.size[1] // factor),
                Image.NEAREST
            )

        # calculate the cropping box and get the cropped part
        if fit:
            x1 = y1 = 0
            x2, y2 = self.image.size
            wRatio = 1.0 * x2 / width
            hRatio = 1.0 * y2 / height
            if hRatio > wRatio:
                y1 = y2 / 2 - height * wRatio / 2
                y2 = y2 / 2 + height * wRatio / 2
            else:
                x1 = x2 / 2 - width * hRatio / 2
                x2 = x2 / 2 + width * hRatio / 2
            self.image = self.image.crop((x1, y1, x2, y2))

        # Resize the image with best quality algorithm ANTI-ALIAS
        self.image.thumbnail((width, height), Image.ANTIALIAS)

        return self.image


class TransformationRotate(BaseTransformation):
    """Rotate an image by `degrees`."""
    name = 'rotate'
    arguments = ('degrees',)
    label = _('Rotate ')

    def execute_on(self, *args, **kwargs):
        """
        Return a rotated copy of the image, expanded to hold the whole
        result. The angle passed to PIL is `360 - degrees`.
        """
        super(TransformationRotate, self).execute_on(*args, **kwargs)

        return self.image.rotate(360 - self.degrees, resample=Image.BICUBIC, expand=True)


class TransformationZoom(BaseTransformation):
    """Scale an image by a percentage."""
    name = 'zoom'
    arguments = ('percent',)
    label = _('Zoom ')

    def execute_on(self, *args, **kwargs):
        """Return the image resized to `percent` percent of its size."""
        super(TransformationZoom, self).execute_on(*args, **kwargs)

        decimal_value = float(self.percent) / 100
        return self.image.resize(
            (
                int(self.image.size[0] * decimal_value),
                int(self.image.size[1] * decimal_value)
            ),
            Image.ANTIALIAS
        )


BaseTransformation.register(TransformationResize)
BaseTransformation.register(TransformationRotate)
BaseTransformation.register(TransformationZoom)