diff --git a/mayan/apps/converter/__init__.py b/mayan/apps/converter/__init__.py index e69de29bb2..04b9fa2c07 100644 --- a/mayan/apps/converter/__init__.py +++ b/mayan/apps/converter/__init__.py @@ -0,0 +1,4 @@ +from .classes import ( + TransformationResize, TransformationRotate, TransformationZoom +) +from .runtime import converter_class diff --git a/mayan/apps/converter/api.py b/mayan/apps/converter/api.py index e999313eab..42ecf485e8 100644 --- a/mayan/apps/converter/api.py +++ b/mayan/apps/converter/api.py @@ -15,7 +15,6 @@ from .literals import ( DEFAULT_FILE_FORMAT, TRANSFORMATION_CHOICES, TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR ) -from .runtime import backend, office_converter HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest() diff --git a/mayan/apps/converter/backends/__init__.py b/mayan/apps/converter/backends/__init__.py index 76f4a214c5..8b13789179 100644 --- a/mayan/apps/converter/backends/__init__.py +++ b/mayan/apps/converter/backends/__init__.py @@ -1,16 +1 @@ -from __future__ import unicode_literals - -class ConverterBase(object): - """ - Base class that all backend classes must inherit - """ - - def convert(self, input_data, ): - raise NotImplementedError() - - def transform(self, input_data, transformations): - raise NotImplementedError() - - def get_page_count(self, input_data): - raise NotImplementedError() diff --git a/mayan/apps/converter/backends/graphicsmagick.py b/mayan/apps/converter/backends/graphicsmagick.py index dac8a20c16..b308ec16bb 100644 --- a/mayan/apps/converter/backends/graphicsmagick.py +++ b/mayan/apps/converter/backends/graphicsmagick.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import subprocess -from . import ConverterBase +from ..classes import ConverterBase from ..exceptions import ConvertError, IdentifyError, UnknownFileFormat from ..literals import ( TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM diff --git a/mayan/apps/converter/backends/imagemagick.py b/mayan/apps/converter/backends/imagemagick.py index 1d5020188e..d5b25f8131 100644 --- a/mayan/apps/converter/backends/imagemagick.py +++ b/mayan/apps/converter/backends/imagemagick.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import subprocess -from . import ConverterBase +from ..classes import ConverterBase from ..exceptions import ConvertError, IdentifyError, UnknownFileFormat from ..literals import ( DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER, DIMENSION_SEPARATOR, diff --git a/mayan/apps/converter/backends/python.py b/mayan/apps/converter/backends/python.py index 1c7dc44658..a82cb08ac7 100644 --- a/mayan/apps/converter/backends/python.py +++ b/mayan/apps/converter/backends/python.py @@ -17,7 +17,7 @@ import sh from common.utils import fs_cleanup from mimetype.api import get_mimetype -from . import ConverterBase +from ..classes import ConverterBase from ..exceptions import ConvertError, UnknownFileFormat from ..literals import ( DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER, TRANSFORMATION_RESIZE, @@ -37,18 +37,38 @@ logger = logging.getLogger(__name__) class Python(ConverterBase): - def get_page_count(self, file_object, mimetype=None): + + def convert(self, *args, **kwargs): + super(Python, self).convert(*args, **kwargs) + + if self.mime_type == 'application/pdf' and pdftoppm: + + new_file_object, input_filepath = tempfile.mkstemp() + + if self.soffice_file_object: + os.write(new_file_object, self.soffice_file_object.read()) + self.soffice_file_object.close() + else: + os.write(new_file_object, self.file_object.read()) + self.file_object.seek(0) + + os.close(new_file_object) + + image_buffer = io.BytesIO() + try: + pdftoppm(input_filepath, f=self.page_number + 1, l=self.page_number + 1, _out=image_buffer) + image_buffer.seek(0) + return Image.open(image_buffer) + finally: + fs_cleanup(input_filepath) + + def get_page_count(self): page_count = 1 - if not mimetype: - mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True) - else: - encoding = None - - if mimetype == 'application/pdf': + if self.mime_type == 'application/pdf': # If file is a PDF open it with slate to determine the page count try: - pages = slate.PDF(file_object) + pages = slate.PDF(self.file_object) except Exception as exception: logger.error('slate exception; %s', exception) return 1 @@ -56,120 +76,22 @@ class Python(ConverterBase): else: return len(pages) finally: - file_object.seek(0) + self.file_object.seek(0) try: - image = Image.open(file_object) - except IOError: # cannot identify image file - raise UnknownFileFormat + image = Image.open(self.file_object) finally: - file_object.seek(0) + self.file_object.seek(0) try: while True: image.seek(image.tell() + 1) page_count += 1 - # do something to im - except EOFError: - pass # end of sequence - - return page_count - - def convert(self, file_object, mimetype=None, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER): - if not mimetype: - mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True) - - if mimetype == 'application/pdf' and pdftoppm: - image_buffer = io.BytesIO() - - new_file_object, input_filepath = tempfile.mkstemp() - os.write(new_file_object, file_object.read()) - os.close(new_file_object) - - pdftoppm(input_filepath, f=page, l=page, _out=image_buffer) - image_buffer.seek(0) - image = Image.open(image_buffer) - fs_cleanup(input_filepath) - else: - image = Image.open(file_object) - - current_page = 0 - try: - while current_page == page - 1: - image.seek(image.tell() + 1) - current_page += 1 - # do something to im except EOFError: # end of sequence pass - if image.mode not in ('L', 'RGB'): - image = image.convert('RGB') + return page_count - output = StringIO() - image.save(output, format=output_format) - return output - ''' - try: - if transformations: - aspect = 1.0 * im.size[0] / im.size[1] - for transformation in transformations: - arguments = transformation.get('arguments') - if transformation['transformation'] == TRANSFORMATION_RESIZE: - width = int(arguments.get('width', 0)) - height = int(arguments.get('height', 1.0 * width * aspect)) - im = self.resize(im, (width, height)) - elif transformation['transformation'] == TRANSFORMATION_ZOOM: - decimal_value = float(arguments.get('percent', 100)) / 100 - im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1])) - elif transformation['transformation'] == TRANSFORMATION_ROTATE: - # PIL counter degress counter-clockwise, reverse them - im = im.rotate(360 - arguments.get('degrees', 0)) - except: - # Ignore all transformation error - pass - ''' - - # From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python - def resize(self, img, box, fit=False, out=None): - """ - Downsample the image. - @param img: Image - an Image-object - @param box: tuple(x, y) - the bounding box of the result image - @param fit: boolean - crop the image to fill the box - @param out: file-like-object - save the image into the output stream - """ - # preresize image with factor 2, 4, 8 and fast algorithm - factor = 1 - while img.size[0] / factor > 2 * box[0] and img.size[1] * 2 / factor > 2 * box[1]: - factor *= 2 - if factor > 1: - img.thumbnail((img.size[0] / factor, img.size[1] / factor), Image.NEAREST) - - # calculate the cropping box and get the cropped part - if fit: - x1 = y1 = 0 - x2, y2 = img.size - wRatio = 1.0 * x2 / box[0] - hRatio = 1.0 * y2 / box[1] - if hRatio > wRatio: - y1 = y2 / 2 - box[1] * wRatio / 2 - y2 = y2 / 2 + box[1] * wRatio / 2 - else: - x1 = x2 / 2 - box[0] * hRatio / 2 - x2 = x2 / 2 + box[0] * hRatio / 2 - img = img.crop((x1, y1, x2, y2)) - - # Resize the image with best quality algorithm ANTI-ALIAS - img.thumbnail(box, Image.ANTIALIAS) - - if out: - # save it into a file-like object - img.save(out, 'JPEG', quality=75) - else: - return img - - # if isinstance(self.regex, basestring): - # self.regex = re.compile(regex) diff --git a/mayan/apps/converter/classes.py b/mayan/apps/converter/classes.py index 1412f28e3d..bc3424c5e4 100644 --- a/mayan/apps/converter/classes.py +++ b/mayan/apps/converter/classes.py @@ -1,12 +1,21 @@ from __future__ import unicode_literals import logging +import io import os import subprocess from tempfile import mkstemp +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +from PIL import Image + from django.utils.encoding import smart_str from django.utils.module_loading import import_string +from django.utils.translation import ugettext_lazy as _ from common.settings import TEMPORARY_DIRECTORY from common.utils import fs_cleanup @@ -18,7 +27,6 @@ from .literals import ( DEFAULT_FILE_FORMAT, TRANSFORMATION_CHOICES, TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR ) -from .office_converter import OfficeConverter from .settings import GRAPHICS_BACKEND, LIBREOFFICE_PATH CONVERTER_OFFICE_FILE_MIMETYPES = [ @@ -70,44 +78,7 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [ logger = logging.getLogger(__name__) -logger.debug('initializing office backend') -try: - office_converter = OfficeConverter() -except OfficeBackendError as exception: - logger.error('error initializing office backend; %s', exception) - office_converter = None -else: - logger.debug('office_backend initialized') - -backend = import_string(GRAPHICS_BACKEND)() - - -class BaseTransformation(object): - name = 'base_transformation' - arguments = () - - def __init__(self, **kwargs): - for argument_name in self.arguments: - setattr(self, argument_name, kwargs.get(argument_name)) - - -class TransformationResize(BaseTransformation): - name = 'resize' - arguments = ('width', 'height') - - -class TransformationRotate(BaseTransformation): - name = 'rotate' - arguments = ('degrees',) - - -class TransformationScale(BaseTransformation): - name = 'scale' - arguments = ('percent',) - - -class Converter(object): - +class ConverterBase(object): @staticmethod def soffice(file_object): """ @@ -154,38 +125,140 @@ class Converter(object): def __init__(self, file_object, mime_type=None): self.file_object = file_object self.mime_type = mime_type or get_mimetype(file_object=file_object, mimetype_only=False)[0] - self.temporary_files = [] + self.soffice_file_object = None - def transform(self, transformations, page=DEFAULT_PAGE_NUMBER): - pass + def seek(self, page_number): + # Starting with #0 + self.file_object.seek(0) + + try: + self.image = Image.open(self.file_object) + except IOError: + # Cannot identify image file + self.image = self.convert(page_number=page_number) + else: + self.image.seek(page_number) + self.image.load() + + def get_page(self, output_format=DEFAULT_FILE_FORMAT): + if not self.image: + self.seek(1) + + image_buffer = StringIO() + self.image.save(image_buffer, format=output_format) + image_buffer.seek(0) + + return image_buffer + + def convert(self, page_number=DEFAULT_PAGE_NUMBER): + self.page_number = page_number + + self.mime_type = 'application/pdf' - def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER): if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES: if os.path.exists(LIBREOFFICE_PATH): - converted_output = Converter.soffice(self.file_object) - self.file_object.close() - self.file_object = open(converted_output) - self.mime_type = 'application/pdf' - self.temporary_file.append(converted_output) + if not self.soffice_file_object: + converted_output = Converter.soffice(self.file_object) + self.file_object.seek(0) + self.soffice_file_object = open(converted_output) + self.mime_type = 'application/pdf' + fs_cleanup(converted_output) + else: + self.soffice_file_object.seek(0) else: # TODO: NO LIBREOFFICE FOUND ERROR pass - for temporary_file in self.temporary_files: - fs_cleanup(temporary_file) + def transform(self, transformation): + self.image = transformation.execute_on(self.image) - return backend.convert(file_object=self.file_object, mimetype=self.mime_type, output_format=output_format, page=page) + def transform_many(self, transformations): + for transformation in transformations: + self.image = transformation.execute_on(self.image) def get_page_count(self): - return backend.get_page_count(file_object) + raise NotImplementedError() -''' -def get_available_transformations_choices(): - result = [] - for transformation in backend.get_available_transformations(): - result.append((transformation, TRANSFORMATION_CHOICES[transformation]['label'])) +class BaseTransformation(object): + name = 'base_transformation' + arguments = () - return result -''' + _registry = {} + @classmethod + def get_transformations_classes(cls): + return map(lambda name: getattr(cls, name), filter(lambda entry: entry.startswith('Transform'), dir(cls))) + + @classmethod + def get_transformations_choices(cls): + return [(transformation.name, transformation.label) for transformation in cls.get_transformations_classes()] + + def __init__(self, **kwargs): + for argument_name in self.arguments: + setattr(self, argument_name, kwargs.get(argument_name)) + + def execute_on(self, image): + self.image = image + self.aspect = 1.0 * image.size[0] / image.size[1] + + +class TransformationResize(BaseTransformation): + name = 'resize' + arguments = ('width', 'height') + label = _('Resize') + + def execute_on(self, *args, **kwargs): + super(TransformationResize, self).execute_on(*args, **kwargs) + fit = False + + width = int(self.width) + height = int(self.height or 1.0 * width * self.aspect) + + factor = 1 + while self.image.size[0] / factor > 2 * width and self.image.size[1] * 2 / factor > 2 * height: + factor *= 2 + if factor > 1: + self.image.thumbnail((self.image.size[0] / factor, self.image.size[1] / factor), Image.NEAREST) + + # calculate the cropping box and get the cropped part + if fit: + x1 = y1 = 0 + x2, y2 = self.image.size + wRatio = 1.0 * x2 / width + hRatio = 1.0 * y2 / height + if hRatio > wRatio: + y1 = y2 / 2 - height * wRatio / 2 + y2 = y2 / 2 + height * wRatio / 2 + else: + x1 = x2 / 2 - width * hRatio / 2 + x2 = x2 / 2 + width * hRatio / 2 + self.image = self.image.crop((x1, y1, x2, y2)) + + # Resize the image with best quality algorithm ANTI-ALIAS + self.image.thumbnail((width, height), Image.ANTIALIAS) + + return self.image + + +class TransformationRotate(BaseTransformation): + name = 'rotate' + arguments = ('degrees',) + label = _('Rotate') + + def execute_on(self, *args, **kwargs): + super(TransformationRotate, self).execute_on(*args, **kwargs) + + return self.image.rotate(360 - self.degrees) + + +class TransformationZoom(BaseTransformation): + name = 'zoom' + arguments = ('percent',) + label = _('Zoom') + + def execute_on(self, *args, **kwargs): + super(TransformationZoom, self).execute_on(*args, **kwargs) + + decimal_value = float(self.percent) / 100 + return self.image.resize((int(self.image.size[0] * decimal_value), int(self.image.size[1] * decimal_value)), Image.ANTIALIAS) diff --git a/mayan/apps/converter/runtime.py b/mayan/apps/converter/runtime.py index 858c2a3909..23449d62f4 100644 --- a/mayan/apps/converter/runtime.py +++ b/mayan/apps/converter/runtime.py @@ -4,20 +4,7 @@ import logging from django.utils.module_loading import import_string -from .exceptions import OfficeBackendError -from .office_converter import OfficeConverter from .settings import GRAPHICS_BACKEND logger = logging.getLogger(__name__) - -logger.debug('initializing office backend') -try: - office_converter = OfficeConverter() -except OfficeBackendError as exception: - logger.error('error initializing office backend; %s', exception) - office_converter = None -else: - logger.debug('office_backend initialized') - - -backend = import_string(GRAPHICS_BACKEND)() +backend = converter_class = import_string(GRAPHICS_BACKEND) diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py index 71feb15e1f..a39bc61989 100644 --- a/mayan/apps/documents/models.py +++ b/mayan/apps/documents/models.py @@ -18,7 +18,9 @@ from django.utils.translation import ugettext_lazy as _ from acls.utils import apply_default_acls from common.settings import TEMPORARY_DIRECTORY from common.utils import fs_cleanup -from converter.classes import Converter +from converter import ( + converter_class, TransformationResize, TransformationRotate, TransformationZoom +) from converter.exceptions import UnknownFileFormat from converter.literals import ( DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, DEFAULT_PAGE_NUMBER @@ -355,7 +357,7 @@ class DocumentVersion(models.Model): #self.save_to_file(filepath) try: with self.open() as file_object: - converter = Converter(file_object=file_object, mimetype=self.mimetype) + converter = converter_class(file_object=file_object, mimetype=self.mimetype) detected_pages = converter.get_page_count() except UnknownFileFormat: # If converter backend doesn't understand the format, @@ -536,45 +538,53 @@ class DocumentPage(models.Model): def get_image(self, *args, **kwargs): transformations = kwargs.pop('transformations', []) - #size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, as_base64=False, version=None): - #if zoom < ZOOM_MIN_LEVEL: - # zoom = ZOOM_MIN_LEVEL + size = kwargs.pop('size', DISPLAY_SIZE) + rotation = kwargs.pop('rotation', DEFAULT_ROTATION) + zoom_level = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL) - #if zoom > ZOOM_MAX_LEVEL: - # zoom = ZOOM_MAX_LEVEL + if zoom_level < ZOOM_MIN_LEVEL: + zoom_level = ZOOM_MIN_LEVEL - #rotation = rotation % 360 + if zoom_level > ZOOM_MAX_LEVEL: + zoom_level = ZOOM_MAX_LEVEL + + rotation = rotation % 360 - #file_path = self.get_valid_image(size=size, page=page, zoom=zoom, rotation=rotation, version=version) - #logger.debug('file_path: %s', file_path) as_base64 = kwargs.pop('as_base64', False) cache_filename = self.get_cache_filename() - if os.path.exists(cache_filename) and 0: - with open(cache_filename) as file_object: - data = file_object.read() + if os.path.exists(cache_filename): + converter = converter_class(file_object=open(cache_filename)) - if as_base64: - return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data)) - else: - return data + converter.seek(0) else: try: - converter = Converter(file_object=self.document_version.open()) - image_buffer = converter.convert(page=self.page_number, output_format='PNG') + converter = converter_class(file_object=self.document_version.open()) + converter.seek(page_number=self.page_number - 1) + + page_image = converter.get_page() with open(cache_filename, 'wb+') as file_object: - file_object.write(image_buffer.getvalue()) + file_object.write(page_image.getvalue()) except: fs_cleanup(cache_filename) raise - else: - data = image_buffer.getvalue() - image_buffer.close() - if as_base64: - return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data)) - else: - return data + + if rotation: + converter.transform(transformation=TransformationRotate(degrees=rotation)) + + if size: + converter.transform(transformation=TransformationResize(**dict(zip(('width', 'height'), (size.split('x')))))) + + if zoom_level: + converter.transform(transformation=TransformationZoom(percent=zoom_level)) + + page_image = converter.get_page() + + if as_base64: + return 'data:%s;base64,%s' % ('image/png', base64.b64encode(page_image.getvalue())) + else: + return page_image def argument_validator(value): diff --git a/mayan/apps/documents/views.py b/mayan/apps/documents/views.py index 3e01408aa1..320008faab 100644 --- a/mayan/apps/documents/views.py +++ b/mayan/apps/documents/views.py @@ -372,10 +372,11 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE): task = task_get_document_page_image.apply_async(kwargs=dict(document_page_id=document_page.pk, size=size, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter') data = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT) - response = HttpResponse(data, content_type='image') - return response + return HttpResponse(data, content_type='image') # TODO: remove sendfile + # TODO: test if celery result store can store binary blobs or switch to + # full base64 in JS #return sendfile.sendfile(request, task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT), mimetype=DEFAULT_FILE_FORMAT_MIMETYPE)