Implement class-based converter transformations

This commit is contained in:
Roberto Rosario
2015-06-07 05:29:31 -04:00
parent f4752a3f3f
commit b256758db2
10 changed files with 212 additions and 231 deletions

View File

@@ -0,0 +1,4 @@
from .classes import (
TransformationResize, TransformationRotate, TransformationZoom
)
from .runtime import converter_class

View File

@@ -15,7 +15,6 @@ from .literals import (
DEFAULT_FILE_FORMAT, TRANSFORMATION_CHOICES, TRANSFORMATION_RESIZE,
TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR
)
from .runtime import backend, office_converter
def HASH_FUNCTION(x):
    """
    Return the SHA-256 digest of ``x`` as a hexadecimal string.

    ``x`` is handed to ``hashlib.sha256`` unchanged, so it must be a value
    that function accepts (a byte string).
    """
    return hashlib.sha256(x).hexdigest()

View File

@@ -1,16 +1 @@
from __future__ import unicode_literals
class ConverterBase(object):
    """
    Interface that every converter backend class has to inherit from.

    None of the operations is implemented here; each one raises
    NotImplementedError until a backend overrides it.
    """

    def convert(self, input_data):
        """
        Conversion hook; must be provided by the backend.
        """
        raise NotImplementedError

    def transform(self, input_data, transformations):
        """
        Transformation hook; must be provided by the backend.
        """
        raise NotImplementedError

    def get_page_count(self, input_data):
        """
        Page counting hook; must be provided by the backend.
        """
        raise NotImplementedError

View File

@@ -2,7 +2,7 @@ from __future__ import unicode_literals
import subprocess
from . import ConverterBase
from ..classes import ConverterBase
from ..exceptions import ConvertError, IdentifyError, UnknownFileFormat
from ..literals import (
TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM

View File

@@ -2,7 +2,7 @@ from __future__ import unicode_literals
import subprocess
from . import ConverterBase
from ..classes import ConverterBase
from ..exceptions import ConvertError, IdentifyError, UnknownFileFormat
from ..literals import (
DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER, DIMENSION_SEPARATOR,

View File

@@ -17,7 +17,7 @@ import sh
from common.utils import fs_cleanup
from mimetype.api import get_mimetype
from . import ConverterBase
from ..classes import ConverterBase
from ..exceptions import ConvertError, UnknownFileFormat
from ..literals import (
DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER, TRANSFORMATION_RESIZE,
@@ -37,18 +37,38 @@ logger = logging.getLogger(__name__)
class Python(ConverterBase):
def get_page_count(self, file_object, mimetype=None):
def convert(self, *args, **kwargs):
    """
    Render the current page of a PDF document as an image.

    The parent ``convert`` runs first. When the resulting mime type is
    'application/pdf' and ``pdftoppm`` is available, the document content
    (the office conversion output if there is one, otherwise the original
    file object) is copied to a temporary file, ``pdftoppm`` is run on it
    for the single page ``self.page_number + 1`` and the captured output
    is opened with ``Image.open``.

    Returns the opened image, or None when the PDF branch does not apply.
    The temporary file is always removed, including when copying the
    document or running ``pdftoppm`` fails.
    """
    super(Python, self).convert(*args, **kwargs)

    if self.mime_type == 'application/pdf' and pdftoppm:
        new_file_object, input_filepath = tempfile.mkstemp()

        try:
            # Copy the source into the temporary file. The descriptor is
            # closed even if reading or writing raises, so it cannot leak.
            try:
                if self.soffice_file_object:
                    os.write(new_file_object, self.soffice_file_object.read())
                    self.soffice_file_object.close()
                else:
                    os.write(new_file_object, self.file_object.read())
                    self.file_object.seek(0)
            finally:
                os.close(new_file_object)

            image_buffer = io.BytesIO()
            # page_number is 0-based here; the +1 presumably maps it to
            # pdftoppm's 1-based first/last page options - TODO confirm.
            pdftoppm(input_filepath, f=self.page_number + 1, l=self.page_number + 1, _out=image_buffer)
            image_buffer.seek(0)
            return Image.open(image_buffer)
        finally:
            fs_cleanup(input_filepath)
def get_page_count(self):
page_count = 1
if not mimetype:
mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
else:
encoding = None
if mimetype == 'application/pdf':
if self.mime_type == 'application/pdf':
# If file is a PDF open it with slate to determine the page count
try:
pages = slate.PDF(file_object)
pages = slate.PDF(self.file_object)
except Exception as exception:
logger.error('slate exception; %s', exception)
return 1
@@ -56,120 +76,22 @@ class Python(ConverterBase):
else:
return len(pages)
finally:
file_object.seek(0)
self.file_object.seek(0)
try:
image = Image.open(file_object)
except IOError: # cannot identify image file
raise UnknownFileFormat
image = Image.open(self.file_object)
finally:
file_object.seek(0)
self.file_object.seek(0)
try:
while True:
image.seek(image.tell() + 1)
page_count += 1
# do something to im
except EOFError:
pass # end of sequence
return page_count
def convert(self, file_object, mimetype=None, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
if not mimetype:
mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
if mimetype == 'application/pdf' and pdftoppm:
image_buffer = io.BytesIO()
new_file_object, input_filepath = tempfile.mkstemp()
os.write(new_file_object, file_object.read())
os.close(new_file_object)
pdftoppm(input_filepath, f=page, l=page, _out=image_buffer)
image_buffer.seek(0)
image = Image.open(image_buffer)
fs_cleanup(input_filepath)
else:
image = Image.open(file_object)
current_page = 0
try:
while current_page == page - 1:
image.seek(image.tell() + 1)
current_page += 1
# do something to im
except EOFError:
# end of sequence
pass
if image.mode not in ('L', 'RGB'):
image = image.convert('RGB')
return page_count
output = StringIO()
image.save(output, format=output_format)
return output
'''
try:
if transformations:
aspect = 1.0 * im.size[0] / im.size[1]
for transformation in transformations:
arguments = transformation.get('arguments')
if transformation['transformation'] == TRANSFORMATION_RESIZE:
width = int(arguments.get('width', 0))
height = int(arguments.get('height', 1.0 * width * aspect))
im = self.resize(im, (width, height))
elif transformation['transformation'] == TRANSFORMATION_ZOOM:
decimal_value = float(arguments.get('percent', 100)) / 100
im = im.transform((int(im.size[0] * decimal_value), int(im.size[1] * decimal_value)), Image.EXTENT, (0, 0, im.size[0], im.size[1]))
elif transformation['transformation'] == TRANSFORMATION_ROTATE:
# PIL counter degress counter-clockwise, reverse them
im = im.rotate(360 - arguments.get('degrees', 0))
except:
# Ignore all transformation error
pass
'''
# From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python
def resize(self, img, box, fit=False, out=None):
    """
    Shrink an image so that it fits inside a bounding box.
    @param img: Image - the Image-object to downsample
    @param box: tuple(x, y) - the bounding box of the result image
    @param fit: boolean - crop the image first so that it fills the box
    @param out: file-like-object - when given, the result is saved into it
    as JPEG and nothing is returned; otherwise the image is returned
    """
    # Cheap first pass: halve repeatedly (factor 2, 4, 8...) using the fast
    # NEAREST filter while the image is still much larger than the box
    shrink = 1
    while img.size[0] / shrink > 2 * box[0] and img.size[1] * 2 / shrink > 2 * box[1]:
        shrink *= 2

    if shrink > 1:
        img.thumbnail((img.size[0] / shrink, img.size[1] / shrink), Image.NEAREST)

    if fit:
        # Work out the centred crop window that has the proportions of the box
        left = top = 0
        right, bottom = img.size
        width_ratio = 1.0 * right / box[0]
        height_ratio = 1.0 * bottom / box[1]

        if height_ratio > width_ratio:
            middle = bottom / 2
            half_span = box[1] * width_ratio / 2
            top = middle - half_span
            bottom = middle + half_span
        else:
            middle = right / 2
            half_span = box[0] * height_ratio / 2
            left = middle - half_span
            right = middle + half_span

        img = img.crop((left, top, right, bottom))

    # Final pass with the best quality filter
    img.thumbnail(box, Image.ANTIALIAS)

    if not out:
        return img

    # An output stream was supplied: write the result there instead
    img.save(out, 'JPEG', quality=75)
# if isinstance(self.regex, basestring):
# self.regex = re.compile(regex)

View File

@@ -1,12 +1,21 @@
from __future__ import unicode_literals
import logging
import io
import os
import subprocess
from tempfile import mkstemp
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from PIL import Image
from django.utils.encoding import smart_str
from django.utils.module_loading import import_string
from django.utils.translation import ugettext_lazy as _
from common.settings import TEMPORARY_DIRECTORY
from common.utils import fs_cleanup
@@ -18,7 +27,6 @@ from .literals import (
DEFAULT_FILE_FORMAT, TRANSFORMATION_CHOICES, TRANSFORMATION_RESIZE,
TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR
)
from .office_converter import OfficeConverter
from .settings import GRAPHICS_BACKEND, LIBREOFFICE_PATH
CONVERTER_OFFICE_FILE_MIMETYPES = [
@@ -70,44 +78,7 @@ CONVERTER_OFFICE_FILE_MIMETYPES = [
logger = logging.getLogger(__name__)
logger.debug('initializing office backend')
try:
office_converter = OfficeConverter()
except OfficeBackendError as exception:
logger.error('error initializing office backend; %s', exception)
office_converter = None
else:
logger.debug('office_backend initialized')
backend = import_string(GRAPHICS_BACKEND)()
class BaseTransformation(object):
    """
    Common parent of the transformation classes.

    Subclasses list the keyword arguments they accept in ``arguments``;
    each of them becomes an instance attribute.
    """
    name = 'base_transformation'
    arguments = ()

    def __init__(self, **kwargs):
        # Only declared arguments are stored; a declared argument that was
        # not supplied is stored as None and undeclared ones are ignored.
        for declared in self.arguments:
            value = kwargs.get(declared)
            setattr(self, declared, value)
class TransformationResize(BaseTransformation):
    """
    Transformation named 'resize'; accepts ``width`` and ``height``.
    """
    arguments = ('width', 'height')
    name = 'resize'
class TransformationRotate(BaseTransformation):
    """
    Transformation named 'rotate'; accepts ``degrees``.
    """
    arguments = ('degrees',)
    name = 'rotate'
class TransformationScale(BaseTransformation):
    """
    Transformation named 'scale'; accepts ``percent``.
    """
    arguments = ('percent',)
    name = 'scale'
class Converter(object):
class ConverterBase(object):
@staticmethod
def soffice(file_object):
"""
@@ -154,38 +125,140 @@ class Converter(object):
def __init__(self, file_object, mime_type=None):
self.file_object = file_object
self.mime_type = mime_type or get_mimetype(file_object=file_object, mimetype_only=False)[0]
self.temporary_files = []
self.soffice_file_object = None
def transform(self, transformations, page=DEFAULT_PAGE_NUMBER):
pass
def seek(self, page_number):
    """
    Make ``self.image`` hold the requested page. Pages start at #0.

    The file object is rewound and opened with ``Image.open``. If that
    raises IOError the image comes from ``self.convert`` instead;
    otherwise the opened image is moved to ``page_number`` and loaded.
    """
    self.file_object.seek(0)

    try:
        opened = Image.open(self.file_object)
    except IOError:
        # Cannot identify image file, let the converter produce the page
        self.image = self.convert(page_number=page_number)
        return

    self.image = opened
    self.image.seek(page_number)
    self.image.load()
def get_page(self, output_format=DEFAULT_FILE_FORMAT):
    """
    Return the current page image serialized into an in-memory buffer.

    If no page has been selected yet, the first page (#0) is selected
    before saving.

    output_format: format name handed to the image's ``save`` method.
    Returns a StringIO buffer positioned at its start.
    """
    # ``image`` is only assigned by seek(); use getattr so a call made
    # before any seek() does not fail with AttributeError. Pages are
    # 0-based, so the default page is 0 (1 would be the second page).
    if not getattr(self, 'image', None):
        self.seek(0)

    image_buffer = StringIO()
    self.image.save(image_buffer, format=output_format)
    image_buffer.seek(0)
    return image_buffer
def convert(self, page_number=DEFAULT_PAGE_NUMBER):
self.page_number = page_number
self.mime_type = 'application/pdf'
def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
if os.path.exists(LIBREOFFICE_PATH):
if not self.soffice_file_object:
converted_output = Converter.soffice(self.file_object)
self.file_object.close()
self.file_object = open(converted_output)
self.file_object.seek(0)
self.soffice_file_object = open(converted_output)
self.mime_type = 'application/pdf'
self.temporary_file.append(converted_output)
fs_cleanup(converted_output)
else:
self.soffice_file_object.seek(0)
else:
# TODO: NO LIBREOFFICE FOUND ERROR
pass
for temporary_file in self.temporary_files:
fs_cleanup(temporary_file)
def transform(self, transformation):
    """
    Run one transformation on the current image and keep whatever its
    ``execute_on`` returns as the new current image.
    """
    outcome = transformation.execute_on(self.image)
    self.image = outcome
return backend.convert(file_object=self.file_object, mimetype=self.mime_type, output_format=output_format, page=page)
def transform_many(self, transformations):
    """
    Run several transformations in the given order. The current image is
    replaced after every step, so each step works on the previous result.
    """
    for step in transformations:
        outcome = step.execute_on(self.image)
        self.image = outcome
def get_page_count(self):
return backend.get_page_count(file_object)
raise NotImplementedError()
'''
def get_available_transformations_choices():
result = []
for transformation in backend.get_available_transformations():
result.append((transformation, TRANSFORMATION_CHOICES[transformation]['label']))
class BaseTransformation(object):
name = 'base_transformation'
arguments = ()
return result
'''
_registry = {}
@classmethod
def get_transformations_classes(cls):
return map(lambda name: getattr(cls, name), filter(lambda entry: entry.startswith('Transform'), dir(cls)))
@classmethod
def get_transformations_choices(cls):
return [(transformation.name, transformation.label) for transformation in cls.get_transformations_classes()]
def __init__(self, **kwargs):
for argument_name in self.arguments:
setattr(self, argument_name, kwargs.get(argument_name))
def execute_on(self, image):
self.image = image
self.aspect = 1.0 * image.size[0] / image.size[1]
class TransformationResize(BaseTransformation):
    """
    Shrink an image so that it fits inside a ``width`` x ``height`` box.

    ``height`` is optional; when it is missing it is derived from
    ``width`` and the aspect ratio of the image.
    """
    name = 'resize'
    arguments = ('width', 'height')
    label = _('Resize')

    def execute_on(self, *args, **kwargs):
        """
        Resize the image in place and return it.

        The parent implementation stores the image and its aspect ratio
        (width divided by height) on the instance before the resize runs.
        """
        super(TransformationResize, self).execute_on(*args, **kwargs)

        width = int(self.width)
        # aspect is width / height, so the proportional height for a given
        # width is width / aspect (multiplying is only right for squares).
        height = int(self.height or 1.0 * width / self.aspect)

        # Cheap first pass: halve repeatedly (factor 2, 4, 8...) with the
        # fast NEAREST filter while the image is much larger than the box.
        # Floor division keeps the intermediate sizes integral.
        factor = 1
        while self.image.size[0] // factor > 2 * width and self.image.size[1] * 2 // factor > 2 * height:
            factor *= 2

        if factor > 1:
            self.image.thumbnail((self.image.size[0] // factor, self.image.size[1] // factor), Image.NEAREST)

        # Resize the image with best quality algorithm ANTI-ALIAS
        self.image.thumbnail((width, height), Image.ANTIALIAS)
        return self.image
class TransformationRotate(BaseTransformation):
    """
    Rotate an image by the number of ``degrees`` given.
    """
    name = 'rotate'
    label = _('Rotate')
    arguments = ('degrees',)

    def execute_on(self, *args, **kwargs):
        """
        Return a rotated version of the image.
        """
        super(TransformationRotate, self).execute_on(*args, **kwargs)

        # PIL counts degrees counter-clockwise, reverse them
        angle = 360 - self.degrees
        return self.image.rotate(angle)
class TransformationZoom(BaseTransformation):
    """
    Scale an image by a percentage of its current size.
    """
    name = 'zoom'
    label = _('Zoom')
    arguments = ('percent',)

    def execute_on(self, *args, **kwargs):
        """
        Return a copy of the image with both dimensions multiplied by
        ``percent`` / 100.
        """
        super(TransformationZoom, self).execute_on(*args, **kwargs)

        ratio = float(self.percent) / 100
        current_width, current_height = self.image.size[0], self.image.size[1]
        new_size = (int(current_width * ratio), int(current_height * ratio))
        return self.image.resize(new_size, Image.ANTIALIAS)

View File

@@ -4,20 +4,7 @@ import logging
from django.utils.module_loading import import_string
from .exceptions import OfficeBackendError
from .office_converter import OfficeConverter
from .settings import GRAPHICS_BACKEND
logger = logging.getLogger(__name__)
logger.debug('initializing office backend')
try:
office_converter = OfficeConverter()
except OfficeBackendError as exception:
logger.error('error initializing office backend; %s', exception)
office_converter = None
else:
logger.debug('office_backend initialized')
backend = import_string(GRAPHICS_BACKEND)()
backend = converter_class = import_string(GRAPHICS_BACKEND)

View File

@@ -18,7 +18,9 @@ from django.utils.translation import ugettext_lazy as _
from acls.utils import apply_default_acls
from common.settings import TEMPORARY_DIRECTORY
from common.utils import fs_cleanup
from converter.classes import Converter
from converter import (
converter_class, TransformationResize, TransformationRotate, TransformationZoom
)
from converter.exceptions import UnknownFileFormat
from converter.literals import (
DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, DEFAULT_PAGE_NUMBER
@@ -355,7 +357,7 @@ class DocumentVersion(models.Model):
#self.save_to_file(filepath)
try:
with self.open() as file_object:
converter = Converter(file_object=file_object, mimetype=self.mimetype)
converter = converter_class(file_object=file_object, mimetype=self.mimetype)
detected_pages = converter.get_page_count()
except UnknownFileFormat:
# If converter backend doesn't understand the format,
@@ -536,45 +538,53 @@ class DocumentPage(models.Model):
def get_image(self, *args, **kwargs):
transformations = kwargs.pop('transformations', [])
#size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, as_base64=False, version=None):
#if zoom < ZOOM_MIN_LEVEL:
# zoom = ZOOM_MIN_LEVEL
size = kwargs.pop('size', DISPLAY_SIZE)
rotation = kwargs.pop('rotation', DEFAULT_ROTATION)
zoom_level = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL)
#if zoom > ZOOM_MAX_LEVEL:
# zoom = ZOOM_MAX_LEVEL
if zoom_level < ZOOM_MIN_LEVEL:
zoom_level = ZOOM_MIN_LEVEL
#rotation = rotation % 360
if zoom_level > ZOOM_MAX_LEVEL:
zoom_level = ZOOM_MAX_LEVEL
rotation = rotation % 360
#file_path = self.get_valid_image(size=size, page=page, zoom=zoom, rotation=rotation, version=version)
#logger.debug('file_path: %s', file_path)
as_base64 = kwargs.pop('as_base64', False)
cache_filename = self.get_cache_filename()
if os.path.exists(cache_filename) and 0:
with open(cache_filename) as file_object:
data = file_object.read()
if os.path.exists(cache_filename):
converter = converter_class(file_object=open(cache_filename))
if as_base64:
return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data))
else:
return data
converter.seek(0)
else:
try:
converter = Converter(file_object=self.document_version.open())
image_buffer = converter.convert(page=self.page_number, output_format='PNG')
converter = converter_class(file_object=self.document_version.open())
converter.seek(page_number=self.page_number - 1)
page_image = converter.get_page()
with open(cache_filename, 'wb+') as file_object:
file_object.write(image_buffer.getvalue())
file_object.write(page_image.getvalue())
except:
fs_cleanup(cache_filename)
raise
else:
data = image_buffer.getvalue()
image_buffer.close()
if rotation:
converter.transform(transformation=TransformationRotate(degrees=rotation))
if size:
converter.transform(transformation=TransformationResize(**dict(zip(('width', 'height'), (size.split('x'))))))
if zoom_level:
converter.transform(transformation=TransformationZoom(percent=zoom_level))
page_image = converter.get_page()
if as_base64:
return 'data:%s;base64,%s' % ('image/png', base64.b64encode(data))
return 'data:%s;base64,%s' % ('image/png', base64.b64encode(page_image.getvalue()))
else:
return data
return page_image
def argument_validator(value):

View File

@@ -372,10 +372,11 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE):
task = task_get_document_page_image.apply_async(kwargs=dict(document_page_id=document_page.pk, size=size, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter')
data = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)
response = HttpResponse(data, content_type='image')
return response
return HttpResponse(data, content_type='image')
# TODO: remove sendfile
# TODO: test if celery result store can store binary blobs or switch to
# full base64 in JS
#return sendfile.sendfile(request, task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT), mimetype=DEFAULT_FILE_FORMAT_MIMETYPE)