Initial commit of the converter refactor
This commit is contained in:
@@ -5,17 +5,12 @@ class ConverterBase(object):
|
||||
"""
|
||||
Base class that all backend classes must inherit
|
||||
"""
|
||||
def convert_file(self, input_filepath, *args, **kwargs):
|
||||
raise NotImplementedError('Your %s class has not defined a convert_file() method, which is required.' % self.__class__.__name__)
|
||||
|
||||
def convert_document(self, document, *args, **kwargs):
|
||||
raise NotImplementedError('Your %s class has not defined a convert_document() method, which is required.' % self.__class__.__name__)
|
||||
def convert(self, input_data, ):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_format_list(self):
|
||||
raise NotImplementedError('Your %s class has not defined a get_format_list() method, which is required.' % self.__class__.__name__)
|
||||
def transform(self, input_data, transformations):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_available_transformations(self):
|
||||
raise NotImplementedError('Your %s class has not defined a get_available_transformations() method, which is required.' % self.__class__.__name__)
|
||||
|
||||
def get_page_count(self, input_filepath):
|
||||
raise NotImplementedError('Your %s class has not defined a get_page_count() method, which is required.' % self.__class__.__name__)
|
||||
def get_page_count(self, input_data):
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -83,12 +83,6 @@ class GraphicsMagick(ConverterBase):
|
||||
else:
|
||||
raise ConvertError(error_line)
|
||||
|
||||
def get_available_transformations(self):
|
||||
return [
|
||||
TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE,
|
||||
TRANSFORMATION_ZOOM
|
||||
]
|
||||
|
||||
def get_page_count(self, input_filepath):
|
||||
try:
|
||||
return len(self.identify_file(unicode(input_filepath)).splitlines())
|
||||
|
||||
@@ -77,12 +77,6 @@ class ImageMagick(ConverterBase):
|
||||
else:
|
||||
raise ConvertError(error_line)
|
||||
|
||||
def get_available_transformations(self):
|
||||
return [
|
||||
TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE,
|
||||
TRANSFORMATION_ZOOM
|
||||
]
|
||||
|
||||
def get_page_count(self, input_filepath):
|
||||
try:
|
||||
return len(self.identify_file(unicode(input_filepath)).splitlines())
|
||||
|
||||
@@ -2,6 +2,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
|
||||
import slate
|
||||
from PIL import Image
|
||||
@@ -30,29 +37,42 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Python(ConverterBase):
|
||||
def get_page_count(self, input_filepath):
|
||||
def get_page_count(self, file_object, mimetype=None):
|
||||
page_count = 1
|
||||
|
||||
mimetype, encoding = get_mimetype(open(input_filepath, 'rb'), input_filepath, mimetype_only=True)
|
||||
#file_object, input_filepath = mkstemp()
|
||||
#file_object.write(input_data)
|
||||
|
||||
if not mimetype:
|
||||
#mimetype, encoding = get_mimetype(file_description=open(input_filepath, 'rb'), filepath=None, mimetype_only=True)
|
||||
mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
|
||||
else:
|
||||
encoding = None
|
||||
|
||||
if mimetype == 'application/pdf':
|
||||
# If file is a PDF open it with slate to determine the page
|
||||
# count
|
||||
with open(input_filepath) as fd:
|
||||
try:
|
||||
pages = slate.PDF(fd)
|
||||
except:
|
||||
return 1
|
||||
# TODO: Maybe return UnknownFileFormat to display proper unknown file format message in document description
|
||||
return len(pages)
|
||||
# If file is a PDF open it with slate to determine the page count
|
||||
#with open(input_filepath) as fd:
|
||||
try:
|
||||
pages = slate.PDF(file_object)
|
||||
except:
|
||||
return 1
|
||||
# TODO: Maybe return UnknownFileFormat to display proper unknown file format message in document description
|
||||
else:
|
||||
return len(pages)
|
||||
finally:
|
||||
file_object.seek(0)
|
||||
|
||||
try:
|
||||
im = Image.open(input_filepath)
|
||||
#im = Image.fromarray(input_data)
|
||||
image = Image.open(file_object)
|
||||
except IOError: # cannot identify image file
|
||||
raise UnknownFileFormat
|
||||
finally:
|
||||
file_object.seek(0)
|
||||
|
||||
try:
|
||||
while True:
|
||||
im.seek(im.tell() + 1)
|
||||
image.seek(image.tell() + 1)
|
||||
page_count += 1
|
||||
# do something to im
|
||||
except EOFError:
|
||||
@@ -60,40 +80,59 @@ class Python(ConverterBase):
|
||||
|
||||
return page_count
|
||||
|
||||
def convert_file(self, input_filepath, output_filepath, transformations=None, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT, **kwargs):
|
||||
tmpfile = None
|
||||
mimetype = kwargs.get('mimetype', None)
|
||||
if not mimetype:
|
||||
mimetype, encoding = get_mimetype(open(input_filepath, 'rb'), input_filepath, mimetype_only=True)
|
||||
def convert(self, file_object, mimetype=None, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
|
||||
|
||||
try:
|
||||
if mimetype == 'application/pdf' and pdftoppm:
|
||||
image_buffer = io.BytesIO()
|
||||
pdftoppm(input_filepath, f=page, l=page, _out=image_buffer)
|
||||
image_buffer.seek(0)
|
||||
im = Image.open(image_buffer)
|
||||
else:
|
||||
im = Image.open(input_filepath)
|
||||
except Exception as exception:
|
||||
logger.error('Error converting image; %s', exception)
|
||||
# Python Imaging Library doesn't recognize it as an image
|
||||
raise ConvertError
|
||||
except IOError: # cannot identify image file
|
||||
raise UnknownFileFormat
|
||||
finally:
|
||||
if tmpfile:
|
||||
fs_cleanup(tmpfile)
|
||||
#tmpfile = None
|
||||
#mimetype = kwargs.get('mimetype', None)
|
||||
|
||||
if not mimetype:
|
||||
mimetype, encoding = get_mimetype(file_object=file_object, mimetype_only=True)
|
||||
|
||||
##try:
|
||||
print "MIME!", mimetype
|
||||
if mimetype == 'application/pdf' and pdftoppm:
|
||||
image_buffer = io.BytesIO()
|
||||
|
||||
new_file_object, input_filepath = tempfile.mkstemp()
|
||||
os.write(new_file_object, file_object.read())
|
||||
#file_object.seek(0)
|
||||
#new_file_object.seek(0)
|
||||
os.close(new_file_object)
|
||||
|
||||
|
||||
|
||||
pdftoppm(input_filepath, f=page, l=page, _out=image_buffer)
|
||||
image_buffer.seek(0)
|
||||
image = Image.open(image_buffer)
|
||||
# TODO: remove input_filepath
|
||||
else:
|
||||
image = Image.open(file_object)
|
||||
|
||||
|
||||
|
||||
##except Exception as exception:
|
||||
## logger.error('Error converting image; %s', exception)
|
||||
## # Python Imaging Library doesn't recognize it as an image
|
||||
## raise ConvertError
|
||||
##except IOError: # cannot identify image file
|
||||
## raise UnknownFileFormat
|
||||
|
||||
|
||||
#finally:
|
||||
# if tmpfile:
|
||||
# fs_cleanup(tmpfile)
|
||||
|
||||
current_page = 0
|
||||
try:
|
||||
while current_page == page - 1:
|
||||
im.seek(im.tell() + 1)
|
||||
image.seek(image.tell() + 1)
|
||||
current_page += 1
|
||||
# do something to im
|
||||
except EOFError:
|
||||
# end of sequence
|
||||
pass
|
||||
|
||||
'''
|
||||
try:
|
||||
if transformations:
|
||||
aspect = 1.0 * im.size[0] / im.size[1]
|
||||
@@ -112,17 +151,16 @@ class Python(ConverterBase):
|
||||
except:
|
||||
# Ignore all transformation error
|
||||
pass
|
||||
'''
|
||||
|
||||
if im.mode not in ('L', 'RGB'):
|
||||
im = im.convert('RGB')
|
||||
if image.mode not in ('L', 'RGB'):
|
||||
image = image.convert('RGB')
|
||||
|
||||
im.save(output_filepath, format=file_format)
|
||||
|
||||
def get_available_transformations(self):
|
||||
return [
|
||||
TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE,
|
||||
TRANSFORMATION_ZOOM
|
||||
]
|
||||
output = StringIO()
|
||||
image.save(output, format=output_format)
|
||||
|
||||
return output
|
||||
|
||||
# From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python
|
||||
def resize(self, img, box, fit=False, out=None):
|
||||
|
||||
282
mayan/apps/converter/classes.py
Normal file
282
mayan/apps/converter/classes.py
Normal file
@@ -0,0 +1,282 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from tempfile import mkstemp
|
||||
|
||||
from django.utils.encoding import smart_str
|
||||
from django.utils.module_loading import import_string
|
||||
|
||||
from common.settings import TEMPORARY_DIRECTORY
|
||||
from common.utils import fs_cleanup
|
||||
from mimetype.api import get_mimetype
|
||||
|
||||
from .exceptions import OfficeConversionError, UnknownFileFormat
|
||||
from .literals import (
|
||||
DEFAULT_PAGE_NUMBER, DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION,
|
||||
DEFAULT_FILE_FORMAT, TRANSFORMATION_CHOICES, TRANSFORMATION_RESIZE,
|
||||
TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM, DIMENSION_SEPARATOR
|
||||
)
|
||||
from .office_converter import OfficeConverter
|
||||
from .runtime import backend, office_converter
|
||||
from .settings import GRAPHICS_BACKEND, LIBREOFFICE_PATH
|
||||
|
||||
CONVERTER_OFFICE_FILE_MIMETYPES = [
|
||||
'application/msword',
|
||||
'application/mswrite',
|
||||
'application/mspowerpoint',
|
||||
'application/msexcel',
|
||||
'application/pgp-keys',
|
||||
'application/vnd.ms-excel',
|
||||
'application/vnd.ms-excel.addin.macroEnabled.12',
|
||||
'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
|
||||
'application/vnd.ms-powerpoint',
|
||||
'application/vnd.oasis.opendocument.chart',
|
||||
'application/vnd.oasis.opendocument.chart-template',
|
||||
'application/vnd.oasis.opendocument.formula',
|
||||
'application/vnd.oasis.opendocument.formula-template',
|
||||
'application/vnd.oasis.opendocument.graphics',
|
||||
'application/vnd.oasis.opendocument.graphics-template',
|
||||
'application/vnd.oasis.opendocument.image',
|
||||
'application/vnd.oasis.opendocument.image-template',
|
||||
'application/vnd.oasis.opendocument.presentation',
|
||||
'application/vnd.oasis.opendocument.presentation-template',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.template',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.slide',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
|
||||
'application/vnd.oasis.opendocument.spreadsheet',
|
||||
'application/vnd.oasis.opendocument.spreadsheet-template',
|
||||
'application/vnd.oasis.opendocument.text',
|
||||
'application/vnd.oasis.opendocument.text-master',
|
||||
'application/vnd.oasis.opendocument.text-template',
|
||||
'application/vnd.oasis.opendocument.text-web',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'application/vnd.ms-office',
|
||||
'application/xml',
|
||||
'text/x-c',
|
||||
'text/x-c++',
|
||||
'text/x-pascal',
|
||||
'text/x-msdos-batch',
|
||||
'text/x-python',
|
||||
'text/x-shellscript',
|
||||
'text/plain',
|
||||
'text/rtf',
|
||||
]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
logger.debug('initializing office backend')
|
||||
try:
|
||||
office_converter = OfficeConverter()
|
||||
except OfficeBackendError as exception:
|
||||
logger.error('error initializing office backend; %s', exception)
|
||||
office_converter = None
|
||||
else:
|
||||
logger.debug('office_backend initialized')
|
||||
|
||||
backend = import_string(GRAPHICS_BACKEND)()
|
||||
|
||||
|
||||
class BaseTransformation(object):
    """
    Base class for the image transformations understood by the converter.

    Subclasses set ``name`` and list, in ``arguments``, the keyword
    arguments they accept. Each listed argument becomes an instance
    attribute of the same name.
    """
    name = 'base_transformation'
    arguments = ()

    def __init__(self, **kwargs):
        # Arguments that are not supplied are stored as None; keywords that
        # are not listed in ``arguments`` are ignored.
        for argument_name in self.arguments:
            setattr(self, argument_name, kwargs.get(argument_name))


class TransformationResize(BaseTransformation):
    """Resize transformation; takes ``width`` and ``height``."""
    name = 'resize'
    arguments = ('width', 'height')


class TransformationRotate(BaseTransformation):
    """Rotate transformation; takes ``degrees``."""
    name = 'rotate'
    arguments = ('degrees',)


class TransformationScale(BaseTransformation):
    """Scale transformation; takes ``percent``."""
    name = 'scale'
    arguments = ('percent',)
|
||||
|
||||
|
||||
class Converter(object):
    """
    Wraps a file object and delegates image conversion and page counting
    to the module-level graphics ``backend``.

    When the file's MIME type is listed in CONVERTER_OFFICE_FILE_MIMETYPES
    and LibreOffice is present at LIBREOFFICE_PATH, the file is converted
    to PDF on construction and the PDF replaces the original file object.
    """

    # Legacy cache helpers, disabled during the converter refactor and kept
    # here for reference only:
    #
    # def cache_cleanup(input_filepath, *args, **kwargs):
    #     try:
    #         os.remove(create_image_cache_filename(input_filepath, *args, **kwargs))
    #     except OSError:
    #         pass
    #
    # def create_image_cache_filename(input_filepath, *args, **kwargs):
    #     if input_filepath:
    #         hash_value = HASH_FUNCTION(''.join([HASH_FUNCTION(smart_str(input_filepath)), unicode(args), unicode(kwargs)]))
    #         return os.path.join(TEMPORARY_DIRECTORY, hash_value)
    #     else:
    #         return None

    @staticmethod
    def soffice(file_object):
        """
        Executes libreoffice using subprocess's Popen

        The content of ``file_object`` is copied to a temporary file whose
        path is handed to LibreOffice; ``file_object`` is rewound
        afterwards. Returns a file object, opened in binary mode, for the
        PDF that LibreOffice wrote into TEMPORARY_DIRECTORY. Raises
        ``Exception`` with the first line of stderr when LibreOffice exits
        with a non-zero return code.
        """
        # mkstemp() returns an OS-level descriptor (an int) and a path, so
        # the descriptor is wrapped in a real file object before writing.
        descriptor, input_filepath = mkstemp()
        with os.fdopen(descriptor, 'wb') as temporary_file:
            temporary_file.write(file_object.read())
        file_object.seek(0)

        command = [
            LIBREOFFICE_PATH,
            '--headless',
            '--convert-to',
            'pdf',
            input_filepath,
            '--outdir',
            TEMPORARY_DIRECTORY,
        ]
        logger.debug('command: %s', command)

        # NOTE(review): this changes HOME for the whole process, not only
        # for the LibreOffice child.
        os.environ['HOME'] = TEMPORARY_DIRECTORY

        try:
            proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
            # communicate() drains both pipes while waiting; a bare wait()
            # can block forever once the child fills a pipe buffer.
            _stdout_data, stderr_data = proc.communicate()
        finally:
            # The temporary input copy is no longer needed once LibreOffice
            # has run (or failed to start).
            try:
                os.remove(input_filepath)
            except OSError:
                pass

        return_code = proc.returncode
        logger.debug('return_code: %s', return_code)

        # Only the first stderr line is reported, newline included.
        stderr_lines = stderr_data.splitlines(True)
        readline = stderr_lines[0] if stderr_lines else stderr_data
        logger.debug('stderr: %s', readline)
        if return_code != 0:
            #raise OfficeBackendError(readline)
            raise Exception(readline)

        filename, extension = os.path.splitext(os.path.basename(input_filepath))
        logger.debug('filename: %s', filename)
        logger.debug('extension: %s', extension)

        # The output path is built from the input file's base name with a
        # "pdf" extension, inside the --outdir passed above.
        converted_output = os.path.join(TEMPORARY_DIRECTORY, os.path.extsep.join([filename, 'pdf']))
        logger.debug('converted_output: %s', converted_output)

        # TODO: remove the converted file once the caller is done with it
        # PDF data is binary, so it must not be opened in text mode.
        return open(converted_output, 'rb')

    def __init__(self, file_object, mime_type=None, mimetype=None):
        """
        file_object: the open file to convert.
        mime_type: MIME type of the file; detected with get_mimetype()
            when neither it nor ``mimetype`` is given.
        mimetype: alias of ``mime_type``, accepted because callers pass
            the value under this keyword.
        """
        self.file_object = file_object
        self.mime_type = mime_type or mimetype or get_mimetype(file_object=file_object, mimetype_only=False)[0]

        if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
            if os.path.exists(LIBREOFFICE_PATH):
                # TODO: cache results of conversion
                #output_filepath = os.path.join(TEMPORARY_DIRECTORY, ''.join([self.input_filepath, CACHED_FILE_SUFFIX]))

                result = Converter.soffice(file_object)
                # The original file is replaced by its PDF rendition.
                file_object.close()
                self.file_object = result
                self.mime_type = 'application/pdf'

                # Previous implementation, kept for reference:
                #try:
                #    self.backend.convert(self.input_filepath, self.output_filepath)
                #    self.exists = True
                #except OfficeBackendError as exception:
                #    # convert exception so that at least the mime type icon is displayed
                #    raise UnknownFileFormat(exception)
                #else:
                #    result = office_converter.convert(self.file_object, mimetype=mime_type)
                #    self.file_object.close()
                #    self.file_object = result
            else:
                # TODO: NO LIBREOFFICE ERROR
                pass

    def transform(self, transformations, page=DEFAULT_PAGE_NUMBER):
        """Placeholder; transformations are not implemented yet."""
        pass

    def convert(self, output_format=DEFAULT_FILE_FORMAT, page=DEFAULT_PAGE_NUMBER):
        """
        Ask the backend to render ``page`` of the wrapped file in
        ``output_format`` and return whatever the backend returns.
        """
        # Argument handling and caching from the previous implementation,
        # not yet ported; kept for reference:
        #
        #size = kwargs.get('size')
        #file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT)
        #zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL)
        #rotation = kwargs.get('rotation', DEFAULT_ROTATION)
        #page = kwargs.get('page', DEFAULT_PAGE_NUMBER)
        #transformations = kwargs.get('transformations', [])
        #
        #if transformations is None:
        #    transformations = []
        #
        #if output_filepath is None:
        #    output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
        #
        #if os.path.exists(output_filepath):
        #    return output_filepath
        #
        #if office_converter:
        #    try:
        #        office_converter.convert(input_filepath, mimetype=mimetype)
        #        if office_converter.exists:
        #            input_filepath = office_converter.output_filepath
        #            mimetype = 'application/pdf'
        #        else:
        #            # Recycle the already detected mimetype
        #            mimetype = office_converter.mimetype
        #
        #    except OfficeConversionError:
        #        raise UnknownFileFormat('office converter exception')
        #
        #if size:
        #    transformations.append(
        #        {
        #            'transformation': TRANSFORMATION_RESIZE,
        #            'arguments': dict(zip(['width', 'height'], size.split(DIMENSION_SEPARATOR)))
        #        }
        #    )
        #
        #if zoom != 100:
        #    transformations.append(
        #        {
        #            'transformation': TRANSFORMATION_ZOOM,
        #            'arguments': {'percent': zoom}
        #        }
        #    )
        #
        #if rotation != 0 and rotation != 360:
        #    transformations.append(
        #        {
        #            'transformation': TRANSFORMATION_ROTATE,
        #            'arguments': {'degrees': rotation}
        #        }
        #    )

        return backend.convert(file_object=self.file_object, mimetype=self.mime_type, output_format=output_format, page=page)

    def get_page_count(self):
        """Return the backend's page count for the wrapped file."""
        # The file lives on the instance; there is no local or global
        # ``file_object`` in this scope.
        return backend.get_page_count(self.file_object)
|
||||
|
||||
|
||||
|
||||
'''
|
||||
def get_available_transformations_choices():
|
||||
result = []
|
||||
for transformation in backend.get_available_transformations():
|
||||
result.append((transformation, TRANSFORMATION_CHOICES[transformation]['label']))
|
||||
|
||||
return result
|
||||
'''
|
||||
|
||||
@@ -17,9 +17,7 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from acls.utils import apply_default_acls
|
||||
from common.settings import TEMPORARY_DIRECTORY
|
||||
from converter.api import (
|
||||
convert, get_page_count, get_available_transformations_choices
|
||||
)
|
||||
from converter.classes import Converter
|
||||
from converter.exceptions import UnknownFileFormat
|
||||
from converter.literals import (
|
||||
DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, DEFAULT_PAGE_NUMBER
|
||||
@@ -127,6 +125,7 @@ class Document(models.Model):
|
||||
else:
|
||||
event_document_create.commit(target=self)
|
||||
|
||||
"""
|
||||
def get_cached_image_name(self, page, version):
|
||||
document_version = DocumentVersion.objects.get(pk=version)
|
||||
document_page = document_version.pages.get(page_number=page)
|
||||
@@ -165,19 +164,20 @@ class Document(models.Model):
|
||||
logger.debug('file_path: %s', file_path)
|
||||
|
||||
if as_base64:
|
||||
mimetype = get_mimetype(open(file_path, 'r'), file_path, mimetype_only=True)[0]
|
||||
image = open(file_path, 'r')
|
||||
base64_data = base64.b64encode(image.read())
|
||||
image.close()
|
||||
return 'data:%s;base64,%s' % (mimetype, base64_data)
|
||||
with open(file_path, 'r') as file_object:
|
||||
mimetype = get_mimetype(file_object=file_object, mimetype_only=True)[0]
|
||||
base64_data = base64.b64encode(file_object.read())
|
||||
return 'data:%s;base64,%s' % (mimetype, base64_data)
|
||||
else:
|
||||
return file_path
|
||||
"""
|
||||
|
||||
def invalidate_cached_image(self, page):
|
||||
try:
|
||||
os.unlink(self.get_cached_image_name(page, self.latest_version.pk)[0])
|
||||
except OSError:
|
||||
pass
|
||||
pass
|
||||
#try:
|
||||
# os.unlink(self.get_cached_image_name(page, self.latest_version.pk)[0])
|
||||
#except OSError:
|
||||
# pass
|
||||
|
||||
def add_as_recent_document_for_user(self, user):
|
||||
RecentDocument.objects.add_document_for_user(user, self)
|
||||
@@ -347,33 +347,33 @@ class DocumentVersion(models.Model):
|
||||
self.save()
|
||||
|
||||
def update_page_count(self, save=True):
|
||||
handle, filepath = tempfile.mkstemp()
|
||||
#handle, filepath = tempfile.mkstemp()
|
||||
# Just need the filepath, close the file description
|
||||
os.close(handle)
|
||||
#os.close(handle)
|
||||
|
||||
self.save_to_file(filepath)
|
||||
#self.save_to_file(filepath)
|
||||
try:
|
||||
detected_pages = get_page_count(filepath)
|
||||
with self.open() as file_object:
|
||||
converter = Converter(file_object=file_object, mimetype=self.mimetype)
|
||||
detected_pages = converter.get_page_count()
|
||||
except UnknownFileFormat:
|
||||
# If converter backend doesn't understand the format,
|
||||
# use 1 as the total page count
|
||||
detected_pages = 1
|
||||
self.description = _('This document\'s file format is not known, the page count has therefore defaulted to 1.')
|
||||
self.save()
|
||||
try:
|
||||
os.remove(filepath)
|
||||
except OSError:
|
||||
pass
|
||||
#try:
|
||||
# os.remove(filepath)
|
||||
#except OSError:
|
||||
# pass
|
||||
|
||||
current_pages = self.pages.order_by('page_number',)
|
||||
if current_pages.count() > detected_pages:
|
||||
for page in current_pages[detected_pages:]:
|
||||
page.delete()
|
||||
# TODO: put inside a DB transaction
|
||||
self.pages.all().delete()
|
||||
|
||||
for page_number in range(detected_pages):
|
||||
DocumentPage.objects.get_or_create(
|
||||
document_version=self, page_number=page_number + 1)
|
||||
DocumentPage.objects.create(
|
||||
document_version=self, page_number=page_number + 1
|
||||
)
|
||||
|
||||
# TODO: is this needed anymore
|
||||
if save:
|
||||
self.save()
|
||||
|
||||
@@ -408,7 +408,8 @@ class DocumentVersion(models.Model):
|
||||
"""
|
||||
if self.exists():
|
||||
try:
|
||||
self.mimetype, self.encoding = get_mimetype(self.open(), self.document.label)
|
||||
with self.open() as file_object:
|
||||
self.mimetype, self.encoding = get_mimetype(file_object=file_object)
|
||||
except:
|
||||
self.mimetype = ''
|
||||
self.encoding = ''
|
||||
@@ -525,6 +526,33 @@ class DocumentPage(models.Model):
|
||||
def document(self):
|
||||
return self.document_version.document
|
||||
|
||||
def get_image(self, *args, **kargs):
|
||||
#size=DISPLAY_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION, as_base64=False, version=None):
|
||||
#if zoom < ZOOM_MIN_LEVEL:
|
||||
# zoom = ZOOM_MIN_LEVEL
|
||||
|
||||
#if zoom > ZOOM_MAX_LEVEL:
|
||||
# zoom = ZOOM_MAX_LEVEL
|
||||
|
||||
#rotation = rotation % 360
|
||||
|
||||
#file_path = self.get_valid_image(size=size, page=page, zoom=zoom, rotation=rotation, version=version)
|
||||
#logger.debug('file_path: %s', file_path)
|
||||
|
||||
converter = Converter(file_object=self.document_version.open())
|
||||
data = converter.convert(page=self.page_number)
|
||||
#print "data!!!!", data.getvalue()
|
||||
##, *args, **kwargs):
|
||||
return 'data:%s;base64,%s' % ('PNG', base64.b64encode(data.getvalue()))
|
||||
|
||||
#if as_base64:
|
||||
# with open(file_path, 'r') as file_object:
|
||||
# #mimetype = get_mimetype(file_object=file_object, mimetype_only=True)[0]
|
||||
# base64_data = base64.b64encode(file_object.read())
|
||||
# return 'data:%s;base64,%s' % (mimetype, base64_data)
|
||||
#else:
|
||||
# return file_path
|
||||
|
||||
|
||||
def argument_validator(value):
|
||||
"""
|
||||
@@ -545,7 +573,8 @@ class DocumentPageTransformation(models.Model):
|
||||
"""
|
||||
document_page = models.ForeignKey(DocumentPage, verbose_name=_('Document page'))
|
||||
order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_('Order'), db_index=True)
|
||||
transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_('Transformation'))
|
||||
#transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_('Transformation'))
|
||||
transformation = models.CharField(max_length=128, verbose_name=_('Transformation'))
|
||||
arguments = models.TextField(blank=True, null=True, verbose_name=_('Arguments'), help_text=_('Use dictionaries to indentify arguments, example: {\'degrees\':90}'), validators=[argument_validator])
|
||||
objects = DocumentPageTransformationManager()
|
||||
|
||||
|
||||
@@ -17,7 +17,8 @@ logger = logging.getLogger(__name__)
|
||||
@app.task(compression='zlib')
|
||||
def task_get_document_image(document_id, *args, **kwargs):
|
||||
document = Document.objects.get(pk=document_id)
|
||||
return document.get_image(*args, **kwargs)
|
||||
first_page = document.latest_version.pages.first()
|
||||
return first_page.get_image(*args, **kwargs)
|
||||
|
||||
|
||||
@app.task(ignore_result=True)
|
||||
|
||||
@@ -341,6 +341,9 @@ def document_multiple_document_type_edit(request):
|
||||
)
|
||||
|
||||
|
||||
from django.http import HttpResponse
|
||||
|
||||
|
||||
# TODO: Get rid of this view and convert widget to use API and base64 only images
|
||||
def get_document_image(request, document_id, size=PREVIEW_SIZE):
|
||||
document = get_object_or_404(Document, pk=document_id)
|
||||
@@ -364,7 +367,17 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE):
|
||||
rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360
|
||||
|
||||
task = task_get_document_image.apply_async(kwargs=dict(document_id=document.pk, size=size, page=page, zoom=zoom, rotation=rotation, as_base64=False, version=version), queue='converter')
|
||||
return sendfile.sendfile(request, task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT), mimetype=DEFAULT_FILE_FORMAT_MIMETYPE)
|
||||
data = task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)
|
||||
|
||||
response = HttpResponse(data, content_type='data/PNG')
|
||||
#response['Content-Disposition'] = 'attachment; filename="somefilename.pdf"'
|
||||
|
||||
return response
|
||||
|
||||
#print 'data!!!!!!!!!!!', task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT)
|
||||
#re
|
||||
|
||||
#return sendfile.sendfile(request, task.get(timeout=DOCUMENT_IMAGE_TASK_TIMEOUT), mimetype=DEFAULT_FILE_FORMAT_MIMETYPE)
|
||||
|
||||
|
||||
def document_download(request, document_id=None, document_id_list=None, document_version_pk=None):
|
||||
|
||||
@@ -2,16 +2,10 @@ from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
try:
|
||||
import magic
|
||||
USE_PYTHON_MAGIC = True
|
||||
except:
|
||||
import mimetypes
|
||||
mimetypes.init()
|
||||
USE_PYTHON_MAGIC = False
|
||||
import magic
|
||||
|
||||
|
||||
def get_mimetype(file_description, filepath, mimetype_only=False):
|
||||
def get_mimetype(file_object, mimetype_only=False):
|
||||
"""
|
||||
Determine a file's mimetype by calling the system's libmagic
|
||||
library via python-magic or fallback to use python's mimetypes
|
||||
@@ -19,17 +13,15 @@ def get_mimetype(file_description, filepath, mimetype_only=False):
|
||||
"""
|
||||
file_mimetype = None
|
||||
file_mime_encoding = None
|
||||
if USE_PYTHON_MAGIC:
|
||||
mime = magic.Magic(mime=True)
|
||||
file_mimetype = mime.from_buffer(file_description.read())
|
||||
if not mimetype_only:
|
||||
file_description.seek(0)
|
||||
mime_encoding = magic.Magic(mime_encoding=True)
|
||||
file_mime_encoding = mime_encoding.from_buffer(file_description.read())
|
||||
else:
|
||||
path, filename = os.path.split(filepath)
|
||||
file_mimetype, file_mime_encoding = mimetypes.guess_type(filename)
|
||||
|
||||
file_description.close()
|
||||
mime = magic.Magic(mime=True)
|
||||
file_mimetype = mime.from_buffer(file_object.read())
|
||||
file_object.seek(0)
|
||||
|
||||
if not mimetype_only:
|
||||
file_object.seek(0)
|
||||
mime_encoding = magic.Magic(mime_encoding=True)
|
||||
file_mime_encoding = mime_encoding.from_buffer(file_object.read())
|
||||
file_object.seek(0)
|
||||
|
||||
return file_mimetype, file_mime_encoding
|
||||
|
||||
@@ -19,7 +19,7 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from model_utils.managers import InheritanceManager
|
||||
|
||||
from converter.api import get_available_transformations_choices
|
||||
#from converter.api import get_available_transformations_choices
|
||||
from converter.literals import DIMENSION_SEPARATOR
|
||||
from djcelery.models import PeriodicTask, IntervalSchedule
|
||||
from documents.models import Document, DocumentType
|
||||
@@ -376,7 +376,8 @@ class SourceTransformation(models.Model):
|
||||
object_id = models.PositiveIntegerField()
|
||||
content_object = generic.GenericForeignKey('content_type', 'object_id')
|
||||
order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_('Order'), db_index=True)
|
||||
transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_('Transformation'))
|
||||
#transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_('Transformation'))
|
||||
transformation = models.CharField(max_length=128, verbose_name=_('Transformation'))
|
||||
arguments = models.TextField(blank=True, null=True, verbose_name=_('Arguments'), help_text=_('Use dictionaries to indentify arguments, example: {\'degrees\':90}'), validators=[argument_validator])
|
||||
|
||||
objects = models.Manager()
|
||||
|
||||
@@ -23,9 +23,9 @@ djangorestframework==2.4.4
|
||||
pdfminer==20110227
|
||||
pycountry==1.10
|
||||
python-dateutil==2.4.2
|
||||
pytz==2015.4
|
||||
python-gnupg==0.3.7
|
||||
python-magic==0.4.6
|
||||
pytz==2015.4
|
||||
|
||||
sh==1.11
|
||||
slate==0.3
|
||||
|
||||
Reference in New Issue
Block a user