Merge branch 'transformation_refactor' into converter_export

This commit is contained in:
Roberto Rosario
2011-07-16 01:20:19 -04:00
18 changed files with 420 additions and 209 deletions

View File

@@ -1,7 +1,16 @@
from django.utils.translation import ugettext_lazy as _
from django.core.exceptions import ImproperlyConfigured
from navigation.api import register_sidebar_template
from converter.utils import load_backend
from converter.conf.settings import GRAPHICS_BACKEND
formats_list = {'text': _('file formats'), 'view': 'formats_list', 'famfam': 'pictures'}
register_sidebar_template(['formats_list'], 'converter_file_formats_help.html')
try:
backend = load_backend().ConverterClass()
except ImproperlyConfigured:
raise ImproperlyConfigured(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)

View File

@@ -1,9 +1,6 @@
import os
import subprocess
from django.utils.importlib import import_module
from django.template.defaultfilters import slugify
from django.core.exceptions import ImproperlyConfigured
import hashlib
from common import TEMPORARY_DIRECTORY
from documents.utils import document_save_to_temp_dir
@@ -12,21 +9,23 @@ from converter.conf.settings import UNPAPER_PATH
from converter.conf.settings import OCR_OPTIONS
from converter.conf.settings import UNOCONV_PATH
from converter.exceptions import UnpaperError, OfficeConversionError
from converter.utils import load_backend
from converter.literals import DEFAULT_PAGE_INDEX_NUMBER, \
from converter.literals import DEFAULT_PAGE_NUMBER, \
DEFAULT_OCR_FILE_FORMAT, QUALITY_DEFAULT, DEFAULT_ZOOM_LEVEL, \
DEFAULT_ROTATION, DEFAULT_FILE_FORMAT, QUALITY_PRINT
DEFAULT_ROTATION, DEFAULT_FILE_FORMAT, QUALITY_HIGH
from converter import backend
from converter.literals import TRANSFORMATION_CHOICES
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
TRANSFORMATION_ZOOM
from converter.literals import DIMENSION_SEPARATOR
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
CONVERTER_OFFICE_FILE_EXTENSIONS = [
u'ods', u'docx', u'doc'
]
try:
backend = load_backend().ConverterClass()
except ImproperlyConfigured:
raise ImproperlyConfigured(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)
def cleanup(filename):
"""
Tries to remove the given filename. Ignores non-existent files
@@ -75,19 +74,11 @@ def cache_cleanup(input_filepath, *args, **kwargs):
def create_image_cache_filename(input_filepath, *args, **kwargs):
if input_filepath:
temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
final_filepath = []
[final_filepath.append(str(arg)) for arg in args]
final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()])
temp_path += slugify(u'_'.join(final_filepath))
return temp_path
hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)]))
return os.path.join(TEMPORARY_DIRECTORY, hash_value)
else:
return None
def convert_office_document(input_filepath):
if os.path.exists(UNOCONV_PATH):
@@ -104,15 +95,14 @@ def convert_document(document, *args, **kwargs):
return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs)
def convert(input_filepath, *args, **kwargs):
def convert(input_filepath, cleanup_files=True, *args, **kwargs):
size = kwargs.get('size')
file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT)
extra_options = kwargs.get('extra_options', u'')
zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL)
rotation = kwargs.get('rotation', DEFAULT_ROTATION)
page = kwargs.get('page', DEFAULT_PAGE_INDEX_NUMBER)
cleanup_files = kwargs.get('cleanup_files', True)
page = kwargs.get('page', DEFAULT_PAGE_NUMBER)
quality = kwargs.get('quality', QUALITY_DEFAULT)
transformations = kwargs.get('transformations', [])
unoconv_output = None
@@ -126,20 +116,32 @@ def convert(input_filepath, *args, **kwargs):
if result:
unoconv_output = result
input_filepath = result
extra_options = u''
input_arg = u'%s[%s]' % (input_filepath, page)
extra_options += u' -resize %s' % size
transformations.append(
{
'transformation': TRANSFORMATION_RESIZE,
'arguments': dict(zip([u'width', u'height'], size.split(DIMENSION_SEPARATOR)))
}
)
if zoom != 100:
extra_options += u' -resize %d%% ' % zoom
transformations.append(
{
'transformation': TRANSFORMATION_ZOOM,
'arguments': {'percent': zoom}
}
)
if rotation != 0 and rotation != 360:
extra_options += u' -rotate %d ' % rotation
transformations.append(
{
'transformation': TRANSFORMATION_ROTATE,
'arguments': {'degrees': rotation}
}
)
if format == u'jpg':
extra_options += u' -quality 85'
try:
backend.convert_file(input_filepath=input_arg, arguments=extra_options, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality)
backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, quality=quality, transformations=transformations, page=page, file_format=file_format)
finally:
if cleanup_files:
cleanup(input_filepath)
@@ -150,11 +152,7 @@ def convert(input_filepath, *args, **kwargs):
def get_page_count(input_filepath):
try:
return len(backend.identify_file(unicode(input_filepath)).splitlines())
except:
#TODO: send to other page number identifying program
return 1
return backend.get_page_count(input_filepath)
def get_document_dimensions(document, *args, **kwargs):
@@ -166,7 +164,7 @@ def get_document_dimensions(document, *args, **kwargs):
return [0, 0]
def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT):
def convert_document_for_ocr(document, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT):
#Extract document file
input_filepath = document_save_to_temp_dir(document, document.uuid)
@@ -178,14 +176,12 @@ def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_form
unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep)
convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format)
input_arg = u'%s[%s]' % (input_filepath, page)
try:
document_page = document.documentpage_set.get(page_number=page + 1)
document_page = document.documentpage_set.get(page_number=page)
transformation_string, warnings = document_page.get_transformation_string()
#Apply default transformations
backend.convert_file(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
backend.convert_file(input_filepath=input_filepath, page=page, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
#Do OCR operations
backend.convert_file(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
# Process by unpaper
@@ -198,3 +194,12 @@ def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_form
cleanup(unpaper_output_file)
return convert_output_file
def get_available_transformations_choices():
    """
    Build the choices list for the transformations supported by the
    active converter backend.

    Each entry is a two item list: the transformation identifier and a
    template made of the transformation label followed by its argument
    names separated by commas.  Required arguments are shown as <name>,
    optional ones as [name].
    """
    choices = []
    for name in backend.get_available_transformations():
        definition = TRANSFORMATION_CHOICES[name]

        placeholders = []
        for argument in definition['arguments']:
            if argument['required']:
                placeholders.append('<%s>' % argument['name'])
            else:
                placeholders.append('[%s]' % argument['name'])

        template = u'%s %s' % (definition['label'], u','.join(placeholders))
        choices.append([name, template])

    return choices

View File

@@ -21,9 +21,6 @@ class ConverterBase(object):
def get_available_transformations(self):
    """
    Abstract hook: concrete backends must override this method.

    Raises NotImplementedError naming the class that lacks the override.
    """
    backend_name = self.__class__.__name__
    raise NotImplementedError("Your %s class has not defined a get_available_transformations() method, which is required." % backend_name)
def get_available_transformations_labels(self):
return ([(name, data['label']) for name, data in self.get_available_transformations().items()])
def get_transformation_string(self, transformation_list):
transformations = []
warnings = []
@@ -41,3 +38,5 @@ class ConverterBase(object):
return u' '.join(transformations), warnings
def get_page_count(self, input_filepath=None):
    """
    Abstract hook: concrete backends must override this method.

    input_filepath is accepted (and ignored) so that calling this
    method the same way the backend overrides are called, with the path
    of the file to inspect, reports the missing override instead of
    failing with a TypeError about the argument count.

    Raises NotImplementedError naming the class that lacks the override.
    """
    raise NotImplementedError("Your %s class has not defined a get_page_count() method, which is required." % self.__class__.__name__)

View File

@@ -1,13 +1,16 @@
import subprocess
import re
from django.utils.translation import ugettext_lazy as _
from converter.conf.settings import GM_PATH
from converter.conf.settings import GM_SETTINGS
from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, IdentifyError
from converter.backends import ConverterBase
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
TRANSFORMATION_ZOOM
from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \
DEFAULT_FILE_FORMAT
CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format'
CONVERTER_ERROR_STARTS_WITH = u'starts with'
@@ -27,16 +30,44 @@ class ConverterClass(ConverterBase):
raise IdentifyError(proc.stderr.readline())
return proc.stdout.read()
def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
arguments = []
if transformations:
for transformation in transformations:
if transformation['transformation'] == TRANSFORMATION_RESIZE:
dimensions = []
dimensions.append(unicode(transformation['arguments']['width']))
if 'height' in transformation['arguments']:
dimensions.append(unicode(transformation['arguments']['height']))
arguments.append(u'-resize')
arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))
elif transformation['transformation'] == TRANSFORMATION_ZOOM:
arguments.append(u'-resize')
arguments.append(u'%d%%' % transformation['arguments']['percent'])
elif transformation['transformation'] == TRANSFORMATION_ROTATE:
arguments.append(u'-rotate')
arguments.append(u'%s' % transformation['arguments']['degrees'])
if format == u'jpg':
arguments.append(u'-quality')
arguments.append(u'85')
# Graphicsmagick page number is 0 base
input_arg = u'%s[%d]' % (input_filepath, page - 1)
# Specify the file format next to the output filename
output_filepath = u'%s:%s' % (file_format, output_filepath)
def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
command = []
command.append(unicode(GM_PATH))
command.append(u'convert')
command.extend(unicode(QUALITY_SETTINGS[quality]).split())
command.extend(unicode(GM_SETTINGS).split())
command.append(unicode(input_filepath))
command.append(unicode(input_arg))
if arguments:
command.extend(unicode(arguments).split())
command.extend(arguments)
command.append(unicode(output_filepath))
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
@@ -49,13 +80,12 @@ class ConverterClass(ConverterBase):
else:
raise ConvertError(error_line)
def get_format_list(self):
"""
Call GraphicsMagick to parse all of it's supported file formats, and
return a list of the names and descriptions
"""
format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
formats = []
command = []
command.append(unicode(GM_PATH))
@@ -66,20 +96,23 @@ class ConverterClass(ConverterBase):
return_code = proc.wait()
if return_code != 0:
raise ConvertError(proc.stderr.readline())
for line in proc.stdout.readlines():
fields = format_regex.findall(line)
if fields:
formats.append((fields[0][0], fields[0][3]))
return formats
def get_available_transformations(self):
return {
'rotate': {
'label': _(u'Rotate [degrees]'),
'arguments': [{'name': 'degrees'}],
'command_line': u'-rotate %(degrees)d'
}
}
return [
TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \
TRANSFORMATION_DENSITY, TRANSFORMATION_ZOOM
]
def get_page_count(self, input_filepath):
    """
    Return the page count of the file at input_filepath.

    The count is the number of lines in the output of identify_file()
    (presumably one line per page - confirm against the identify
    output format).  Any error while identifying the file is treated
    as a single page document.
    """
    try:
        return len(self.identify_file(unicode(input_filepath)).splitlines())
    except Exception:
        # Best effort fallback.  Exception (not a bare except) so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        #TODO: send to other page number identifying program
        return 1

View File

@@ -9,7 +9,10 @@ from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, \
IdentifyError
from converter.backends import ConverterBase
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
TRANSFORMATION_ZOOM
CONVERTER_ERROR_STRING_NO_DECODER = u'no decode delegate for this image format'
@@ -28,13 +31,42 @@ class ConverterClass(ConverterBase):
return proc.stdout.read()
def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
arguments = []
if transformations:
for transformation in transformations:
if transformation['transformation'] == TRANSFORMATION_RESIZE:
dimensions = []
dimensions.append(unicode(transformation['arguments']['width']))
if 'height' in transformation['arguments']:
dimensions.append(unicode(transformation['arguments']['height']))
arguments.append(u'-resize')
arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))
elif transformation['transformation'] == TRANSFORMATION_ZOOM:
arguments.append(u'-resize')
arguments.append(u'%d%%' % transformation['arguments']['percent'])
elif transformation['transformation'] == TRANSFORMATION_ROTATE:
arguments.append(u'-rotate')
arguments.append(u'%s' % transformation['arguments']['degrees'])
if format == u'jpg':
arguments.append(u'-quality')
arguments.append(u'85')
# Imagemagick page number is 0 base
input_arg = u'%s[%d]' % (input_filepath, page - 1)
# Specify the file format next to the output filename
output_filepath = u'%s:%s' % (file_format, output_filepath)
command = []
command.append(unicode(IM_CONVERT_PATH))
command.extend(unicode(QUALITY_SETTINGS[quality]).split())
command.append(unicode(input_filepath))
command.append(unicode(input_arg))
if arguments:
command.extend(unicode(arguments).split())
command.extend(arguments)
command.append(unicode(output_filepath))
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
@@ -73,10 +105,15 @@ class ConverterClass(ConverterBase):
def get_available_transformations(self):
return {
'rotate': {
'label': _(u'Rotate [degrees]'),
'arguments': [{'name': 'degrees'}],
'command_line': u'-rotate %(degrees)d'
}
}
return [
TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \
TRANSFORMATION_DENSITY, TRANSFORMATION_ZOOM
]
def get_page_count(self, input_filepath):
    """
    Return the page count of the file at input_filepath.

    The count is the number of lines in the output of identify_file()
    (presumably one line per page - confirm against the identify
    output format).  Any error while identifying the file is treated
    as a single page document.
    """
    try:
        return len(self.identify_file(unicode(input_filepath)).splitlines())
    except Exception:
        # Best effort fallback.  Exception (not a bare except) so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        #TODO: send to other page number identifying program
        return 1

View File

@@ -0,0 +1,3 @@
from PIL import Image
Image.init()

View File

@@ -0,0 +1,80 @@
from PIL import Image
from django.utils.translation import ugettext_lazy as _
from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, IdentifyError
from converter.backends import ConverterBase
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE
class ConverterClass(ConverterBase):
    """
    Converter backend implemented with the Python Imaging Library (PIL).
    """

    def identify_file(self, input_filepath, arguments=None):
        """
        Placeholder, this backend does not implement file identification.

        Always returns None.
        """
        pass

    def get_page_count(self, input_filepath):
        """
        Return the number of frames PIL can step through in the image
        at input_filepath.

        A file that cannot be opened or read is reported as having a
        single page.
        """
        page_count = 1
        try:
            im = Image.open(input_filepath)
            while 1:
                # Stepping past the last frame raises EOFError
                im.seek(im.tell() + 1)
                page_count += 1
        except EOFError:
            pass  # end of sequence
        except Exception:
            # Best effort fallback for files PIL cannot handle
            #TODO: send to other page number identifying program
            return 1

        return page_count

    def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
        """
        Open input_filepath with PIL and save it again.

        output_filepath must contain exactly one u':' separator; the
        part before it is passed to Image.save() as the destination and
        the part after it as the format.  The quality and arguments
        parameters are accepted but not used.

        Raises ValueError when output_filepath does not split in exactly
        two parts.
        """
        # NOTE(review): converter.api.convert() calls convert_file() with
        # transformations=, page= and file_format= keywords and an
        # output_filepath without a format part; this signature does not
        # accept them yet - confirm the intended contract before use.
        im = Image.open(input_filepath)
        outfile, image_format = output_filepath.split(u':')
        im.save(outfile, image_format)

    def get_format_list(self):
        """
        Introspect PIL's internal registry to obtain a list of the
        supported file types

        Returns a list of (format name, description) tuples, the
        description is always an empty string.
        """
        return [(format_name, u'') for format_name in Image.ID]

    def get_available_transformations(self):
        """
        Return the identifiers of the transformations this backend
        declares as available.
        """
        return [
            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE
        ]

View File

@@ -12,7 +12,7 @@ register_settings(
{'name': u'UNPAPER_PATH', 'global_name': u'CONVERTER_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
{'name': u'GM_PATH', 'global_name': u'CONVERTER_GM_PATH', 'default': u'/usr/bin/gm', 'description': _(u'File path to graphicsmagick\'s program.'), 'exists': True},
{'name': u'GM_SETTINGS', 'global_name': u'CONVERTER_GM_SETTINGS', 'default': u''},
{'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.imagemagick', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.')},
{'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')},
{'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True},
{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'},
{'name': u'DEFAULT_OPTIONS', 'global_name': u'CONVERTER_DEFAULT_OPTIONS', 'default': u''},

View File

@@ -1,3 +1,5 @@
from django.utils.translation import ugettext_lazy as _
from converter.conf.settings import DEFAULT_OPTIONS
from converter.conf.settings import LOW_QUALITY_OPTIONS
from converter.conf.settings import HIGH_QUALITY_OPTIONS
@@ -5,7 +7,7 @@ from converter.conf.settings import PRINT_QUALITY_OPTIONS
DEFAULT_ZOOM_LEVEL = 100
DEFAULT_ROTATION = 0
DEFAULT_PAGE_INDEX_NUMBER = 0
DEFAULT_PAGE_NUMBER = 1
DEFAULT_FILE_FORMAT = u'jpg'
DEFAULT_OCR_FILE_FORMAT = u'tif'
@@ -20,3 +22,43 @@ QUALITY_SETTINGS = {
QUALITY_HIGH: HIGH_QUALITY_OPTIONS,
QUALITY_PRINT: PRINT_QUALITY_OPTIONS
}
DIMENSION_SEPARATOR = u'x'
TRANSFORMATION_RESIZE = u'resize'
TRANSFORMATION_ROTATE = u'rotate'
TRANSFORMATION_DENSITY = u'density'
TRANSFORMATION_ZOOM = u'zoom'
# Describes every known transformation: its translatable label and
# description plus the arguments it takes.  Each argument entry holds
# the argument name, a translatable label and whether it is required.
TRANSFORMATION_CHOICES = {
    TRANSFORMATION_RESIZE: {
        'label': _(u'Resize'),
        'description': _(u'Resize.'),
        'arguments': [
            {'name': 'width', 'label': _(u'width'), 'required': True},
            {'name': 'height', 'label': _(u'height'), 'required': False},
        ]
    },
    TRANSFORMATION_ROTATE: {
        'label': _(u'Rotate'),
        'description': _(u'Rotate by n degrees.'),
        'arguments': [
            {'name': 'degrees', 'label': _(u'degrees'), 'required': True}
        ]
    },
    TRANSFORMATION_DENSITY: {
        'label': _(u'Density'),
        'description': _(u'Change the resolution (ie: DPI) without resizing.'),
        'arguments': [
            {'name': 'width', 'label': _(u'width'), 'required': True},
            {'name': 'height', 'label': _(u'height'), 'required': False},
        ]
    },
    TRANSFORMATION_ZOOM: {
        'label': _(u'Zoom'),
        'description': _(u'Zoom by n percent.'),
        'arguments': [
            {'name': 'percent', 'label': _(u'percent'), 'required': True}
        ]
    },
}

View File

@@ -1,38 +1,18 @@
from django.utils.translation import ugettext_lazy as _
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.utils.importlib import import_module
from converter import backend
from converter.conf.settings import GRAPHICS_BACKEND
def _lazy_load(fn):
_cached = []
def _decorated():
if not _cached:
_cached.append(fn())
return _cached[0]
return _decorated
@_lazy_load
def _get_backend():
return import_module(GRAPHICS_BACKEND)
try:
backend = _get_backend()
except ImportError:
raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)
def formats_list(request):
#check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
context = {
'title': _(u'suported file formats'),
'hide_object': True,
'object_list': backend.get_format_list(),
'object_list': sorted(backend.get_format_list()),
'extra_columns': [
{
'name': _(u'name'),

View File

@@ -13,3 +13,24 @@ class RecentDocumentManager(models.Manager):
to_delete = self.model.objects.filter(user=user)[RECENT_COUNT:]
for recent_to_delete in to_delete:
recent_to_delete.delete()
class DocumentPageTransformationManager(models.Manager):
    """
    Manager with helpers to fetch the transformations of a document page.
    """

    def get_for_document_page(self, document_page):
        """
        Return the queryset of transformations whose document_page is
        the given one.
        """
        return self.model.objects.filter(document_page=document_page)

    def get_for_document_page_as_list(self, document_page):
        """
        Return a (transformations, warnings) tuple for document_page.

        transformations is a list of dictionaries with the keys
        'transformation' and 'arguments', the latter being the evaluated
        form of the stored arguments text.  Entries whose arguments
        cannot be evaluated are left out and the exception raised is
        appended to warnings instead.
        """
        transformations = []
        warnings = []
        rows = self.get_for_document_page(document_page).values('transformation', 'arguments')
        for row in rows:
            try:
                # SECURITY NOTE(review): eval() executes whatever
                # expression is stored in the arguments field; if only
                # literal dictionaries are expected consider switching
                # to ast.literal_eval().
                parsed_arguments = eval(row['arguments'], {})
            except Exception as e:
                warnings.append(e)
            else:
                transformations.append(
                    {
                        'transformation': row['transformation'],
                        'arguments': parsed_arguments
                    }
                )

        return transformations, warnings

View File

@@ -12,12 +12,13 @@ from python_magic import magic
from taggit.managers import TaggableManager
from dynamic_search.api import register
from converter.api import get_page_count
from converter.api import backend
from converter.api import get_available_transformations_choices
from documents.conf.settings import CHECKSUM_FUNCTION
from documents.conf.settings import UUID_FUNCTION
from documents.conf.settings import STORAGE_BACKEND
from documents.managers import RecentDocumentManager
from documents.managers import RecentDocumentManager, \
DocumentPageTransformationManager
def get_filename_from_uuid(instance, filename):
@@ -89,7 +90,7 @@ class Document(models.Model):
mimetype, page count and transformation when originally created
"""
new_document = not self.pk
transformations = kwargs.pop('transformations', None)
super(Document, self).save(*args, **kwargs)
if new_document:
@@ -98,7 +99,8 @@ class Document(models.Model):
self.update_mimetype(save=False)
self.save()
self.update_page_count(save=False)
self.apply_default_transformations()
if transformations:
self.apply_default_transformations(transformations)
@models.permalink
def get_absolute_url(self):
@@ -199,21 +201,21 @@ class Document(models.Model):
exists in storage
"""
return self.file.storage.exists(self.file.path)
def apply_default_transformations(self):
def apply_default_transformations(self, transformations):
#Only apply default transformations on new documents
if DEFAULT_TRANSFORMATIONS and reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
for transformation in DEFAULT_TRANSFORMATIONS:
if 'name' in transformation:
for document_page in self.documentpage_set.all():
page_transformation = DocumentPageTransformation(
document_page=document_page,
order=0,
transformation=transformation['name'])
if 'arguments' in transformation:
page_transformation.arguments = transformation['arguments']
if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
for transformation in transformations:
for document_page in self.documentpage_set.all():
page_transformation = DocumentPageTransformation(
document_page=document_page,
order=0,
transformation=transformation.get('transformation'),
arguments=transformation.get('arguments')
)
page_transformation.save()
page_transformation.save()
class DocumentTypeFilename(models.Model):
@@ -259,9 +261,6 @@ class DocumentPage(models.Model):
def get_absolute_url(self):
return ('document_page_view', [self.pk])
def get_transformation_string(self):
return backend.get_transformation_string(self.documentpagetransformation_set.values('transformation', 'arguments'))
class DocumentPageTransformation(models.Model):
"""
@@ -270,9 +269,11 @@ class DocumentPageTransformation(models.Model):
"""
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
transformation = models.CharField(choices=backend.get_available_transformations_labels(), max_length=128, verbose_name=_(u'transformation'))
transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}'))
objects = DocumentPageTransformationManager()
def __unicode__(self):
return u'"%s" for %s' % (self.get_transformation_display(), unicode(self.document_page))

View File

@@ -20,10 +20,11 @@ from common.widgets import two_state_template
from common.literals import PAGE_SIZE_DIMENSIONS, \
PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE
from common.conf.settings import DEFAULT_PAPER_SIZE
from converter.api import convert_document, QUALITY_DEFAULT
from converter.api import convert_document
from converter.exceptions import UnkownConvertError, UnknownFormat
from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
DEFAULT_FILE_FORMAT, QUALITY_PRINT
from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
DEFAULT_FILE_FORMAT, QUALITY_PRINT, QUALITY_DEFAULT, \
DEFAULT_PAGE_NUMBER
from filetransfers.api import serve_file
from grouping.utils import get_document_group_subtemplate
from metadata.api import save_metadata_list, \
@@ -286,38 +287,14 @@ def document_edit(request, document_id):
}, context_instance=RequestContext(request))
def calculate_converter_arguments(document, *args, **kwargs):
size = kwargs.pop('size', PREVIEW_SIZE)
quality = kwargs.pop('quality', QUALITY_DEFAULT)
page = kwargs.pop('page', 1)
file_format = kwargs.pop('file_format', DEFAULT_FILE_FORMAT)
zoom = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL)
rotation = kwargs.pop('rotation', DEFAULT_ROTATION)
document_page = DocumentPage.objects.get(document=document, page_number=page)
transformation_string, warnings = document_page.get_transformation_string()
arguments = {
'size': size,
'file_format': file_format,
'quality': quality,
'extra_options': transformation_string,
'page': page - 1,
'zoom': zoom,
'rotation': rotation
}
return arguments, warnings
def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT):
check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
document = get_object_or_404(Document, pk=document_id)
page = int(request.GET.get('page', 1))
page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER))
zoom = int(request.GET.get('zoom', 100))
zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
if zoom < ZOOM_MIN_LEVEL:
zoom = ZOOM_MIN_LEVEL
@@ -325,16 +302,17 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
if zoom > ZOOM_MAX_LEVEL:
zoom = ZOOM_MAX_LEVEL
rotation = int(request.GET.get('rotation', 0)) % 360
rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360
arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation)
document_page = get_object_or_404(document.documentpage_set, page_number=page)
transformations, warnings = DocumentPageTransformation.objects.get_for_document_page_as_list(document_page)
if warnings and (request.user.is_staff or request.user.is_superuser):
for warning in warnings:
messages.warning(request, _(u'Page transformation error: %s') % warning)
try:
output_file = convert_document(document, **arguments)
output_file = convert_document(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation, transformations=transformations)
except UnkownConvertError, e:
if request.user.is_staff or request.user.is_superuser:
messages.error(request, e)
@@ -592,13 +570,13 @@ def document_page_view(request, document_page_id):
document_page = get_object_or_404(DocumentPage, pk=document_page_id)
zoom = int(request.GET.get('zoom', 100))
rotation = int(request.GET.get('rotation', 0))
zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
rotation = int(request.GET.get('rotation', DEFAULT_ROTATION))
document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation)
base_title = _(u'details for: %s') % document_page
if zoom != 100:
if zoom != DEFAULT_ZOOM_LEVEL:
zoom_text = u'(%d%%)' % zoom
else:
zoom_text = u''

View File

@@ -6,9 +6,7 @@ from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext
from django.contrib import messages
from django.views.generic.list_detail import object_list
from django.core.urlresolvers import reverse
from django.utils.translation import ugettext_lazy as _
from django.conf import settings
from celery.task.control import inspect
from permissions.api import check_permissions

View File

@@ -6,3 +6,19 @@ class SourceTransformationManager(models.Manager):
def get_for_object(self, obj):
    """
    Return the queryset of transformations attached to obj, matched by
    the content type of obj and its primary key.
    """
    content_type = ContentType.objects.get_for_model(obj)
    by_content_type = self.model.objects.filter(content_type=content_type)
    return by_content_type.filter(object_id=obj.pk)
def get_for_object_as_list(self, obj):
    """
    Return a (transformations, warnings) tuple for obj.

    transformations is a list of dictionaries with the keys
    'transformation' and 'arguments', the latter being the evaluated
    form of the stored arguments text.  Entries whose arguments cannot
    be evaluated are left out and the exception raised is appended to
    warnings instead.
    """
    transformations = []
    warnings = []
    rows = self.get_for_object(obj).values('transformation', 'arguments')
    for row in rows:
        try:
            # SECURITY NOTE(review): eval() executes whatever expression
            # is stored in the arguments field; if only literal
            # dictionaries are expected consider switching to
            # ast.literal_eval().
            parsed_arguments = eval(row['arguments'], {})
        except Exception as e:
            warnings.append(e)
        else:
            transformations.append(
                {
                    'transformation': row['transformation'],
                    'arguments': parsed_arguments
                }
            )

    return transformations, warnings

View File

@@ -6,7 +6,8 @@ from django.contrib.contenttypes import generic
from documents.models import DocumentType
from documents.managers import RecentDocumentManager
from metadata.models import MetadataType
from converter.api import backend
from converter.api import get_available_transformations_choices
from converter.literals import DIMENSION_SEPARATOR
from sources.managers import SourceTransformationManager
@@ -118,7 +119,7 @@ class StagingFolder(InteractiveBaseModel):
if self.preview_height:
dimensions.append(unicode(self.preview_height))
return u'x'.join(dimensions)
return DIMENSION_SEPARATOR.join(dimensions)
class Meta(InteractiveBaseModel.Meta):
verbose_name = _(u'staging folder')
@@ -162,8 +163,8 @@ class SourceTransformation(models.Model):
object_id = models.PositiveIntegerField()
content_object = generic.GenericForeignKey('content_type', 'object_id')
order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
transformation = models.CharField(choices=backend.get_available_transformations_labels(), max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}'))
transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}')
objects = SourceTransformationManager()

View File

@@ -11,7 +11,6 @@ from django.utils.translation import ugettext_lazy as _
from converter.api import convert, cache_cleanup
DEFAULT_STAGING_DIRECTORY = u'/tmp'
#from documents.conf.settings import DEFAULT_TRANSFORMATIONS
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
#TODO: Do benchmarks
@@ -106,16 +105,15 @@ class StagingFile(object):
def upload(self):
"""
Return a StagingFile encapsulated in a File class instance to
allow for easier upload a staging files
allow for easier upload of staging files
"""
try:
return File(file(self.filepath, 'rb'), name=self.filename)
except Exception, exc:
raise Exception(ugettext(u'Unable to upload staging file: %s') % exc)
def delete(self, preview_size):
# tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS)
cache_cleanup(self.filepath, size=preview_size)# , extra_options=tranformation_string)
def delete(self, preview_size, transformations):
cache_cleanup(self.filepath, size=preview_size, transformations=transformations)
try:
os.unlink(self.filepath)
except OSError, exc:
@@ -124,24 +122,7 @@ class StagingFile(object):
else:
raise OSError(ugettext(u'Unable to delete staging file: %s') % exc)
def preview(self, preview_size):
def preview(self, preview_size, transformations):
errors = []
# tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS)
# output_file = convert(self.filepath, size=STAGING_FILES_PREVIEW_SIZE, extra_options=tranformation_string, cleanup_files=False)
output_file = convert(self.filepath, size=preview_size, cleanup_files=False)
output_file = convert(self.filepath, size=preview_size, cleanup_files=False, transformations=transformations)
return output_file, errors
def get_transformation_string(transformations):
    """
    Build a single space separated option string from a list of
    transformations.

    The loop that renders each transformation is currently disabled (it
    is kept below as a comment), so ``transformations`` is accepted but
    not consumed yet: the resulting string is always empty and no errors
    are collected.

    Returns a ``(transformation_string, errors)`` tuple.
    """
    transformation_list = []
    errors = []
    #for transformation in transformations:
    #    try:
    #        if transformation['name'] in TRANFORMATION_CHOICES:
    #            output = TRANFORMATION_CHOICES[transformation['name']] % eval(transformation['arguments'])
    #            transformation_list.append(output)
    #    except Exception, e:
    #        errors.append(e)
    # The join used to be commented out together with the loop above,
    # which left the returned name undefined and made every call raise
    # NameError; it is safe to run on the (possibly empty) list.
    transformation_string = ' '.join(transformation_list)
    return transformation_string, errors

View File

@@ -129,9 +129,13 @@ def upload_interactive(request, source_type=None, source_id=None):
expand = True
else:
expand = False
if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type)):
transformations, errors = SourceTransformation.objects.get_for_object_as_list(web_form)
if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type=document_type, transformations=transformations)):
instance = form.save()
instance.save()
instance.apply_default_transformations(transformations)
if document_type:
instance.document_type = document_type
_handle_save_document(request, instance, form)
@@ -174,16 +178,18 @@ def upload_interactive(request, source_type=None, source_id=None):
expand = True
else:
expand = False
if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type)):
transformations, errors = SourceTransformation.objects.get_for_object_as_list(staging_folder)
if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type=document_type, transformations=transformations)):
document = Document(file=staging_file.upload())
if document_type:
document.document_type = document_type
document.save()
document.apply_default_transformations(transformations)
_handle_save_document(request, document, form)
messages.success(request, _(u'Staging file: %s, uploaded successfully.') % staging_file.filename)
if staging_folder.delete_after_upload:
staging_file.delete(staging_folder.get_preview_size())
staging_file.delete(preview_size=staging_folder.get_preview_size(), transformations=transformations)
messages.success(request, _(u'Staging file: %s, deleted successfully.') % staging_file.filename)
except Exception, e:
messages.error(request, e)
@@ -260,7 +266,7 @@ def _handle_save_document(request, document, form=None):
create_history(HISTORY_DOCUMENT_CREATED, document, {'user': request.user})
def _handle_zip_file(request, uploaded_file, document_type=None):
def _handle_zip_file(request, uploaded_file, document_type=None, transformations=None):
filename = getattr(uploaded_file, 'filename', getattr(uploaded_file, 'name', ''))
if filename.lower().endswith('zip'):
zfobj = zipfile.ZipFile(uploaded_file)
@@ -285,7 +291,12 @@ def staging_file_preview(request, source_type, source_id, staging_file_id):
staging_folder = get_object_or_404(StagingFolder, pk=source_id)
StagingFile = create_staging_file_class(request, staging_folder.folder_path)
try:
output_file, errors = StagingFile.get(staging_file_id).preview(staging_folder.get_preview_size())
transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
output_file, errors = StagingFile.get(staging_file_id).preview(
preview_size=staging_folder.get_preview_size(),
transformations=transformations
)
if errors and (request.user.is_staff or request.user.is_superuser):
for error in errors:
messages.warning(request, _(u'Staging file transformation error: %(error)s') % {
@@ -313,15 +324,19 @@ def staging_file_delete(request, source_type, source_id, staging_file_id):
StagingFile = create_staging_file_class(request, staging_folder.folder_path)
staging_file = StagingFile.get(staging_file_id)
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', '/')))
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', '/')))
if request.method == 'POST':
try:
staging_file.delete(staging_folder.get_preview_size())
transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
staging_file.delete(
preview_size=staging_folder.get_preview_size(),
transformations=transformations
)
messages.success(request, _(u'Staging file delete successfully.'))
except Exception, e:
messages.error(request, e)
messages.error(request, _(u'Staging file delete error; %s.') % e)
return HttpResponseRedirect(next)
results = get_active_tab_links()
@@ -509,11 +524,17 @@ def setup_source_transformation_edit(request, transformation_id):
form = SourceTransformationForm(instance=source_transformation, data=request.POST)
if form.is_valid():
try:
form.save()
messages.success(request, _(u'Source transformation edited successfully'))
return HttpResponseRedirect(next)
except Exception, e:
messages.error(request, _(u'Error editing source transformation; %s') % e)
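# Test the validity of the argument field.
# NOTE(review): eval() runs arbitrary expressions taken from submitted
# form data, and the bare except below hides the real error; consider
# ast.literal_eval instead - TODO confirm arguments are always literals.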
eval(form.cleaned_data['arguments'], {})
except:
messages.error(request, _(u'Source transformation argument error.'))
else:
try:
form.save()
messages.success(request, _(u'Source transformation edited successfully'))
return HttpResponseRedirect(next)
except Exception, e:
messages.error(request, _(u'Error editing source transformation; %s') % e)
else:
form = SourceTransformationForm(instance=source_transformation)
@@ -598,13 +619,19 @@ def setup_source_transformation_create(request, source_type, source_id):
form = SourceTransformationForm_create(request.POST)
if form.is_valid():
try:
source_tranformation = form.save(commit=False)
source_tranformation.content_object = source
source_tranformation.save()
messages.success(request, _(u'Source transformation created successfully'))
return HttpResponseRedirect(redirect_view)
except Exception, e:
messages.error(request, _(u'Error creating source transformation; %s') % e)
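# Test the validity of the argument field.
# NOTE(review): eval() runs arbitrary expressions taken from submitted
# form data, and the bare except below hides the real error; consider
# ast.literal_eval instead - TODO confirm arguments are always literals.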
eval(form.cleaned_data['arguments'], {})
except:
messages.error(request, _(u'Source transformation argument error.'))
else:
try:
source_tranformation = form.save(commit=False)
source_tranformation.content_object = source
source_tranformation.save()
messages.success(request, _(u'Source transformation created successfully'))
return HttpResponseRedirect(redirect_view)
except Exception, e:
messages.error(request, _(u'Error creating source transformation; %s') % e)
else:
form = SourceTransformationForm_create()