Source, document page and thumbnails working; new document transformations and OCR are yet to be converted
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import subprocess
|
||||
import hashlib
|
||||
|
||||
from django.utils.importlib import import_module
|
||||
from django.template.defaultfilters import slugify
|
||||
@@ -22,6 +23,7 @@ from converter.literals import TRANSFORMATION_RESIZE, \
|
||||
TRANSFORMATION_ZOOM
|
||||
from converter.literals import DIMENSION_SEPARATOR
|
||||
|
||||
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
|
||||
|
||||
CONVERTER_OFFICE_FILE_EXTENSIONS = [
|
||||
u'ods', u'docx', u'doc'
|
||||
@@ -75,19 +77,11 @@ def cache_cleanup(input_filepath, *args, **kwargs):
|
||||
|
||||
def create_image_cache_filename(input_filepath, *args, **kwargs):
|
||||
if input_filepath:
|
||||
temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
|
||||
temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
|
||||
|
||||
final_filepath = []
|
||||
[final_filepath.append(str(arg)) for arg in args]
|
||||
final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()])
|
||||
|
||||
temp_path += slugify(u'_'.join(final_filepath))
|
||||
|
||||
return temp_path
|
||||
hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)]))
|
||||
return os.path.join(TEMPORARY_DIRECTORY, hash_value)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def convert_office_document(input_filepath):
|
||||
if os.path.exists(UNOCONV_PATH):
|
||||
@@ -104,21 +98,21 @@ def convert_document(document, *args, **kwargs):
|
||||
return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs)
|
||||
|
||||
|
||||
def convert(input_filepath, *args, **kwargs):
|
||||
def convert(input_filepath, cleanup_files=True, *args, **kwargs):
|
||||
size = kwargs.get('size')
|
||||
file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT)
|
||||
zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL)
|
||||
rotation = kwargs.get('rotation', DEFAULT_ROTATION)
|
||||
page = kwargs.get('page', DEFAULT_PAGE_NUMBER)
|
||||
cleanup_files = kwargs.get('cleanup_files', True)
|
||||
quality = kwargs.get('quality', QUALITY_DEFAULT)
|
||||
transformations = kwargs.get('transformations', [])
|
||||
|
||||
unoconv_output = None
|
||||
|
||||
output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
|
||||
#if os.path.exists(output_filepath):
|
||||
# return output_filepath
|
||||
print 'output_filepath', output_filepath
|
||||
if os.path.exists(output_filepath):
|
||||
return output_filepath
|
||||
|
||||
path, extension = os.path.splitext(input_filepath)
|
||||
if extension[1:].lower() in CONVERTER_OFFICE_FILE_EXTENSIONS:
|
||||
@@ -128,8 +122,6 @@ def convert(input_filepath, *args, **kwargs):
|
||||
input_filepath = result
|
||||
extra_options = u''
|
||||
|
||||
#TODO: not here in the backend
|
||||
input_arg = u'%s[%s]' % (input_filepath, page-1)
|
||||
transformations.append(
|
||||
{
|
||||
'transformation': TRANSFORMATION_RESIZE,
|
||||
@@ -154,7 +146,7 @@ def convert(input_filepath, *args, **kwargs):
|
||||
)
|
||||
|
||||
try:
|
||||
backend.convert_file(input_filepath=input_arg, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality, transformations=transformations)
|
||||
backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, quality=quality, transformations=transformations, page=page, file_format=file_format)
|
||||
finally:
|
||||
if cleanup_files:
|
||||
cleanup(input_filepath)
|
||||
@@ -189,14 +181,12 @@ def convert_document_for_ocr(document, page=DEFAULT_PAGE_NUMBER, file_format=DEF
|
||||
unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep)
|
||||
convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format)
|
||||
|
||||
input_arg = u'%s[%s]' % (input_filepath, page-1)
|
||||
|
||||
try:
|
||||
document_page = document.documentpage_set.get(page_number=page + 1)
|
||||
document_page = document.documentpage_set.get(page_number=page)
|
||||
transformation_string, warnings = document_page.get_transformation_string()
|
||||
|
||||
#Apply default transformations
|
||||
backend.convert_file(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
|
||||
backend.convert_file(input_filepath=input_filepath, page=page, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
|
||||
#Do OCR operations
|
||||
backend.convert_file(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
|
||||
# Process by unpaper
|
||||
|
||||
@@ -11,7 +11,8 @@ from converter.backends import ConverterBase
|
||||
from converter.literals import TRANSFORMATION_RESIZE, \
|
||||
TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
|
||||
TRANSFORMATION_ZOOM
|
||||
from converter.literals import DIMENSION_SEPARATOR
|
||||
from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \
|
||||
DEFAULT_FILE_FORMAT
|
||||
|
||||
CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format'
|
||||
CONVERTER_ERROR_STARTS_WITH = u'starts with'
|
||||
@@ -32,10 +33,12 @@ class ConverterClass(ConverterBase):
|
||||
return proc.stdout.read()
|
||||
|
||||
|
||||
def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT):
|
||||
def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
|
||||
print 'convert_file'
|
||||
arguments = []
|
||||
if transformations:
|
||||
for transformation in transformations:
|
||||
print 'transformation: %s' % transformation
|
||||
if transformation['transformation'] == TRANSFORMATION_RESIZE:
|
||||
dimensions = []
|
||||
dimensions.append(unicode(transformation['arguments']['width']))
|
||||
@@ -46,21 +49,31 @@ class ConverterClass(ConverterBase):
|
||||
|
||||
elif transformation['transformation'] == TRANSFORMATION_ZOOM:
|
||||
arguments.append(u'-resize')
|
||||
arguments.append(u'%d%%' % transformation['arguments']['zoom'])
|
||||
arguments.append(u'%d%%' % transformation['arguments']['percent'])
|
||||
|
||||
elif transformation['transformation'] == TRANSFORMATION_ROTATE:
|
||||
print 'Do rotate'
|
||||
arguments.append(u'-rotate')
|
||||
arguments.append(u'%s' % transformation['arguments']['degrees'])
|
||||
|
||||
print 'arguments: %s' % arguments
|
||||
#if format == u'jpg':
|
||||
# extra_options += u' -quality 85'
|
||||
if format == u'jpg':
|
||||
arguments.append(u'-quality')
|
||||
arguments.append(u'85')
|
||||
|
||||
|
||||
# GraphicsMagick page numbers are 0-based
|
||||
input_arg = u'%s[%d]' % (input_filepath, page - 1)
|
||||
|
||||
# Prefix the output filename with the file format (format:filename)
|
||||
output_filepath = u'%s:%s' % (file_format, output_filepath)
|
||||
|
||||
command = []
|
||||
command.append(unicode(GM_PATH))
|
||||
command.append(u'convert')
|
||||
command.extend(unicode(QUALITY_SETTINGS[quality]).split())
|
||||
command.extend(unicode(GM_SETTINGS).split())
|
||||
command.append(unicode(input_filepath))
|
||||
command.append(unicode(input_arg))
|
||||
if arguments:
|
||||
command.extend(arguments)
|
||||
command.append(unicode(output_filepath))
|
||||
@@ -115,10 +128,3 @@ class ConverterClass(ConverterBase):
|
||||
except:
|
||||
#TODO: send to other page number identifying program
|
||||
return 1
|
||||
|
||||
|
||||
def _get_transformation_string():
|
||||
pass
|
||||
#'command_line': u'-rotate %(degrees)d'
|
||||
# }
|
||||
#}
|
||||
|
||||
@@ -31,15 +31,42 @@ class ConverterClass(ConverterBase):
|
||||
return proc.stdout.read()
|
||||
|
||||
|
||||
def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
|
||||
#if format == u'jpg':
|
||||
# extra_options += u' -quality 85'
|
||||
def convert_file(self, input_filepath, output_filepath, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
|
||||
arguments = []
|
||||
if transformations:
|
||||
for transformation in transformations:
|
||||
if transformation['transformation'] == TRANSFORMATION_RESIZE:
|
||||
dimensions = []
|
||||
dimensions.append(unicode(transformation['arguments']['width']))
|
||||
if 'height' in transformation['arguments']:
|
||||
dimensions.append(unicode(transformation['arguments']['height']))
|
||||
arguments.append(u'-resize')
|
||||
arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))
|
||||
|
||||
elif transformation['transformation'] == TRANSFORMATION_ZOOM:
|
||||
arguments.append(u'-resize')
|
||||
arguments.append(u'%d%%' % transformation['arguments']['percent'])
|
||||
|
||||
elif transformation['transformation'] == TRANSFORMATION_ROTATE:
|
||||
arguments.append(u'-rotate')
|
||||
arguments.append(u'%s' % transformation['arguments']['degrees'])
|
||||
|
||||
if format == u'jpg':
|
||||
arguments.append(u'-quality')
|
||||
arguments.append(u'85')
|
||||
|
||||
# ImageMagick page numbers are 0-based
|
||||
input_arg = u'%s[%d]' % (input_filepath, page - 1)
|
||||
|
||||
# Prefix the output filename with the file format (format:filename)
|
||||
output_filepath = u'%s:%s' % (file_format, output_filepath)
|
||||
|
||||
command = []
|
||||
command.append(unicode(IM_CONVERT_PATH))
|
||||
command.extend(unicode(QUALITY_SETTINGS[quality]).split())
|
||||
command.append(unicode(input_filepath))
|
||||
command.append(unicode(input_arg))
|
||||
if arguments:
|
||||
command.extend(unicode(arguments).split())
|
||||
command.extend(arguments)
|
||||
command.append(unicode(output_filepath))
|
||||
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
return_code = proc.wait()
|
||||
|
||||
@@ -20,4 +20,17 @@ class DocumentPageTransformationManager(models.Manager):
|
||||
return self.model.objects.filter(document_page=document_page)
|
||||
|
||||
def get_for_document_page_as_list(self, document_page):
|
||||
return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments')])
|
||||
warnings = []
|
||||
transformations = []
|
||||
for transformation in self.get_for_document_page(document_page).values('transformation', 'arguments'):
|
||||
try:
|
||||
transformations.append(
|
||||
{
|
||||
'transformation': transformation['transformation'],
|
||||
'arguments': eval(transformation['arguments'], {})
|
||||
}
|
||||
)
|
||||
except Exception, e:
|
||||
warnings.append(e)
|
||||
|
||||
return transformations, warnings
|
||||
|
||||
@@ -20,10 +20,11 @@ from common.widgets import two_state_template
|
||||
from common.literals import PAGE_SIZE_DIMENSIONS, \
|
||||
PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE
|
||||
from common.conf.settings import DEFAULT_PAPER_SIZE
|
||||
from converter.api import convert_document, QUALITY_DEFAULT
|
||||
from converter.api import convert_document
|
||||
from converter.exceptions import UnkownConvertError, UnknownFormat
|
||||
from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
|
||||
DEFAULT_FILE_FORMAT, QUALITY_PRINT
|
||||
from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
|
||||
DEFAULT_FILE_FORMAT, QUALITY_PRINT, QUALITY_DEFAULT, \
|
||||
DEFAULT_PAGE_NUMBER
|
||||
from filetransfers.api import serve_file
|
||||
from grouping.utils import get_document_group_subtemplate
|
||||
from metadata.api import save_metadata_list, \
|
||||
@@ -285,39 +286,15 @@ def document_edit(request, document_id):
|
||||
'object': document,
|
||||
}, context_instance=RequestContext(request))
|
||||
|
||||
'''
|
||||
def calculate_converter_arguments(document, *args, **kwargs):
|
||||
size = kwargs.pop('size', PREVIEW_SIZE)
|
||||
quality = kwargs.pop('quality', QUALITY_DEFAULT)
|
||||
page = kwargs.pop('page', 1)
|
||||
file_format = kwargs.pop('file_format', DEFAULT_FILE_FORMAT)
|
||||
zoom = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL)
|
||||
rotation = kwargs.pop('rotation', DEFAULT_ROTATION)
|
||||
|
||||
document_page = DocumentPage.objects.get(document=document, page_number=page)
|
||||
transformation_string, warnings = document_page.get_transformation_string()
|
||||
|
||||
arguments = {
|
||||
'size': size,
|
||||
'file_format': file_format,
|
||||
'quality': quality,
|
||||
'extra_options': transformation_string,
|
||||
'page': page - 1,
|
||||
'zoom': zoom,
|
||||
'rotation': rotation
|
||||
}
|
||||
|
||||
return arguments, warnings
|
||||
'''
|
||||
|
||||
def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT):
|
||||
check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
|
||||
|
||||
document = get_object_or_404(Document, pk=document_id)
|
||||
|
||||
page = int(request.GET.get('page', 1))
|
||||
page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER))
|
||||
|
||||
zoom = int(request.GET.get('zoom', 100))
|
||||
zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
|
||||
|
||||
if zoom < ZOOM_MIN_LEVEL:
|
||||
zoom = ZOOM_MIN_LEVEL
|
||||
@@ -325,18 +302,16 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
|
||||
if zoom > ZOOM_MAX_LEVEL:
|
||||
zoom = ZOOM_MAX_LEVEL
|
||||
|
||||
rotation = int(request.GET.get('rotation', 0)) % 360
|
||||
rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360
|
||||
|
||||
#arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation)
|
||||
|
||||
#if warnings and (request.user.is_staff or request.user.is_superuser):
|
||||
# for warning in warnings:
|
||||
# messages.warning(request, _(u'Page transformation error: %s') % warning)
|
||||
|
||||
transformations = DocumentPageTransformation.objects.get_for_document_page_as_list(document)
|
||||
document_page = get_object_or_404(document.documentpage_set, page_number=page)
|
||||
transformations, warnings = DocumentPageTransformation.objects.get_for_document_page_as_list(document_page)
|
||||
|
||||
if warnings and (request.user.is_staff or request.user.is_superuser):
|
||||
for warning in warnings:
|
||||
messages.warning(request, _(u'Page transformation error: %s') % warning)
|
||||
|
||||
try:
|
||||
#output_file = convert_document(document, **arguments)
|
||||
output_file = convert_document(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation, transformations=transformations)
|
||||
except UnkownConvertError, e:
|
||||
if request.user.is_staff or request.user.is_superuser:
|
||||
@@ -595,13 +570,13 @@ def document_page_view(request, document_page_id):
|
||||
|
||||
document_page = get_object_or_404(DocumentPage, pk=document_page_id)
|
||||
|
||||
zoom = int(request.GET.get('zoom', 100))
|
||||
rotation = int(request.GET.get('rotation', 0))
|
||||
zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
|
||||
rotation = int(request.GET.get('rotation', DEFAULT_ROTATION))
|
||||
document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation)
|
||||
|
||||
base_title = _(u'details for: %s') % document_page
|
||||
|
||||
if zoom != 100:
|
||||
if zoom != DEFAULT_ZOOM_LEVEL:
|
||||
zoom_text = u'(%d%%)' % zoom
|
||||
else:
|
||||
zoom_text = u''
|
||||
|
||||
@@ -7,5 +7,21 @@ class SourceTransformationManager(models.Manager):
|
||||
ct = ContentType.objects.get_for_model(obj)
|
||||
return self.model.objects.filter(content_type=ct).filter(object_id=obj.pk)
|
||||
|
||||
#def get_for_object_as_list(self, obj):
|
||||
# return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_object(obj).values('transformation', 'arguments')])
|
||||
|
||||
def get_for_object_as_list(self, obj):
|
||||
return list([{'transformation': transformation['transformation'], 'arguments': eval(transformation['arguments'])} for transformation in self.get_for_object(obj).values('transformation', 'arguments')])
|
||||
warnings = []
|
||||
transformations = []
|
||||
for transformation in self.get_for_object(obj).values('transformation', 'arguments'):
|
||||
try:
|
||||
transformations.append(
|
||||
{
|
||||
'transformation': transformation['transformation'],
|
||||
'arguments': eval(transformation['arguments'], {})
|
||||
}
|
||||
)
|
||||
except Exception, e:
|
||||
warnings.append(e)
|
||||
|
||||
return transformations, warnings
|
||||
|
||||
@@ -285,9 +285,11 @@ def staging_file_preview(request, source_type, source_id, staging_file_id):
|
||||
staging_folder = get_object_or_404(StagingFolder, pk=source_id)
|
||||
StagingFile = create_staging_file_class(request, staging_folder.folder_path)
|
||||
try:
|
||||
transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
|
||||
|
||||
output_file, errors = StagingFile.get(staging_file_id).preview(
|
||||
preview_size=staging_folder.get_preview_size(),
|
||||
transformations=SourceTransformation.objects.get_for_object_as_list(staging_folder)
|
||||
transformations=transformations
|
||||
)
|
||||
if errors and (request.user.is_staff or request.user.is_superuser):
|
||||
for error in errors:
|
||||
@@ -321,9 +323,10 @@ def staging_file_delete(request, source_type, source_id, staging_file_id):
|
||||
|
||||
if request.method == 'POST':
|
||||
try:
|
||||
transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
|
||||
staging_file.delete(
|
||||
preview_size=staging_folder.get_preview_size(),
|
||||
transformations=SourceTransformation.objects.get_for_object_as_list(staging_folder)
|
||||
transformations=transformations
|
||||
)
|
||||
messages.success(request, _(u'Staging file delete successfully.'))
|
||||
except Exception, e:
|
||||
@@ -516,12 +519,16 @@ def setup_source_transformation_edit(request, transformation_id):
|
||||
if form.is_valid():
|
||||
try:
|
||||
# Test the validity of the argument field
|
||||
eval(form.cleaned_data['arguments'])
|
||||
form.save()
|
||||
messages.success(request, _(u'Source transformation edited successfully'))
|
||||
return HttpResponseRedirect(next)
|
||||
except Exception, e:
|
||||
messages.error(request, _(u'Error editing source transformation; %s') % e)
|
||||
eval(form.cleaned_data['arguments'], {})
|
||||
except:
|
||||
messages.error(request, _(u'Source transformation argument error.'))
|
||||
else:
|
||||
try:
|
||||
form.save()
|
||||
messages.success(request, _(u'Source transformation edited successfully'))
|
||||
return HttpResponseRedirect(next)
|
||||
except Exception, e:
|
||||
messages.error(request, _(u'Error editing source transformation; %s') % e)
|
||||
else:
|
||||
form = SourceTransformationForm(instance=source_transformation)
|
||||
|
||||
@@ -607,14 +614,18 @@ def setup_source_transformation_create(request, source_type, source_id):
|
||||
if form.is_valid():
|
||||
try:
|
||||
# Test the validity of the argument field
|
||||
eval(form.cleaned_data['arguments'])
|
||||
source_tranformation = form.save(commit=False)
|
||||
source_tranformation.content_object = source
|
||||
source_tranformation.save()
|
||||
messages.success(request, _(u'Source transformation created successfully'))
|
||||
return HttpResponseRedirect(redirect_view)
|
||||
except Exception, e:
|
||||
messages.error(request, _(u'Error creating source transformation; %s') % e)
|
||||
eval(form.cleaned_data['arguments'], {})
|
||||
except:
|
||||
messages.error(request, _(u'Source transformation argument error.'))
|
||||
else:
|
||||
try:
|
||||
source_tranformation = form.save(commit=False)
|
||||
source_tranformation.content_object = source
|
||||
source_tranformation.save()
|
||||
messages.success(request, _(u'Source transformation created successfully'))
|
||||
return HttpResponseRedirect(redirect_view)
|
||||
except Exception, e:
|
||||
messages.error(request, _(u'Error creating source transformation; %s') % e)
|
||||
else:
|
||||
form = SourceTransformationForm_create()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user