Added multipage document support and document page transformation

This commit is contained in:
Roberto Rosario
2011-02-14 00:18:16 -04:00
parent 65d1e5b176
commit 06d7e5a46a
21 changed files with 219 additions and 73 deletions

View File

@@ -6,10 +6,8 @@ import shutil
from django.template.defaultfilters import slugify
from documents.utils import from_descriptor_to_tempfile
from converter.conf.settings import CONVERT_PATH
from converter.conf.settings import IDENTIFY_PATH
from converter.conf.settings import OCR_OPTIONS
from converter.conf.settings import DEFAULT_OPTIONS
from converter.conf.settings import LOW_QUALITY_OPTIONS
@@ -18,6 +16,7 @@ from converter.conf.settings import HIGH_QUALITY_OPTIONS
#from converter.conf.settings import UNOCONV_PATH
from converter import TEMPORARY_DIRECTORY
from utils import from_descriptor_to_tempfile
QUALITY_DEFAULT = 'quality_default'
@@ -73,6 +72,16 @@ def execute_unoconv(input_filepath, output_filepath, arguments=''):
return (proc.wait(), proc.stderr.read())
def execute_identify(input_filepath, arguments):
command = []
command.append(IDENTIFY_PATH)
command.extend(shlex.split(str(arguments)))
command.append(input_filepath)
proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return (proc.wait(), proc.stderr.read(), proc.stdout.read())
def cache_cleanup(input_filepath, size, page=0, format='jpg'):
filepath = create_image_cache_filename(input_filepath, size, page, format)
try:
@@ -126,7 +135,6 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
try:
input_arg = '%s[%s]' % (input_filepath, page)
extra_options += ' -resize %s' % size
print 'extra_options', extra_options
status, error_string = execute_convert(input_arg, extra_options, '%s:%s' % (format, output_filepath), quality=quality)
if status:
errors = get_errors(error_string)
@@ -136,7 +144,16 @@ def convert(input_filepath, size, quality=QUALITY_DEFAULT, cache=True, page=0, f
if unoconv_output:
cleanup(unoconv_output)
return output_filepath
def get_page_count(input_filepath):
try:
status, error_string, output = execute_identify(input_filepath, '-format %n')
if status:
errors = get_errors(error_string)
raise ConvertError(status, errors)
finally:
return int(output)
#TODO: slugify OCR_OPTIONS and add to file name to cache
def convert_document_for_ocr(document, page=0, format='tif'):